Bug 458256. Use LoadLibraryW instead of LoadLibrary (patch by DougT). r+sr=vlad
[wine-gecko.git] / tools / relic / relic
blob717be506b6ff432512fc8773698c79c7802dd3a9
1 #!/usr/bin/python
3 # ***** BEGIN LICENSE BLOCK *****
4 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 # The contents of this file are subject to the Mozilla Public License Version
7 # 1.1 (the "License"); you may not use this file except in compliance with
8 # the License. You may obtain a copy of the License at
9 # http://www.mozilla.org/MPL/
11 # Software distributed under the License is distributed on an "AS IS" basis,
12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 # for the specific language governing rights and limitations under the
14 # License.
16 # The Original Code is the relic relicensing tool.
18 # The Initial Developer of the Original Code is
19 # Trent Mick <TrentM@ActiveState.com>.
20 # Portions created by the Initial Developer are Copyright (C) 2003-2005
21 # the Initial Developer. All Rights Reserved.
23 # Contributor(s):
24 # Gervase Markham <gerv@gerv.net>
25 # Patrick Fey <bugzilla@nachtarbeiter.net>
27 # Alternatively, the contents of this file may be used under the terms of
28 # either the GNU General Public License Version 2 or later (the "GPL"), or
29 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 # in which case the provisions of the GPL or the LGPL are applicable instead
31 # of those above. If you wish to allow use of your version of this file only
32 # under the terms of either the GPL or the LGPL, and not to allow others to
33 # use your version of this file under the terms of the MPL, indicate your
34 # decision by deleting the provisions above and replace them with the notice
35 # and other provisions required by the GPL or the LGPL. If you do not delete
36 # the provisions above, a recipient may use your version of this file under
37 # the terms of any one of the MPL, the GPL or the LGPL.
39 # ***** END LICENSE BLOCK *****
41 # Adapted from the 'lick' and 'ripl' Python scripts. (See:
42 # <http://bugzilla.mozilla.org/show_bug.cgi?id=98089>)
44 """
45 relic - RE-LICense a given file, set of files, or directory of files
46 from the Mozilla source tree
48 Usage:
49 relic [options...] [files...]
50 relic [options...] < files...
52 Options to Select Mode (use one):
53 <none> List the licenses in each file.
54 -s, --statistics Show a summary table of licenses in each file.
55 The -x, --extended option may be added to show
56 some additional detail to the stats.
57 -r, --relicense Modify the given files to include the
58 appropriate Mozilla license, where
59 "appropriate" is either the NPL/GPL/LGPL
60 tri-license if it was already under the NPL or
61 the MPL/LGPL/GPL license in all other cases.
62 -R, --force-relicense
63 Relicenses files (as -r|--relicense), but
64 does NOT skip files that already appear to
65 have a complete license.
66 -A, --add Add a license to files that do not appear to
67 have one.
68 -I, --initial-developers
69 Display initial developer for each file.
71 General Options:
72 -h, --help dump this help and exit
73 -V, --version dump this script's version and exit
74 -v, --verbose verbose output
75 -d, --debug more verbose output
77 -f, --force Continue processing after an error. (Errors
78 are summarized at end.)
79 -q, --quick Quick scanning. Use only basic license checks
80 (only use in report mode).
81 -M, --MPL Replace NPL licenses with MPL ones.
82 -a, --all Check all files (only skip CVS directories).
83 --dry-run Go through motions but don't actually change
84 any files.
85 --backup Make backups of changed files when
86 relicensing. Backup filenames are the
87 original filename suffixed with a ~# where
88 "#" is the lowest number to avoid a file
89 conflict.
90 -o <orig_code_is> Provide fallback value for the "Original
91 Code is" block.
92 -D <orig_code_date> Provide fallback value for the date
93 that is part of the "Original Code is" block.
94 -i <initial_dev> Provide fallback value for the "Initial
95 Developer of the Original Code is" block.
96 -y <year> Provide fallback value for "Initial
97 Developer" copyright year.
98 --defaults Use the following default fallback values:
99 original_code_is: "mozilla.org Code"
100 initial_copyright_date: "2001"
101 initial_developer: "Netscape Communications
102 Corporation"
103 Note: the "Original Code" date is generally
104 not required, so a default is not included
105 here.
107 Examples:
108 # List license in files under mozilla/js/src.
109 relic mozilla/js/src # list licenses in files
110 relic -s mozilla/js/src # show summary stats on licenses
111 relic -r mozilla/js/src # re-license files
115 import os
116 import sys
117 import re
118 import getopt
119 import pprint
120 import shutil
class RelicError(Exception):
    """Error raised by relic itself, e.g. when a license block cannot be parsed."""
#---- setup logging

# Use the standard "logging" package when it can be imported; otherwise fall
# back to a local "_logging" module. If neither is importable, explain the
# problem on stderr and re-raise the ImportError.
try:
    # This package will be std in Python 2.3, but many Python 2.2
    # installation will not have it.
    import logging
    logging.basicConfig()
except ImportError:
    # Local fallback logging module.
    try:
        import _logging as logging
    except ImportError:
        sys.stderr.write("Your Python installation does not have the logging "
                         "package, nor could the fallback _logging module be "
                         "found. One of the two is required to run this "
                         "script.\n\n")
        raise

# Logger used by the routines in this script.
log = logging.getLogger("relic")
#---- globals

# Version of this script as a (major, minor, patch) tuple.
_version_ = (0, 7, 2)

# When processing files, 'relic' skips files and directories according
# to these settings. Note: files identified in .cvsignore files are also
# skipped.

# File extensions (compared against os.path.splitext(path)[1]) to skip.
_g_skip_exts = [".mdp", ".order", ".dsp", ".dsw", ".uf"]
# File basenames to skip. _should_skip_file() compares os.path.basename(path)
# against these entries exactly (case-sensitive).
_g_skip_file_basenames = [
    # Used by CVS (and this script)
    ".cvsignore",

    # GPL with autoconf exception
    "config.guess",
    "config.sub",

    # Auto-generated from other files
    "configure",

    # license and readme files
    "license",
    "readme",
    "copyright",
    "LICENSE-MPL",
    "MPL-1.1.txt",
]
# Individual files to skip, given as '/'-separated paths relative to the
# mozilla dir. _should_skip_file() matches them against the tail of the
# (separator-normalised) path being considered.
_g_skip_files = [
    # TODO: update with MPL block - or CVS remove (check history)
    "tools/wizards/templates/licenses/MPL/lic.mak",
    "tools/wizards/templates/licenses/MPL/lic.pl",

    ###########################################################################
    # Everything in _g_skip_files below this line needs no further work.
    ###########################################################################

    # Files containing copies of licence text which confuses the script
    "LICENSE",
    "js2/COPYING",
    "security/svrcore/LICENSE",
    "extensions/xmlterm/doc/MPL",
    "gfx/cairo/cairo/COPYING-LGPL-2.1",
    "gfx/cairo/cairo/COPYING-MPL-1.1",

    # Files containing global licensing information
    "toolkit/content/license.html",

    # Ben Bucksch - files are tri-licensed with an extra clause.
    "netwerk/streamconv/converters/mozTXTToHTMLConv.cpp",
    "netwerk/streamconv/converters/mozTXTToHTMLConv.h",
    "netwerk/streamconv/public/mozITXTToHTMLConv.idl",

    # GPLed build tools
    "config/preprocessor.pl",
    "intl/uconv/tools/parse-mozilla-encoding-table.pl",
    "intl/uconv/tools/gen-big5hkscs-2001-mozilla.pl",
    "js2/missing",

    # Files which the script doesn't handle well. All have been relicensed
    # manually.
    "xpinstall/wizard/windows/builder/readme.txt",
    "xpfe/bootstrap/icons/windows/readme.txt",
    "embedding/qa/testembed/README.TXT",
    "security/nss/lib/freebl/ecl/README.FP",
    "nsprpub/pkg/linux/sun-nspr.spec",
    "security/nss/pkg/linux/sun-nss.spec",
    "security/jss/pkg/linux/sun-jss.spec",
    "security/nss/lib/freebl/mpi/utils/README",
    "security/nss/lib/freebl/ecl/README",
    "security/nss/lib/freebl/mpi/README",
    "lib/mac/UserInterface/Tables/TableClasses.doc",
    "parser/htmlparser/tests/html/bug23680.html",
    "security/nss/lib/freebl/mpi/montmulfv9.s",
    "tools/performance/pageload/base/lxr.mozilla.org/index.html",
    # Adjacent string literals are joined by the parser; these two entries
    # are split only to keep the lines short.
    "testing/performance/win32/page_load_test/"
    "base/lxr.mozilla.org/index.html",
    "testing/performance/win32/page_load_test/"
    "base/lxr.mozilla.org/20001028.html.orig",

    # Not sure what to do with this...
    "gfx/cairo/stdint.diff",

    # GPL with autoconf exception (same license as files distributed with)
    "build/autoconf/codeset.m4",
    "toolkit/airbag/airbag/autotools/depcomp",
    "toolkit/airbag/airbag/autotools/missing",
    "toolkit/airbag/airbag/autotools/ltmain.sh",
    "js/tamarin/pcre/ltmain.sh",
    "security/svrcore/compile",
    "security/svrcore/ltmain.sh",
    "security/svrcore/missing",
    "security/svrcore/depcomp",
    "security/svrcore/aclocal.m4",

    # Public domain or equivalent
    "nsprpub/config/nspr.m4",
    "toolkit/airbag/airbag/aclocal.m4",
    "security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s",

    # GSSAPI has BSD-like licence requiring some attribution
    "extensions/auth/gssapi.h",

    # This script
    "tools/relic/relic",
]
# Directory basenames that _should_skip_dir() skips in normal operation.
_g_skip_dir_basenames = [
    "CVS",
]

# Directory basenames that _should_skip_dir() still skips when _g_check_all
# is set (i.e. the only directories skipped in "check all" mode).
_g_skip_dir_basenames_cvs_only = [
    "CVS",
]
# Complete path from mozilla dir to a dir to skip.
# Entries are '/'-separated; _should_skip_dir() requires the whole
# (normalised) path to equal an entry.
_g_skip_dirs = [
    # Test files for this script, which cause it to crash!
    "tools/relic/test",

    # License template files (TODO: this directory may disappear)
    "tools/wizards/templates/licenses",

    # As per the "New Original Source Files" section of:
    #   http://www.mozilla.org/MPL/license-policy.html
    # with obsolete or now-relicensed directories removed
    "apache",    # Obsolete mod_gzip code
    "cck",       # mkaply's baby; not core code anyway.
    "dbm",
    "js/rhino",  # Currently MPL/GPL - may end up BSD
    "webtools",  # Various MPLed webtools

    # These could be done, but no-one's clamouring for it, and it's a hassle
    # sorting it all out, so let sleeping dogs lie.
    "msgsdk",
    "java",
    "privacy",

    # These have their own BSD-like license
    "jpeg",
    "modules/libimg/mng",

    # The following are not supposed to be relicensed, but they do have a
    # few files in we care about (like makefiles)
    "modules/libimg/png",
    "modules/zlib",
    "gc/boehm",
    "other-licenses",

    # Copy of GPLed tool
    "tools/buildbot",

    # Other directories we want to exclude
    "embedding/tests",      # Agreed as BSD
    "calendar/libical",     # LGPL/MPL
    "gfx/cairo/cairo/src",  # LGPL/MPL
]
# Comment delimiter information keyed by exact file basename.
#
# Each value is a tuple of "delimiter sets" and each set is a list of strings.
# As _get_license_info() uses them:
#   - 3 items: block comment (first-line prefix, subsequent-line prefix,
#     closing suffix)
#   - 2 items: (prefix, suffix)
#   - 1 item:  line-comment prefix ("" means no comment prefix at all)
_g_basename_to_comment_info = {
    "configure": (["dnl"],),

    "Makefile": (["#"],),
    "makefile": (["#"],),
    "nfspwd": (["#"],),
    "typemap": (["#"],),
    "xmplflt.conf": (["#"],),
    "ldapfriendly": (["#"],),
    "ldaptemplates.conf": (["#"],),
    "ldapsearchprefs.conf": (["#"],),
    "ldapfilter.conf": (["#"],),
    "README.configure": (["#"],),
    "Options.txt": (["#"],),
    "fdsetsize.txt": (["#"],),
    "prototype": (["#"],),
    "prototype_i386": (["#"],),
    "prototype3_i386": (["#"],),
    "prototype_com": (["#"],),
    "prototype3_com": (["#"],),
    "prototype_sparc": (["#"],),
    "prototype3_sparc": (["#"],),
    "nglayout.mac": (["#"],),
    "pkgdepend": (["#"],),
    "Maketests": (["#"],),
    "depend": (["#"],),
    "csh-aliases": (["#"],),
    "csh-env": (["#"],),
    ".cshrc": (["#"],),
    "MANIFEST": (["#"],),
    "mozconfig": (["#"],),
    "makecommon": (["#"],),
    "bld_awk_pkginfo": (["#"],),
    "prototype_i86pc": (["#"],),
    "pkgdepend_5_6": (["#"],),
    "awk_pkginfo-i386": (["#"],),
    "awk_pkginfo-sparc": (["#"],),
    "pkgdepend_64bit": (["#"],),
    "WIN32": (["#"],),
    "Makefile.linux": (["#"],),

    "README": ([""], ["#"]),
    "copyright": ([""],),

    "xptcstubs_asm_ppc_darwin.s.m4": (["/*", " *", "*/"],),
    "xptcstubs_asm_mips.s.m4": (["/*", " *", "*/"],),

    "nsIDocCharsetTest.txt": (["<!--", " -", "-->"],),
    "nsIFontListTest.txt": (["<!--", " -", "-->"],),
    "ComponentListTest.txt": (["<!--", " -", "-->"],),
    "nsIWebBrowserPersistTest1.txt": (["<!--", " -", "-->"],),
    "nsIWebBrowserPersistTest2.txt": (["<!--", " -", "-->"],),
    "nsIWebBrowserPersistTest3.txt": (["<!--", " -", "-->"],),
    "plugins.txt": (["<!--", " -", "-->"],),
    "NsISHistoryTestCase1.txt": (["<!--", " -", "-->"],),
    "EmbedSmokeTest.txt": (["<!--", " -", "-->"],),

    "lineterm_LICENSE": (["/*", " *", "*/"],),
    "XMLterm_LICENSE": (["/*", " *", "*/"],),
    "BrowserView.cpp.mod": (["/*", " *", "*/"],),
    "header_template": (["/*", " *", "*/"],),
    "cpp_template": (["/*", " *", "*/"],),

    "abcFormat470.txt": (["//"],),
    "opcodes.tbl": (["//"],),
}
# Comment delimiter information keyed by file extension (case-sensitive,
# including the leading dot).
#
# Each value is a tuple of "delimiter sets" and each set is a list of strings.
# As _get_license_info() uses them:
#   - 3 items: block comment (first-line prefix, subsequent-line prefix,
#     closing suffix)
#   - 2 items: (prefix, suffix)
#   - 1 item:  line-comment prefix ("" means no comment prefix at all)
#
# NOTE: every value must be a real tuple, so single-set entries need the
# trailing comma: `(["#"],)`. Without it `(["#"])` is just the list itself and
# iterating over the "sets" would yield individual strings instead of lists.
_g_ext_to_comment_info = {
    ".txt": (["##", "#"], ["#"]),
    ".TXT": (["##", "#"],),

    ".doc": ([""],),
    ".build": ([""],),
    ".1st": ([""],),
    ".lsm": ([""],),
    ".FP": ([""],),
    ".spec": ([""],),

    ".CPP": (["/*", " *", "*/"],),
    ".cpp": (["/*", " *", "*/"],),
    ".H": (["/*", " *", "*/"],),
    ".h": (["/*", " *", "*/"],),
    ".hxx": (["/*", " *", "*/"],),
    ".c": (["/*", " *", "*/"],),
    ".css": (["/*", " *", "*/"], ['#']),
    ".js": (["/*", " *", "*/"], ['#']),
    ".idl": (["/*", " *", "*/"],),
    ".ut": (["/*", " *", "*/"],),
    ".rc": (["/*", " *", "*/"],),
    ".rc2": (["/*", " *", "*/"],),
    ".RC": (["/*", " *", "*/"],),
    ".Prefix": (["/*", " *", "*/"],),
    ".prefix": (["/*", " *", "*/"],),
    ".cfg": (["/*", " *", "*/"], ["#"]),
    ".cp": (["/*", " *", "*/"],),
    ".cs": (["/*", " *", "*/"],),
    ".java": (["/*", " *", "*/"],),
    ".jst": (["/*", " *", "*/"],),
    ".tbl": (["/*", " *", "*/"],),
    ".tab": (["/*", " *", "*/"],),
    ".cc": (["/*", " *", "*/"],),
    ".msg": (["/*", " *", "*/"],),
    ".y": (["/*", " *", "*/"],),
    ".r": (["/*", " *", "*/"],),
    ".mm": (["/*", " *", "*/"],),
    ".x-ccmap": (["/*", " *", "*/"],),
    ".ccmap": (["/*", " *", "*/"],),
    ".sql": (["/*", " *", "*/"],),
    ".pch++": (["/*", " *", "*/"],),
    ".xpm": (["/*", " *", "*/"],),
    ".uih": (["/*", " *", "*/"],),
    ".uil": (["/*", " *", "*/"],),
    ".map": (["/*", " *", "*/"],),
    ".win98": (["/*", " *", "*/"],),
    ".php": (["/*", " *", "*/"],),
    ".m": (["/*", " *", "*/"],),
    ".jnot": (["/*", " *", "*/"],),
    ".l": (["/*", " *", "*/"],),
    ".htp": (["/*", " *", "*/"],),
    ".xs": (["/*", " *", "*/"],),
    ".as": (["/*", " *", "*/"],),
    ".api": (["/*", " *", "*/"], ['#']),

    ".html": (["<!--", " -", "-->"], ["#"]),
    ".xml": (["<!--", " -", "-->"], ["#"]),
    ".xbl": (["<!--", " -", "-->"], ["#"]),
    ".xsl": (["<!--", " -", "-->"],),
    ".xul": (["<!--", " -", "-->"], ["#"]),
    ".dtd": (["<!--", " -", "-->"], ["#"]),
    ".rdf": (["<!--", " -", "-->"], ["#"]),
    ".htm": (["<!--", " -", "-->"],),
    ".out": (["<!--", " -", "-->"],),
    ".resx": (["<!--", " -", "-->"],),
    ".bl": (["<!--", " -", "-->"],),
    ".xif": (["<!--", " -", "-->"],),
    ".xhtml": (["<!--", " -", "-->"], ["#"]),

    ".inc": (["<!--", " -", "-->"],
             ["#"],
             ["@!"],
             ["/*", " *", "*/"]),

    ".properties": (["#"],),
    ".win": (["#"],),
    ".dsp": (["#"],),
    ".exp": (["#"],),
    ".mk": (["#"],),
    ".mn": (["#"],),
    ".mak": (["#"],),
    ".MAK": (["#"],),
    ".perl": (["#"],),
    ".pl": (["#"],),
    ".PL": (["#"],),
    ".sh": (["#"],),
    ".dsw": (["#"],),
    ".cgi": (["#"],),
    ".pm": (["#"],),
    ".pod": (["#"],),
    ".src": (["#"],),
    ".csh": (["#"],),
    ".DLLs": (["#"],),
    ".ksh": (["#"],),
    ".toc": (["#"],),
    ".am": (["#"],),
    ".df": (["#"],),
    ".client": (["#"],),
    ".ref": (["#"],),  # all of them "Makefile.ref"
    ".ldif": (["#"],),
    ".ex": (["#"],),
    ".reg": (["#"],),
    ".py": (["#"],),
    ".adb": (["#"],),
    ".dtksh": (["#"],),
    ".pkg": (["#"],),
    ".et": (["#"],),
    ".stub": (["#"],),
    ".nss": (["#"],),
    ".os2": (["#"],),
    ".Solaris": (["#"],),
    ".rep": (["#"],),
    ".NSS": (["#"],),
    ".server": (["#"],),
    ".awk": (["#"],),
    ".targ": (["#"],),
    ".gnuplot": (["#"],),
    ".bash": (["#"],),
    ".tmpl": (["#"],),
    ".com": (["#"],),
    ".dat": (["#"],),
    ".rpm": (["#"],),
    ".nsi": (["#"],),
    ".nsh": (["#"],),
    ".template": (["#"],),
    ".ldkd": (["#"],),
    ".ldku": (["#"],),
    ".arm": (["#"],),

    ".tdf": ([";"],),
    ".def": ([";+#"], [";"]),
    ".DEF": ([";+#"], [";"]),
    ".ini": ([";"],),
    ".it": ([";"],),
    ".lisp": ([";;;"],),

    ".cmd": (["rem"], ["REM"]),
    ".bat": (["rem"], ["REM"]),

    ".tex": (["%"],),
    ".texi": (["%"],),

    ".m4": (["dnl"],),

    ".asm": ([";"],),
    ".vbs": (["'"],),
    ".il": (["!"],),
    ".ad": (["!"],),

    ".script": (["(*", " *", "*)"],),

    ".3x": (['.\\"'],),

    # What a mess...
    ".s": (["#"], ["//"], ["/*", " *", "*/"], ["!"], [";"], ["/"]),
}
# (compiled shebang-line pattern, comment delimiter sets) pairs.
#
# Every pattern is anchored at the very start of the text (\A) and is
# case-insensitive. The patterns are pure ASCII, so plain raw strings are
# used: the former ur'...' prefix is a syntax error on Python 3.
_g_shebang_pattern_to_comment_info = [
    (re.compile(r'\A#!.*/bin/(ba)?sh.*$', re.IGNORECASE), (["#"],)),
    (re.compile(r'\A#!.*perl.*$', re.IGNORECASE), (["#"],)),
    (re.compile(r'\A#!.*php.*$', re.IGNORECASE), (["#"],)),
    (re.compile(r'\A#!.*python.*$', re.IGNORECASE), (["#"],)),
    (re.compile(r'\A#!.*ruby.*$', re.IGNORECASE), (["#"],)),
    (re.compile(r'\A#!.*tclsh.*$', re.IGNORECASE), (["#"],)),
    (re.compile(r'\A#!.*wish.*$', re.IGNORECASE), (["#"],)),
    (re.compile(r'\A#!.*expect.*$', re.IGNORECASE), (["#"],)),
]
# Text fragments from which a tri-license block is assembled.
#
# The fragments carry no comment delimiters. Fragments containing
# "%(name)s" placeholders are filled in with the %-operator and a dict
# providing "original_code_is", "original_code_date", "initial_developer"
# and "initial_copyright_date" as appropriate. Paragraphs are separated by
# blank lines; the "gpl..." fragments end the block.
_g_trilicense_parts = {
    "mpl": """\
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1

The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.

""",
    "npl": """\
***** BEGIN LICENSE BLOCK *****
Version: NPL 1.1/GPL 2.0/LGPL 2.1

The contents of this file are subject to the Netscape Public License
Version 1.1 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/NPL/

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.

""",
    "original_code_is": """\
The Original Code is %(original_code_is)s.

""",
    "original_code_is_with_date": """\
The Original Code is %(original_code_is)s, released
%(original_code_date)s.

""",
    "initial_developer": """\
The Initial Developer of the Original Code is
%(initial_developer)s.
Portions created by the Initial Developer are Copyright (C) %(initial_copyright_date)s
the Initial Developer. All Rights Reserved.

""",
    "contributors": """\
Contributor(s):

""",
    "gpl for mpl": """\
Alternatively, the contents of this file may be used under the terms of
the GNU General Public License Version 2 or later (the "GPL"), in which
case the provisions of the GPL are applicable instead of those above. If
you wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under the
MPL, indicate your decision by deleting the provisions above and replacing
them with the notice and other provisions required by the GPL. If you do
not delete the provisions above, a recipient may use your version of this
file under either the MPL or the GPL.

***** END LICENSE BLOCK *****""",
    "gpl for npl": """\
Alternatively, the contents of this file may be used under the terms of
the GNU General Public License Version 2 or later (the "GPL"), in which
case the provisions of the GPL are applicable instead of those above. If
you wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under the
NPL, indicate your decision by deleting the provisions above and replacing
them with the notice and other provisions required by the GPL. If you do
not delete the provisions above, a recipient may use your version of this
file under either the NPL or the GPL.

***** END LICENSE BLOCK *****""",
    "gpl/lgpl for mpl": """\
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.

***** END LICENSE BLOCK *****""",
    "gpl/lgpl for npl": """\
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the NPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the NPL, the GPL or the LGPL.

***** END LICENSE BLOCK *****""",
}
# Global mode flags; all are off (0) by default.
_g_dry_run = 0 # iff true, don't modify any files
# NOTE(review): presumably set by -f/--force (continue after an error) -- confirm
_g_force = 0
# iff true, _should_skip_file() skips nothing and _should_skip_dir() only
# skips the directories named in _g_skip_dir_basenames_cvs_only
_g_check_all = 0
649 #---- internal support routines
def _is_binary(filename):
    """Return true iff the given filename is binary.

    A file counts as binary when it contains at least one NUL byte. The
    file is read in fixed-size chunks and scanning stops at the first NUL
    byte found, so the whole file is never held in memory.

    "filename" is the path of the file to examine.

    Returns 1 if a NUL byte was found, otherwise 0 (an empty file is not
    binary).

    Raises an EnvironmentError if the file does not exist or cannot be
    accessed.
    """
    CHUNKSIZE = 1024
    # The file is opened in binary mode, so the needle must be a bytes
    # object: on Python 2 b'\0' is the same as '\0', while on Python 3
    # testing a str against bytes raises TypeError.
    NUL = b'\0'
    fin = open(filename, 'rb')
    try:
        while 1:
            chunk = fin.read(CHUNKSIZE)
            if NUL in chunk:  # found null byte
                return 1
            if len(chunk) < CHUNKSIZE:
                break  # short read: end of file reached
    finally:
        fin.close()

    return 0
# Cache of parsed .cvsignore files: maps the path of a .cvsignore file to the
# list of (non-empty) lines it contains, so each file is read only once.
_g_cvsignore_cache = {}

def _should_skip_according_to_cvsignore(path):
    """Return 1 if "path" is listed in its directory's .cvsignore, else 0.

    The .cvsignore file consulted is the one located in the same directory
    as "path". Each non-empty line of that file is compared literally with
    the basename of "path". If there is no such .cvsignore file the answer
    is 0.
    """
    parent, name = os.path.split(path)
    ignore_file = os.path.join(parent, ".cvsignore")
    if not os.path.exists(ignore_file):
        return 0

    if ignore_file not in _g_cvsignore_cache:
        handle = open(ignore_file, 'r')
        entries = []
        try:
            for line in handle:
                if line[-1] == "\n":
                    line = line[:-1]  # drop the line terminator
                if line:  # blank lines carry no entry
                    entries.append(line)
        finally:
            handle.close()
        _g_cvsignore_cache[ignore_file] = entries

    # From here on the file's entries are guaranteed to be cached.
    if name in _g_cvsignore_cache[ignore_file]:
        return 1
    return 0
# Matches the "~<number>" suffix of a backup file name (raw string, so "\d"
# reaches the regex engine without relying on an invalid string escape).
_g_backup_pattern = re.compile(r"~\d+$")

def _matches_skip_file(xpath, skip_file):
    """Return true iff '/'-separated "xpath" names the file "skip_file".

    "skip_file" is a '/'-separated path. It matches when "xpath" is exactly
    that path or ends with it on a path-component boundary, so an entry
    "LICENSE" matches "LICENSE" and "foo/LICENSE" but not
    "foo/XMLterm_LICENSE".
    """
    return xpath == skip_file or xpath.endswith("/" + skip_file)

def _should_skip_file(path):
    """Return 1 if the file at "path" should not be processed, else 0.

    When _g_check_all is set no file is skipped. Otherwise a file is
    skipped if, in this order: its extension is in _g_skip_exts, its path
    ends with an entry of _g_skip_files, its basename is in
    _g_skip_file_basenames, it is listed in the .cvsignore of its
    directory, or its name looks like a "~<number>" backup file. The reason
    for skipping is logged at info level.
    """
    log.debug("_should_skip_file(path='%s')", path)
    if _g_check_all:
        return 0
    ext = os.path.splitext(path)[1]
    if ext in _g_skip_exts:
        log.info("Skipping '%s' (according to '_g_skip_exts').", path)
        return 1
    xpath = '/'.join(path.split(os.sep))  # use same sep as in _g_skip_files
    for sf in _g_skip_files:
        if _matches_skip_file(xpath, sf):
            log.info("Skipping '%s' (according to '_g_skip_files').", path)
            return 1
    if os.path.basename(path) in _g_skip_file_basenames:
        log.info("Skipping '%s' (according to '_g_skip_file_basenames').", path)
        return 1
    if _should_skip_according_to_cvsignore(path):
        log.info("Skipping '%s' (according to .cvsignore).", path)
        return 1
    if _g_backup_pattern.search(path):
        log.info("Skipping '%s' (looks like backup file).", path)
        return 1
    return 0
def _canonical_dir_path(path):
    """Return "path" normalised and '/'-separated, for skip-list matching.

    os.path.normpath drops a trailing separator and a leading "./", so
    "calendar/", "./calendar" and "calendar" all canonicalise to "calendar".
    An empty path canonicalises to ".".
    """
    return '/'.join(os.path.normpath(path).split(os.sep))

def _should_skip_dir(path):
    """Return 1 if the directory at "path" should not be processed, else 0.

    When _g_check_all is set, only directories whose basename is in
    _g_skip_dir_basenames_cvs_only are skipped. Otherwise a directory is
    skipped if its basename is in _g_skip_dir_basenames, if its whole
    canonical path equals an entry of _g_skip_dirs, or if it is listed in
    the .cvsignore of its parent directory. The reason for skipping is
    logged at info level.

    The path is canonicalised before any check, so a trailing separator
    (e.g. "CVS/") cannot hide the basename and an empty path does not raise.
    """
    log.debug("_should_skip_dir(path='%s')", path)
    norm_path = os.path.normpath(path)
    basename = os.path.basename(norm_path)
    if _g_check_all:
        if basename in _g_skip_dir_basenames_cvs_only:
            return 1
        return 0
    if basename in _g_skip_dir_basenames:
        log.info("Skipping '%s' (according to _g_skip_dir_basenames).", path)
        return 1
    xpath = _canonical_dir_path(path)  # use same sep as in _g_skip_dirs
    for sd in _g_skip_dirs:
        # Changed by gerv to make skip_dirs require whole path
        if xpath == sd:
            log.info("Skipping '%s' (according to _g_skip_dirs).", path)
            return 1
    if _should_skip_according_to_cvsignore(norm_path):
        log.info("Skipping '%s' (according to .cvsignore).", path)
        return 1
    return 0
746 def _get_license_info(filename, show_initial=0, quick=0):
747 """Return license block information for the given file.
749 "filename" is the path to the file to scan.
750 "show_initial" is a boolean that indicates if initial developer info
751 should be displayed.
752 "quick" is a boolean that can be set for a quick scan. In this
753 case, only the "parts" field of the return dictionary will
754 be filled out.
756 Returns a dictionary adequately describing the license block in the
757 given file for the purpose of determining whether to patch the
758 license block and how. Returns a dictionary of the following form:
759 {"parts": <list of zero or more of "mpl", "npl", "gpl", "lgpl",
760 "unknown", "block_begin", "block_end" in the
761 order in which they were found>,
762 # if necessary, the following keys are included as well
763 "begin_line": <(0-based) index at which license block starts>,
764 "end_line": <(0-based) index at which license block ends>,
765 "first_prefix": <prefix to use for new license block first line>,
766 "subsequent_prefix": <prefix to use for subsequent lines>,
767 "last_suffix": <suffix to use for last line>,
768 # The following fields are correspond to the file specific
769 # portions of the license template as described here:
770 # http://www.mozilla.org/MPL/relicensing-faq.html#new-license
771 # If the associated block is not found, then the value is None.
772 "original_code_is": ...,
773 "original_code_date": ...,
774 "initial_developer": ...,
775 "initial_copyright_date": ...,
776 "contributors": ...,
779 precondition: should not be called on binary files
781 lic_info = {
782 "parts": [],
785 fin = open(filename, 'r')
786 try:
787 content = fin.read()
788 finally:
789 fin.close()
791 # Help me find filena
792 log.info("Next file is: %s", filename)
794 # do quick search to see if any of the desired licenses is in here
795 # - if it looks like all the parts are there, good, done
796 # - if some but not all parts, continue
797 parts_pattern = re.compile("""(
798 (?P<block_begin>\*\*\*\*\*\ BEGIN\ LICENSE\ BLOCK\ \*\*\*\*\*)
799 | (?P<mpl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla)
800 | (?P<npl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape)
801 | (?P<gpl>GNU\ (General\ )?Public\ License)
802 | (?P<lgpl>(Library|Lesser)\ General\ Public\ License)
803 | (?P<block_end>\*\*\*\*\*\ END\ LICENSE\ BLOCK\ \*\*\*\*\*)
804 )""",
805 re.VERBOSE)
806 parts = [] # found license parts in this file
807 start = 0
808 blocks = 0
809 while 1:
810 match = parts_pattern.search(content, start)
811 if match:
812 # Skip this block, if the last license block is more than 10 lines
813 # away (file is probably used for autogeneration of files then).
814 if blocks == 1 and (match.start()-start) > 10:
815 break
816 else:
817 parts = match.groupdict()
818 for part in parts:
819 if parts[part]:
820 lic_info["parts"].append(part)
821 log.info("%s license/delimeter found", part)
822 start = match.end()
823 if part == "block_end":
824 blocks = blocks + 1
825 else:
826 blocks = 0
827 break
828 else:
829 raise RelicError("unexpected license part: %r" % parts)
830 else:
831 break
833 # no license block at all
834 if not parts:
835 # - if not, check to see if License or Copyright shows up in the
836 # file; if so, then error out; if not, skip out
837 any_lic_pattern = re.compile("(Copyright|Licen[sc]e)", re.IGNORECASE)
838 match = any_lic_pattern.search(content)
839 if match:
840 lic_info["parts"].append("unknown")
841 log.info("unknown license found: %r",
842 content[max(match.start()-20,0):match.end()+20])
843 else:
844 log.info("no license found")
845 return lic_info
847 # license block with non-tri-license version headers
848 elif lic_info["parts"] == ["block_begin", "block_end"]:
849 lic_info["parts"].append("unknown")
850 log.info("unknown license found (license block with non-tri-license)")
851 return lic_info
853 # license block with tri-license version headers
854 elif (lic_info["parts"] == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
855 lic_info["parts"] == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
856 log.info("license looks good, no changes necessary")
857 if quick:
858 return lic_info
860 # Otherwise, the license needs to be fixed, so gather more detailed
861 # information. Here is the algorithm we will use:
862 # - find first license line
863 # - find the end of this comment block (assumption: from the first
864 # license line to the end of the comment block is the full
865 # license block)
866 # This is a bad assumption in two cases and steps have been taken
867 # to try to deal with those cases:
868 # - There could be a trailing part bit of comment that is
869 # NOT part of the license but is part of the same comment
870 # block. A common example are the:
871 # This Original Code has been modified by IBM...
872 # files (about 130 of them in the moz tree).
873 # (c.f. test_relicense_ibm_copyright_suffix.c)
874 # - Some files have split up the license paragraphs into
875 # multiple comment blocks, e.g.
876 # "mozilla/build/unix/abs2rel.pl":
877 # # The contents of this file are subject to the
878 # # ...
879 # # the License at http://www.mozilla.org/MPL/
881 # # The Initial Developer of the Original Code
882 # # ...
883 # # Rights Reserved.
884 # (c.f. test_relicense_separated_license_comment_blocks.pl)
885 # - these are the lines to replace
886 # - gather embedded lic data
887 # - use second line to determine line prefix
888 # ? Should we only allow processing of unknown-delimiter-files with
889 # an option?
891 # Get comment delimiter info for this file.
892 comment_delim_sets = _get_comment_delim_sets(filename)
894 # - find first license line (and determine which set of comment
895 # delimiters are in use)
896 lines = content.splitlines()
897 for comment_delims in comment_delim_sets:
898 if len(comment_delims) == 3:
899 prefix_pattern = "%s|%s|" % (re.escape(comment_delims[0]),
900 re.escape(comment_delims[1]))
901 suffix_pattern = "%s" % re.escape(comment_delims[2])
902 elif len(comment_delims) == 2:
903 prefix_pattern = "%s|" % re.escape(comment_delims[0])
904 suffix_pattern = "%s" % re.escape(comment_delims[1])
905 elif len(comment_delims) == 1:
906 prefix_pattern = re.escape(comment_delims[-1])
907 suffix_pattern = ""
908 else: # len(comment_delims) == 0
909 prefix_pattern = ""
910 suffix_pattern = ""
912 lic_begin_pattern = re.compile("""
913 ^(?P<prefix>%s)
914 (?P<space>\s*)
915 (\*+\ BEGIN\ LICENSE\ BLOCK\ \*+
916 |\-+\ BEGIN\ LICENSE\ BLOCK\ \-+
917 | Version:\ MPL\ \d+\.\d+/GPL\ \d+\.\d+/LGPL\ \d+\.\d+
918 | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla[\w ]*
919 | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape[\w ]*
920 | Alternatively,\ the\ contents\ of\ this\ file\ may\ be\ used\ under\ the[\w ]*)
921 (?P<suffix>%s|)\s*?$
922 """ % (prefix_pattern, suffix_pattern), re.VERBOSE)
924 for i in range(len(lines)):
925 match = lic_begin_pattern.search(lines[i])
926 if match:
927 beginline = {
928 "content": lines[i],
929 "linenum": i,
930 "prefix": match.group("prefix"),
931 "space": match.group("space"),
932 "suffix": match.group("suffix")
934 # Optimization: If the line before the "beginline" is simply
935 # a block comment open the include that line in parsed out
936 # license block. E.g.,
937 # <!--
938 # - ***** BEGIN LICENSE BLOCK *****
939 # ...
940 if (len(comment_delims) > 1 # only for block comments
941 and beginline["prefix"] != comment_delims[0]
942 and i-1 >= 0
943 and lines[i-1].strip() == comment_delims[0]):
944 beginline["linenum"] -= 1
945 beginline["prefix"] = comment_delims[0]
946 break
947 if match: break
948 else:
949 raise RelicError("couldn't find start line with this pattern (even "
950 "though it looks like there is a license block in "
951 "%s): %s" % (filename, lic_begin_pattern.pattern))
952 log.info("comment delimiters: %s", comment_delims)
953 log.debug("beginline dict: %s", beginline)
954 lic_info["comment_delims"] = comment_delims
955 lic_info["begin_line"] = beginline["linenum"]
956 lic_info["first_prefix"] = beginline["prefix"]
957 log.info("prefix for first line: '%s'", beginline["prefix"])
959 # - get second license line
960 lic_middle_pattern = re.compile("""
961 ^(?P<prefix>%s|)
962 (?P<space>\s*)
963 (?P<content>.*)
964 (?P<suffix>%s|)\s*?$
965 """ % (prefix_pattern, suffix_pattern),
966 re.VERBOSE)
967 # skip empty lines which might result in bogus scanning later, e.g.:
968 # mozilla/layout/html/tests/table/marvin/x_thead_align_center.xml
969 second_linenum = beginline["linenum"]+1
970 while second_linenum < len(lines):
971 if lines[second_linenum].strip():
972 break
973 log.debug("skip blank 'second' line: %d", second_linenum)
974 second_linenum +=1
975 else:
976 raise RelicError("all lines after the first license block line (%d) "
977 "were empty" % (beginline["linenum"]+1))
978 match = lic_middle_pattern.search(lines[second_linenum])
979 if match:
980 secondline = {
981 "content": lines[second_linenum],
982 "linenum": second_linenum,
983 "prefix": match.group("prefix"),
984 "space": match.group("space"),
985 "suffix": match.group("suffix")
987 else:
988 raise RelicError("didn't find second line with pattern: %s"
989 % lic_middle_pattern.pattern)
990 log.debug("secondline dict: %s", secondline)
991 lic_info["subsequent_prefix"] = secondline["prefix"]
992 log.info("prefix for subsequent lines: '%s'", secondline["prefix"])
994 # - find block comment end
995 orig_code_modified_pattern = re.compile("This Original Code has been "
996 "modified", re.I)
997 non_lic_content_in_same_comment_block = 0
998 if len(comment_delims) == 1:
999 # line-style comments: The comment block "end" is defined as the
1000 # last line before a line NOT using the block comment delimiter.
1001 #XXX:BUG: This is not good enough for:
1002 # test/inputs/separated_license_comment_blocks.pl
1003 if comment_delims[0] == "":
1004 raise RelicError(
1005 "Don't know how to find the end of a line-style comment "
1006 "block when the delimiter is the empty string. (Basically "
1007 "this script cannot handle this type of file.)")
1008 for i in range(beginline["linenum"]+1, len(lines)):
1009 if not lines[i].startswith(comment_delims[0]):
1010 endlinenum = i-1
1011 break
1012 elif lines[i].find("END LICENSE BLOCK") != -1:
1013 endlinenum = i
1014 break
1015 # As per "test_relicense_trailing_orig_code_modified.pl", a
1016 # paragraph starting with:
1017 # This Original Code has been modified
1018 # is deemed to be OUTside the license block, i.e. it is not
1019 # replaced for relicensing.
1020 if orig_code_modified_pattern.search(lines[i]):
1021 non_lic_content_in_same_comment_block = 1
1022 # The endline is the first non-blank line before this one.
1023 endlinenum = i-1
1024 while 1:
1025 line = lines[endlinenum]
1026 match = lic_middle_pattern.search(line)
1027 if not match:
1028 raise RelicError("Line did not match lic_middle_pattern "
1029 "unexpectedly: %r" % line)
1030 if match.group("content").strip(): # non-empty line
1031 break
1032 endlinenum -= 1
1033 break
1034 else:
1035 raise RelicError("Could not find license comment block end "
1036 "line in '%s'." % filename)
1037 elif len(comment_delims) >= 2: # block-style comments
1038 for i in range(beginline["linenum"]+1, len(lines)):
1039 if lines[i].find(comment_delims[-1]) != -1:
1040 endlinenum = i
1041 break
1042 elif lines[i].find("END LICENSE BLOCK") != -1:
1043 endlinenum = i
1044 non_lic_content_in_same_comment_block = 1
1045 break
1046 # As per "test_relicense_ibm_copyright_suffix.c", a
1047 # paragraph starting with:
1048 # This Original Code has been modified
1049 # is deemed to be OUTside the license block, i.e. it is not
1050 # replaced for relicensing.
1051 if orig_code_modified_pattern.search(lines[i]):
1052 non_lic_content_in_same_comment_block = 1
1053 # The endline is the first non-blank line before this one.
1054 endlinenum = i-1
1055 while 1:
1056 line = lines[endlinenum]
1057 match = lic_middle_pattern.search(line)
1058 if not match:
1059 raise RelicError("Line did not match lic_middle_pattern "
1060 "unexpectedly: %r" % line)
1061 if match.group("content").strip(): # non-empty line
1062 break
1063 endlinenum -= 1
1064 break
1065 else:
1066 raise RelicError("Could not find license comment block end "
1067 "line in '%s'." % filename)
1068 if not non_lic_content_in_same_comment_block\
1069 and not lines[endlinenum].strip().endswith(comment_delims[-1]):
1070 raise RelicError(
1071 "There is text AFTER the license block comment end "
1072 "delimiter, but on the SAME LINE. This is unexpected. "
1073 "Bailing.\n%s:%s:%r"
1074 % (filename, endlinenum, lines[endlinenum]))
1075 else: # len(comment_delims) == 0
1076 # For files without a comment character to help out, we ONLY
1077 # successfully break one the full correct "END LICENSE BLOCK"
1078 # token.
1079 for i in range(beginline["linenum"]+1, len(lines)):
1080 if lines[i].find("END LICENSE BLOCK") != -1:
1081 endlinenum = i
1082 break
1083 elif i > beginline["linenum"]+1+50:
1084 raise RelicError("Haven't found 'END LICENSE BLOCK' marker "
1085 "within 50 lines of the start of the "
1086 "license block on line %d. Aborting."
1087 % (beginline["linenum"]+1))
1088 # As per "test_relicense_trailing_orig_code_modified.pl", a
1089 # paragraph starting with:
1090 # This Original Code has been modified
1091 # is deemed to be OUTside the license block, i.e. it is not
1092 # replaced for relicensing.
1093 if orig_code_modified_pattern.search(lines[i]):
1094 non_lic_content_in_same_comment_block = 1
1095 # The endline is the first non-blank line before this one.
1096 endlinenum = i-1
1097 while 1:
1098 line = lines[endlinenum]
1099 match = lic_middle_pattern.search(line)
1100 if not match:
1101 raise RelicError("Line did not match lic_middle_pattern "
1102 "unexpectedly: %r" % line)
1103 if match.group("content").strip(): # non-empty line
1104 break
1105 endlinenum -= 1
1106 break
1107 else:
1108 raise RelicError("Could not find license comment block end "
1109 "line in '%s'." % filename)
1111 # Test case: test_relicense_separated_license_comment_blocks.pl
1112 # It is possible that a separate comment block immediately following
1113 # the license block we just parsed should be included in the license
1114 # block.
1115 if (not non_lic_content_in_same_comment_block
1116 and len(comment_delims) == 1): # only do this for line-style comments
1117 lic_indicators = [
1118 re.compile("^The content of this file are subject to", re.I),
1119 re.compile("^Software distributed under the License", re.I),
1120 re.compile("^The Original Code is", re.I),
1121 re.compile("^The Initial Developer", re.I),
1122 re.compile("^Contributor", re.I),
1123 re.compile("^Alternatively, the content of this file", re.I),
1125 comment_line_pattern = re.compile("""
1126 ^(?P<prefix>%s|)
1127 (?P<space>\s*)
1128 (?P<content>.*)
1129 (?P<suffix>%s|)\s*?$
1130 """ % (prefix_pattern, suffix_pattern),
1131 re.VERBOSE)
1132 i = endlinenum
1133 while i+1 < len(lines):
1134 i += 1; line = lines[i]
1135 comment_index = line.find(comment_delims[0])
1136 if comment_index != -1:
1137 content = line[:comment_index].strip()
1138 comment = line[comment_index+len(comment_delims[0]):].strip()
1139 else:
1140 content = line.strip()
1141 comment = ""
1142 if content: # if non-comment content, then skip out
1143 break
1144 if not comment:
1145 continue
1146 for indicator in lic_indicators:
1147 if indicator.search(comment):
1148 # include this paragraph in the lic block
1149 while i < len(lines):
1150 i += 1; line = lines[i]
1151 if not line.strip().startswith(comment_delims[0]):
1152 break
1153 if not line.strip()[len(comment_delims[0]):]:
1154 break
1155 endlinenum = i-1
1156 break
1157 else:
1158 break # this is a non-lic-related comment
1160 # Get the end-line data.
1161 if non_lic_content_in_same_comment_block:
1162 lic_end_pattern = re.compile(
1163 "^(?P<prefix>%s)(?P<space>\s*).*?\s*?$"
1164 % prefix_pattern)
1165 else:
1166 lic_end_pattern = re.compile(
1167 "^(?P<prefix>%s)(?P<space>\s*).*?(?P<suffix>%s)\s*?$"
1168 % (prefix_pattern, suffix_pattern))
1169 match = lic_end_pattern.match(lines[endlinenum])
1170 if match:
1171 endline = {
1172 "content": lines[endlinenum],
1173 "linenum": endlinenum,
1174 "prefix": match.group("prefix"),
1175 "space": match.group("space"),
1176 "suffix": match.groupdict().get("suffix", ""),
1178 else:
1179 raise RelicError("license block end line did not match: line='%s', "
1180 "pattern='%s'"
1181 % (lines[endlinenum], lic_end_pattern.pattern))
1182 log.debug("endline dict: %s", endline)
1183 lic_info["last_suffix"] = endline["suffix"]
1184 log.info("suffix for last line: '%s'", endline["suffix"])
1185 lic_info["end_line"] = endline["linenum"]
1186 log.info("license lines: %d-%d", beginline["linenum"], endline["linenum"])
1188 # So at this point we have the beginline, secondline, and endline
1189 # dicts describing and bounding the license block.
1191 # - gather embedded lic data
1192 # As described here:
1193 # http://www.mozilla.org/MPL/relicensing-faq.html#new-license
1194 # we have to parse out the following possible fields:
1195 # original_code_is
1196 # original_code_date
1197 # initial_developer
1198 # initial_copyright_date
1199 # contributors
1200 lic_line_pattern = re.compile( # regex to parse out the line _body_
1201 "^(?P<prefix>%s)(?P<space>\s*)(?P<body>.*?)(?P<suffix>%s|)\s*?$"
1202 % (prefix_pattern, suffix_pattern))
1204 original_code_is = None
1205 original_code_date = None
1206 # Parse out the "The Original Code is ..." paragraph _content_.
1207 paragraph = ""
1208 in_paragraph = 0
1209 for i in range(beginline["linenum"], endline["linenum"]+1):
1210 body = lic_line_pattern.match(lines[i]).group("body")
1211 if (not in_paragraph and body.startswith("The Original Code is")):
1212 in_paragraph = 1
1213 if in_paragraph:
1214 if not body.strip(): # i.e. a blank line, end of paragraph
1215 break
1216 # ensure one space btwn lines
1217 if paragraph: paragraph = paragraph.rstrip() + " "
1218 paragraph += body
1219 if paragraph:
1220 pattern1 = re.compile('^The Original Code is (.*), released (.*)\.')
1221 match = pattern1.search(paragraph)
1222 if match:
1223 original_code_is = match.group(1)
1224 original_code_date = match.group(2)
1225 else:
1226 pattern2 = re.compile('^The Original Code is (.*?)\.?$')
1227 match = pattern2.search(paragraph)
1228 if match:
1229 original_code_is = match.group(1)
1230 else:
1231 raise RelicError(
1232 "%s: 'The Original Code is' paragraph did not match the "
1233 "expected patterns. paragraph=\n\t%r\n"
1234 "pattern1=\n\t%r\npattern2=\n\t%r"
1235 % (filename, paragraph, pattern1.pattern, pattern2.pattern))
1236 lic_info["original_code_is"] = original_code_is
1237 lic_info["original_code_date"] = original_code_date
1238 log.info("original code is: %s", original_code_is)
1239 log.info("original_code_date: %s", original_code_date)
1241 initial_developer = None
1242 initial_copyright_date = None
1243 # Parse out the "The Initial Developer..." paragraph _content_.
1244 paragraph = ""
1245 in_paragraph = 0
1246 for i in range(beginline["linenum"], endline["linenum"]+1):
1247 body = lic_line_pattern.match(lines[i]).group("body")
1248 if (not in_paragraph and
1249 (body.startswith("The Initial Developer of") or
1250 body.startswith("The Initial Developers of"))):
1251 in_paragraph = 1
1252 if in_paragraph:
1253 if not body.strip(): # i.e. a blank line, end of paragraph
1254 # Catch the possible case where there is an empty line
1255 # but the paragraph picks up on the next line with
1256 # "Portions created by"
1257 # (test_relicense_no_period_after_origcodeis.cpp).
1258 try:
1259 nextlinebody = lic_line_pattern.match(lines[i+1]).group("body")
1260 except:
1261 nextlinebody = ""
1262 if not nextlinebody.startswith("Portions created by"):
1263 break
1264 # ensure one space btwn lines
1265 if paragraph: paragraph = paragraph.rstrip() + " "
1266 paragraph += body
1267 if paragraph:
1268 pattern = re.compile("""^
1269 The\ Initial\ Developers?\ of\
1270 (the\ Original\ Code\ (is\ |are\ |is\.)|this\ code\ under\ the\ [MN]PL\ (is|are)\ )
1271 (?P<developer>.*?)
1272 \.? # maybe a trailing period
1274 \s+Portions\ created\ by\ .*?
1275 are\ Copyright\ \(C\)\[?\ (?P<date>[\d-]+)
1276 .*? # maybe a trailing period
1277 (\s+All\ Rights\ Reserved\.)?
1279 $""", re.VERBOSE)
1280 match = pattern.search(paragraph)
1281 if not match:
1282 raise RelicError(
1283 "%s: 'This Initial Developer' paragraph did not match the "
1284 "expected pattern. paragraph=\n\t%r\npattern=\n\t%s"
1285 % (filename, paragraph, pattern.pattern))
1286 initial_developer = match.group("developer")
1287 initial_copyright_date = match.group("date")
1288 lic_info["initial_developer"] = initial_developer
1289 lic_info["initial_copyright_date"] = initial_copyright_date
1290 log.info("initial developer paragraph: %r", paragraph)
1291 log.info("initial developer: %r", initial_developer)
1292 log.info("initial copyright date: %r", initial_copyright_date)
1294 contributors = []
1295 normal_leading_space = None
1296 in_contributors_block = 0
1297 contrib_end = endline["linenum"]
1298 # If line-style comment, include the last line in the block in the
1299 # range we examine; if block-style comment, we only allow it if the
1300 # comment-block doesn't end on the endline. On top of these
1301 # conditions we don't search the last line if it includes the
1302 # special end-of-license marker.
1303 if len(comment_delims) == 1 or not endline["suffix"]:
1304 if endline["content"].find("END LICENSE BLOCK") == -1:
1305 contrib_end += 1
1306 for i in range(beginline["linenum"], contrib_end):
1307 match = lic_line_pattern.match(lines[i])
1308 body = match.group("body")
1309 space = match.group("space").replace('\t', ' '*8)
1310 if not in_contributors_block \
1311 and body.startswith("Contributor"):
1312 in_contributors_block = 1
1313 normal_leading_space = space
1314 # Try to pickup "foo@bar.org" as a contributor for a
1315 # possible line like this:
1316 # Contributor(s): foo@bar.org
1317 pivot = body.find(':')
1318 if pivot != -1:
1319 remainder = body[pivot+1:].strip()
1320 if remainder:
1321 contributors.append(remainder)
1322 elif in_contributors_block:
1323 if not body.strip():
1324 # i.e. a blank line, end of paragraph
1325 #XXX:BUG This condition causes the latter two
1326 # contributor lines to be lost from, e.g.,
1327 # test/x_thead_align_center.xml.
1328 break
1329 if len(space) <= len(normal_leading_space):
1330 # A line in the "Contributor(s) paragraph is not
1331 # indented. This is considered an error. Likely this is
1332 # a (not indented) contributor, but it might also be the
1333 # start of another paragraph (i.e. no blank line
1334 # terminating the "Contributor(s):" paragraph). We could
1335 # just error out here, but this is very common in the
1336 # Moz tree (~500) so lets try to deal with it.
1337 # - Heuristic #1: if the line contains what looks like
1338 # an email address then this it is a contributor.
1339 # - Heuristic #2 (to accomodate js/rhino): if the line
1340 # looks like just a person's name.
1341 # Otherwise, error out.
1342 words = body.split()
1343 if '@' in body:
1344 lic_info["unindented_contributor_lines"] = 1
1345 elif (2 <= len(words) <= 3 and
1346 words == [word[0].upper()+word[1:] for word in words]):
1347 # Try to accept the following names:
1348 # Norris Boyd
1349 # Mike McCabe
1350 # George C. Scott
1351 lic_info["unindented_contributor_lines"] = 1
1352 else:
1353 raise RelicError("This line is part of the "
1354 "'Contributor(s):' paragraph but (1) is not indented "
1355 "and (2) does not look like it contains an email "
1356 "address: %s:%s: %r" % (filename, i, lines[i]))
1357 contributors.append(body.strip())
1358 log.info("contributors: %s", contributors)
1359 lic_info["contributors"] = contributors
1361 ## Optimization: The only content in the remain license block lines
1362 ## (i.e. after the contributors block) should really be the GPL/LGPL
1363 ## or nothing. Trapping this will avoid losing the latter two
1364 ## contributor lines in test/x_thead_align_center.xml.
1365 #gpl_lgpl_lines = _g_trilicense_parts["gpl/lgpl"].splitlines(0)
1366 #gpl_lgpl = " ".join(gpl_lgpl_lines)
1367 #for i in range(i, endline["linenum"]):
1368 # match = lic_line_pattern.match(lines[i])
1369 # body = match.group("body")
1370 # space = match.group("space").replace('\t', ' '*8)
1371 # if not body.strip():
1372 # continue
1373 # #XXX This test is no robust enough to use.
1374 # if (gpl_lgpl.find(body) == -1 and
1375 # body.find(gpl_lgpl) == -1):
1376 # print "QQQ: bogus following text: %r" % body
1378 return lic_info
def _report_on_file(path, options):
    """Print a report on the license found in the given file.

        "path" is the file to report on.
        "options" is the 5-tuple
                (results, switch_to_mpl, show_initial, quick, _errors)
            where
            "results" is accepted for call compatibility; it is not used
                here.
            "switch_to_mpl" is a boolean; when true a complete NPL-based
                tri-license is reported as "not MPL".
            "show_initial" is a boolean; when true only the initial
                developer (if one was found) is printed instead of the
                report. It is also passed on to _get_license_info().
            "quick" is a boolean passed on to _get_license_info(); when
                true the per-field detail lines are left out of the report.
            "_errors" is the error cache handed to _relicensing_error().

    Returns None, or the result of _relicensing_error() if
    _get_license_info() raised a RelicError.
    """
    # A single positional parameter unpacked here replaces the former
    # tuple parameter in the signature (syntax removed in Python 3);
    # callers still pass the same 5-tuple positionally.
    results, switch_to_mpl, show_initial, quick, _errors = options

    mpl_trilicense = ["block_begin", "mpl", "gpl", "lgpl", "block_end"]
    npl_trilicense = ["block_begin", "npl", "gpl", "lgpl", "block_end"]

    log.debug("_report_on_file(path='%s', results)", path)
    output = path + "\n"
    lic_info = {}

    if _is_binary(path):
        output += "... binary, skipping this file\n"
    else:
        try:
            lic_info = _get_license_info(path, show_initial, quick)
        except RelicError as ex:
            return _relicensing_error(ex, path, _errors)

        if log.isEnabledFor(logging.DEBUG):
            pprint.pprint(lic_info)
        parts = lic_info["parts"]
        if not parts:
            output += "... no license found\n"
        elif "unknown" in parts:
            output += "... unknown license (possibly) found\n"
        elif ((parts == mpl_trilicense or parts == npl_trilicense) and
              not lic_info.get("unindented_contributor_lines")):
            if switch_to_mpl and parts == npl_trilicense:
                output += "... %s found (looks complete, but is not MPL)"\
                          % "/".join(parts) + "\n"
            else:
                output += "... %s found (looks complete)"\
                          % "/".join(parts) + "\n"
        else:
            output += "... %s found" % "/".join(parts) + "\n"

        if not quick:
            if "begin_line" in lic_info and "end_line" in lic_info:
                output += "... license block lines: %(begin_line)d-%(end_line)d"\
                          % lic_info + "\n"
            if "original_code_is" in lic_info:
                output += "... original code is: %(original_code_is)s"\
                          % lic_info + "\n"
            if "original_code_date" in lic_info:
                output += "... original code date: %(original_code_date)s"\
                          % lic_info + "\n"
            if "initial_developer" in lic_info:
                output += "... initial developer: %(initial_developer)s"\
                          % lic_info + "\n"
            if "initial_copyright_date" in lic_info:
                output += "... initial copyright date: %(initial_copyright_date)s"\
                          % lic_info + "\n"
            if "contributors" in lic_info:
                output += "... contributors: %s"\
                          % ", ".join(lic_info["contributors"]) + "\n"
            if lic_info.get("unindented_contributor_lines"):
                output += "... one or more contributor lines were not indented properly"\
                          + "\n"

    # Single-argument print(...) behaves the same as the print statement
    # on Python 2 and as the print function on Python 3.
    if show_initial:
        if "initial_developer" in lic_info:
            print(lic_info["initial_developer"])
    else:
        print(output)
def _gather_info_on_file(path, options):
    """Store the license info for the given file in a results mapping.

        "path" is the file to examine.
        "options" is the 2-tuple (results, _errors) where
            "results" is a mapping; on success results[path] is set to
                the value returned by _get_license_info(path).
            "_errors" is the error cache handed to _relicensing_error().

    Binary files are skipped (nothing is stored for them). Returns None,
    or the result of _relicensing_error() if an error was reported.
    """
    # A single positional parameter unpacked here replaces the former
    # tuple parameter in the signature (syntax removed in Python 3);
    # callers still pass the same 2-tuple positionally.
    results, _errors = options

    log.debug("_gather_info_on_file(path='%s', results)", path)
    # Skip binary files.
    try:
        if _is_binary(path):
            log.debug("Skipping binary file '%s'.", path)
            return
    except Exception as ex:
        return _relicensing_error(
            "error determining if file is binary: %s" % ex,
            path, _errors)

    try:
        results[path] = _get_license_info(path)
    except RelicError as ex:
        # The trailing 1 is the "quiet" flag of _relicensing_error().
        return _relicensing_error(ex, path, _errors, 1)
def _make_backup_path(path):
    """Return an unused backup path for the given path.

    The candidates "<path>~0" through "<path>~99" are tried in order and
    the first one that does not exist is returned.

    Raises RelicError if all 100 candidates are already taken.
    """
    for n in range(100):
        backup_path = "%s~%d" % (path, n)
        # lexists() rather than exists(): a dangling symlink must count as
        # "taken", otherwise a later copy to this path would be written
        # through the link to wherever it points.
        if not os.path.lexists(backup_path):
            return backup_path
    raise RelicError("Could not find an unused backup path for '%s'." % path)
def _relicensing_error(err, path, cache=None, quiet=0):
    """Handle an error during relicensing.

        "err" may be an error string or an exception instance.
        "path" is the path of the file on which this error occurred.
        "cache" is a mapping of path to errors on which errors may be
            stored for later reporting.
        "quiet" optionally allows one to silence the stdout output when
            force is in effect.

    If the --force option is in-effect then errors may be remembered and
    processing continues, rather than halting the whole process.
    Otherwise an exception instance is re-raised and an error string is
    raised as a RelicError prefixed with the path.
    """
    if _g_force:
        if not quiet:
            # Same output as the former `print "...", err` statement,
            # spelled so it works as a statement or a function call.
            print("... %s" % (err,))
        if cache is not None:
            cache[path] = err
    elif isinstance(err, Exception):
        if sys.exc_info()[1] is err:
            # "err" is the exception currently being handled: re-raise it
            # in place so the original traceback is kept.
            raise
        # No exception (or a different one) is active; a bare `raise`
        # would fail or re-raise the wrong error, so raise "err" itself.
        raise err
    else:
        raise RelicError("%s: %s" % (path, err))
def _get_comment_delim_sets(filename):
    """Return the comment delimiter sets to try for the given file.

    The lookup is attempted in this order, stopping at the first
    non-empty result:
        1. the basename in _g_basename_to_comment_info,
        2. the file extension in _g_ext_to_comment_info,
        3. the file's first line, if it is a "#!" line, against the
           patterns in _g_shebang_pattern_to_comment_info.
    A trailing ".in" extension is dropped before steps 1 and 2.

    Raises RelicError if none of the lookups yields a result.
    """
    root, last_ext = os.path.splitext(filename)
    if last_ext == ".in":
        # "<foo>.in" is generally a precursor for a filetype
        # identifiable without the ".in". Drop it.
        xfilename = root
    else:
        xfilename = filename
    basename = os.path.basename(xfilename)
    # Computed up front so it is always bound for the error message below.
    ext = os.path.splitext(xfilename)[1]

    comment_delims = None
    # special cases for some basenames
    try:
        comment_delims = _g_basename_to_comment_info[basename]
    except KeyError:
        pass
    if not comment_delims: # use the file extension
        try:
            comment_delims = _g_ext_to_comment_info[ext]
        except KeyError:
            pass
    if not comment_delims: # try to use the shebang line, if any
        fin = open(filename, 'r')
        try:
            firstline = fin.readline()
        finally:
            # Close the handle even if the read fails.
            fin.close()
        if firstline.startswith("#!"):
            for pattern, cds in _g_shebang_pattern_to_comment_info:
                if pattern.match(firstline):
                    comment_delims = cds
                    break
    if not comment_delims:
        raise RelicError("%s: couldn't determine file type (and "
                         "comment delimiter info) from basename '%s' or "
                         "extension '%s'): you may need to add to "
                         "'_g_basename_to_comment_info', "
                         "'_g_ext_to_comment_info', "
                         "'_g_shebang_pattern_to_comment_info' "
                         "or one of the '_g_skip_*' globals"
                         % (filename, basename, ext))
    return comment_delims
def _relicense_file(original_path, options):
    """Relicense the given file.

        "original_path" is the file to relicense
        "options" is the 9-tuple
                (fallback_initial_copyright_date,
                 fallback_initial_developer,
                 fallback_original_code_is,
                 fallback_original_code_date,
                 switch_to_mpl, backup, results, force_relicensing,
                 _errors)
            where
            "fallback_initial_copyright_date"
            "fallback_initial_developer"
            "fallback_original_code_is"
            "fallback_original_code_date"
                User-specified fallback values to use for the new license
                block if they cannot be found in the original.
            "switch_to_mpl" is a boolean indicating if an NPL-based license
                should be converted to MPL.
            "backup" is a boolean indicating if backups should be made
            "results" is a dictionary in which to store statistics and
                errors. The counters "binary", "no license", "good" and
                "relicensed" are incremented here.
            "force_relicensing" is a boolean indicating if relicensing
                should be done even if the license block looks complete.
            "_errors" is a dictionary on which errors are reported
                (keyed by file path) when the force option is in effect.

    Returns None, or the result of _relicensing_error() when an error was
    reported.
    """
    # A single positional parameter unpacked here replaces the former
    # tuple parameter in the signature (syntax removed in Python 3);
    # callers still pass the same 9-tuple positionally.
    (fallback_initial_copyright_date,
     fallback_initial_developer,
     fallback_original_code_is,
     fallback_original_code_date,
     switch_to_mpl,
     backup,
     results,
     force_relicensing,
     _errors) = options

    mpl_trilicense = ["block_begin", "mpl", "gpl", "lgpl", "block_end"]
    npl_trilicense = ["block_begin", "npl", "gpl", "lgpl", "block_end"]

    log.debug("_relicense_file(original_path='%s')", original_path)
    print(original_path)

    # Ensure can access file.
    if not os.access(original_path, os.R_OK|os.W_OK):
        return _relicensing_error("cannot access", original_path, _errors)
    log.info("have read/write access")

    # Skip binary files.
    try:
        if _is_binary(original_path):
            print("... binary, skipping this file")
            results["binary"] += 1
            return
    except Exception as ex:
        return _relicensing_error(
            "error determining if file is binary: %s" % ex,
            original_path, _errors)

    try:
        lic_info = _get_license_info(original_path, 0)
    except RelicError as ex:
        return _relicensing_error(ex, original_path, _errors)

    # Load fallback info if necessary.
    if not lic_info.get("initial_copyright_date"):
        lic_info["initial_copyright_date"] = fallback_initial_copyright_date
    if not lic_info.get("initial_developer"):
        lic_info["initial_developer"] = fallback_initial_developer
    if not lic_info.get("original_code_is"):
        lic_info["original_code_is"] = fallback_original_code_is
    if not lic_info.get("original_code_date"):
        lic_info["original_code_date"] = fallback_original_code_date

    # Return/abort if cannot or do not need to re-license.
    parts = lic_info["parts"]
    if not parts:
        results["no license"] += 1
        print("... no license found, skipping this file")
        return
    elif "unknown" in parts:
        return _relicensing_error("unknown license (possibly) found",
                                  original_path, _errors)
    elif parts.count("block_begin") > 1: # sanity check
        return _relicensing_error(
            "'BEGIN LICENSE BLOCK' delimiter found more than once",
            original_path, _errors)
    elif parts.count("block_end") > 1: # sanity check
        return _relicensing_error(
            "'END LICENSE BLOCK' delimiter found more than once",
            original_path, _errors)
    elif not lic_info["initial_developer"]:
        return _relicensing_error(
            "no 'Initial Developer' section was found -- use "
            "the -i option to specify your own",
            original_path, _errors)
    elif not lic_info["initial_copyright_date"]:
        return _relicensing_error(
            "no initial copyright year was found -- use "
            "the -y option to specify your own",
            original_path, _errors)
    elif not lic_info["original_code_is"]:
        return _relicensing_error(
            "no 'Original Code is' section was found -- use "
            "the -o option to specify your own",
            original_path, _errors)
    elif ((parts == mpl_trilicense or parts == npl_trilicense) and
          not lic_info.get("unindented_contributor_lines")):
        #XXX Should add an option to relicense anyway because matching
        #    is not super-strict. E.g. nsWidgetFactory.cpp.
        if switch_to_mpl and parts == npl_trilicense:
            print("... %s found (looks complete, but is not MPL)"
                  % "/".join(parts))
        elif force_relicensing:
            print("... %s found (looks complete, but forcing relicensing)"
                  % "/".join(parts))
        else:
            results["good"] += 1
            print("... %s found (looks complete), nothing to do"
                  % "/".join(parts))
            return

    # We need to re-license this file.
    print("... %s found, need to relicense" % "/".join(parts))
    if lic_info["original_code_is"]:
        print("... original code is: %(original_code_is)s" % lic_info)
    if lic_info["original_code_date"]:
        print("... original code date: %(original_code_date)s" % lic_info)
    if lic_info["initial_developer"]:
        print("... initial developer: %(initial_developer)s" % lic_info)
    if lic_info["initial_copyright_date"]:
        print("... initial copyright date: %(initial_copyright_date)s"
              % lic_info)
    if lic_info["contributors"]:
        print("... contributors: %s" % ", ".join(lic_info["contributors"]))

    # Put the license block together.
    # - build up the license block from the appropriate parts
    trilicense = ""
    if (not switch_to_mpl) and ("npl" in parts):
        trilicense_name = "NPL/GPL/LGPL"
        trilicense += _g_trilicense_parts["npl"]
    else:
        trilicense_name = "MPL/GPL/LGPL"
        trilicense += _g_trilicense_parts["mpl"]
    print("... replacing lines %d-%d with %s tri-license"
          % (lic_info["begin_line"], lic_info["end_line"], trilicense_name))
    # A paragraph is simply left out when its data is missing.
    if lic_info["original_code_is"] is not None:
        if lic_info["original_code_date"] is not None:
            trilicense += _g_trilicense_parts["original_code_is_with_date"] % lic_info
        else:
            trilicense += _g_trilicense_parts["original_code_is"] % lic_info
    if (lic_info["initial_developer"] is not None
        and lic_info["initial_copyright_date"] is not None):
        trilicense += _g_trilicense_parts["initial_developer"] % lic_info
    if lic_info["contributors"]:
        # NOTE(review): the indent/separator strings below are reproduced
        # as found; confirm their width against the license template.
        contributors = " " + "\n ".join(lic_info["contributors"]) + "\n"
    else:
        contributors = ""
    trilicense += _g_trilicense_parts["contributors"] % contributors
    if trilicense_name == "NPL/GPL/LGPL":
        trilicense += _g_trilicense_parts["gpl/lgpl for npl"]
    else: # trilicense_name == "MPL/GPL/LGPL"
        trilicense += _g_trilicense_parts["gpl/lgpl for mpl"]

    # get fallback comment subsequent prefix
    fallback_prefix = _get_comment_delim_sets(original_path)

    # - add the comment delimiters
    lines = []
    for i, text in enumerate(trilicense.splitlines()):
        if i == 0:
            prefix = lic_info["first_prefix"]
        elif lic_info["subsequent_prefix"]:
            prefix = lic_info["subsequent_prefix"]
        else:
            # Second delimiter of the first delimiter set for this file.
            prefix = fallback_prefix[0][1]
        if not text: # don't add trailing whitespace
            lines.append(prefix)
        elif len(lic_info["comment_delims"]) == 0:
            lines.append(prefix + text)
        else:
            lines.append(prefix + ' ' + text)
    if lic_info["last_suffix"]: # don't add that ' ' if there is no suffix
        lines[-1] += ' ' + lic_info["last_suffix"]
    trilicense_lines = [line + '\n' for line in lines]

    # Skip out now if doing a dry-run.
    if _g_dry_run:
        results["relicensed"] += 1
        return

    # Make a backup.
    if backup:
        backup_path = _make_backup_path(original_path)
        print("... backing up to '%s'" % backup_path)
        try:
            shutil.copy(original_path, backup_path)
        except EnvironmentError as ex:
            return _relicensing_error(ex, original_path, _errors)

    # Re-license the file.
    try:
        fin = open(original_path, "r")
        try:
            file_lines = fin.readlines()
        finally:
            fin.close()

        # "begin_line" and "end_line" are inclusive 0-based indices.
        file_lines[lic_info["begin_line"]:lic_info["end_line"]+1] = trilicense_lines

        fout = open(original_path, "w")
        try:
            fout.write(''.join(file_lines))
        finally:
            fout.close()
        results["relicensed"] += 1
        print("... done relicensing '%s'" % original_path)
    except:
        # Deliberately a bare except: whatever interrupted the rewrite,
        # try to put the original file back before reporting the error.
        if backup:
            print("... error relicensing, restoring original")
            if os.path.exists(original_path):
                os.remove(original_path)
            os.rename(backup_path, original_path)
        else:
            print("... error relicensing, file may be corrupted")
        # Only type and value are fetched: keeping the traceback in a
        # local would create a reference cycle with this frame.
        # fallback to type_ for string exceptions
        type_, value = sys.exc_info()[:2]
        return _relicensing_error(value or type_,
                                  original_path, _errors)
1773 def _add_license_to_file(original_path,
1774 (initial_copyright_date,
1775 initial_developer,
1776 original_code_is,
1777 original_code_date,
1778 backup,
1779 results,
1780 _errors)):
1781 """Relicense the given file.
1783 "original_path" is the file to relicense
1784 "initial_copyright_date"
1785 "initial_developer"
1786 "original_code_is"
1787 "original_code_date"
1788 User-specified values to use for the new license. All but
1789 "original_code_date" are required.
1790 "backup" (optional, default false) is a boolean indicating if
1791 backups should be made
1792 "results" is a dictionary in which to store statistics and errors.
1793 See relicense() for schema.
1794 "_errors" is a dictionary on which errors are reported
1795 (keyed by file path) when the force option is in effect.
1797 The function does not return anything.
1799 log.debug("_add_license_to_file(original_path='%s')", original_path)
1800 print original_path
1802 # Ensure can access file.
1803 if not os.access(original_path, os.R_OK|os.W_OK):
1804 return _relicensing_error("cannot access", original_path, _errors)
1805 else:
1806 log.info("have read/write access")
1808 # Skip binary files.
1809 try:
1810 if _is_binary(original_path):
1811 print "... binary, skipping this file"
1812 results["binary"] += 1
1813 return
1814 except Exception, ex:
1815 return _relicensing_error(
1816 "error determining if file is binary: %s" % ex,
1817 original_path, _errors)
1819 try:
1820 lic_info = _get_license_info(original_path, 0)
1821 except RelicError, ex:
1822 return _relicensing_error(ex, original_path, _errors)
1824 # Return/abort if cannot or do not need to re-license.
1825 parts = lic_info["parts"]
1826 if lic_info["parts"]: # has a license
1827 results["license"] += 1
1828 print "... license found, skipping this file"
1829 return
1830 #... else we need to add a license to this file.
1831 print "... no license found, need to add one"
1833 # Load license info.
1834 lic_info["initial_developer"] = initial_developer
1835 print "... initial developer: %(initial_developer)s" % lic_info
1836 lic_info["initial_copyright_date"] = initial_copyright_date
1837 print "... initial copyright date: %(initial_copyright_date)s" % lic_info
1838 lic_info["original_code_is"] = original_code_is
1839 print "... original code is: %(original_code_is)s" % lic_info
1840 if original_code_date:
1841 lic_info["original_code_date"] = original_code_date
1842 print "... original code date: %(original_code_date)s" % lic_info
1843 else:
1844 lic_info["original_code_date"] = None
1846 # Determine what line we can start putting the license block on.
1847 # Typically this would be line 0, but for the following exceptions:
1848 # - Shebang (#!) lines
1849 # - Emacs local variables line:
1850 # /* -*- Mode: C++; ... -*- */
1851 # This line does not HAVE to be first, but that seems to be a
1852 # trend, so might as well honour it.
1853 # - XML magic "number": <?xml version="2.0" ... ?>
1854 # where "..." might include newlines
1855 startline = 0
1856 try:
1857 comment_delim_sets = _get_comment_delim_sets(original_path)
1858 except RelicError, ex:
1859 return _relicensing_error(ex, original_path, _errors, 1)
1860 fin = open(original_path, 'r')
1861 try:
1862 lines = fin.readlines()
1863 finally:
1864 fin.close()
1865 # If this is an XML file, advance past the magic number tag.
1866 if lines and lines[0].find("<?xml") != -1:
1867 line = lines[0]
1868 if (line.find('encoding="utf-8"') != -1
1869 and line.startswith("\xef\xbb\xbf")):
1870 # remove UTF-8 BOM
1871 # Note: this is hardly robust Unicode XML handling :)
1872 line = line[3:]
1873 if line.startswith("<?xml"):
1874 end_index = lines[startline].find("?>")
1875 while startline < len(lines):
1876 startline += 1
1877 if end_index != -1: # found end of tag
1878 break
1879 # Note: this does not catch something like this:
1880 # <?xml version="2.0"?> <?stylesheet ...
1881 # ...?>
1882 # but that is just crazy.
1883 # else, advance past a possible shebang line.
1884 else:
1885 for comment_delims in comment_delim_sets:
1886 if (len(comment_delims) == 1 and comment_delims[0] == "#"
1887 and lines[0].startswith("#!")):
1888 startline += 1
1889 # Advance past an Emacs local variable line.
1890 comment_delims = None
1891 if lines[startline].find("-*-") != -1:
1892 for comment_delims in comment_delim_sets:
1893 if lines[startline].find(comment_delims[0]) != -1:
1894 break
1895 else:
1896 # We were hoping to be able to determine which of the set of
1897 # possible commenting styles was in use by finding the
1898 # comment start token on the same line as the -*-
1899 # Emacs-modeline signifier, but could not. This likely means
1900 # that this file uses a block-style comment but the block
1901 # doesn't start on the same line. Fallback to the
1902 # block-style comment delimiter set, if there is one.
1903 for comment_delims in comment_delim_sets:
1904 if len(comment_delims) == 3:
1905 break
1906 else:
1907 comment_delims = comment_delim_sets[0]
1909 if len(comment_delims) == 1: # line-style comments
1910 startline += 1
1911 else: # block-style comments
1912 in_comment = 0
1913 while startline < len(lines):
1914 line = lines[startline]
1915 linepos = 0
1916 while linepos < len(line):
1917 if not in_comment:
1918 i = line.find(comment_delims[0], linepos)
1919 if i == -1:
1920 break
1921 else:
1922 in_comment = 1
1923 linepos = i+1
1924 else:
1925 i = line.find(comment_delims[-1], linepos)
1926 if i == -1:
1927 break
1928 else:
1929 in_comment = 0
1930 linepos = i+1
1931 startline += 1
1932 if not in_comment:
1933 break
1935 # Put the license block together.
1936 # - build up the license block from the appropriate parts
1937 trilicense_name = "MPL/GPL/LGPL"
1938 print "... adding %s tri-license starting at line %s (zero-based)"\
1939 % (trilicense_name, startline)
1940 trilicense = _g_trilicense_parts["mpl"]
1941 if lic_info["original_code_date"] is not None:
1942 trilicense += _g_trilicense_parts["original_code_is_with_date"] % lic_info
1943 else:
1944 trilicense += _g_trilicense_parts["original_code_is"] % lic_info
1945 trilicense += _g_trilicense_parts["initial_developer"] % lic_info
1946 if lic_info.get("contributors"):
1947 contributors = " " + "\n ".join(lic_info["contributors"]) + "\n"
1948 else:
1949 contributors = ""
1950 trilicense += _g_trilicense_parts["contributors"] % contributors
1951 trilicense += _g_trilicense_parts["gpl/lgpl for mpl"]
1952 # - add the comment delimiters
1953 if comment_delims is None:
1954 for comment_delims in comment_delim_sets:
1955 if lines[startline].find(comment_delims[0]) != -1:
1956 break
1957 elif len(comment_delims) == 3 and lines[startline].find(comment_delims[1]) != -1:
1958 break
1959 else:
1960 # We were hoping to be able to determine which of the set of
1961 # possible commenting styles was in use by finding the
1962 # comment start token on the same line as the -*-
1963 # Emacs-modeline signifier, but could not. This likely means
1964 # that this file uses a block-style comment but the block
1965 # doesn't start on the same line. Fallback to the
1966 # block-style comment delimiter set, if there is one.
1967 for comment_delims in comment_delim_sets:
1968 if len(comment_delims) == 3:
1969 break
1970 else:
1971 comment_delims = comment_delim_sets[0]
1972 print "comment delims were none: %r" % comment_delims
1973 t_lines = trilicense.splitlines()
1974 if len(comment_delims) == 1: # line-style comments
1975 for i in range(len(t_lines)):
1976 if t_lines[i]:
1977 t_lines[i] = comment_delims[0] + ' ' + t_lines[i]
1978 else: # don't add trailing whitespace
1979 t_lines[i] = comment_delims[0]
1980 else: # block-style comments
1981 if t_lines[0]:
1982 t_lines[0] = comment_delims[0] + ' ' + t_lines[0]
1983 else: # don't add trailing whitespace
1984 t_lines[0] = comment_delims[0]
1985 for i in range(1, len(t_lines)):
1986 if t_lines[i]:
1987 t_lines[i] = comment_delims[1] + ' ' + t_lines[i]
1988 else: # don't add trailing whitespace
1989 t_lines[i] = comment_delims[1]
1990 t_lines[-1] += ' ' + comment_delims[-1]
1991 for i in range(len(t_lines)): t_lines[i] += '\n'
1992 t_lines[-1] += '\n' # add a blank line at end of lic block
1993 trilicense_lines = t_lines
1994 #pprint.pprint(t_lines)
1996 # Skip out now if doing a dry-run.
1997 if _g_dry_run:
1998 results["added"] += 1
1999 return
2001 # Make a backup.
2002 if backup:
2003 backup_path = _make_backup_path(original_path)
2004 print "... backing up to '%s'" % backup_path
2005 try:
2006 shutil.copy(original_path, backup_path)
2007 except EnvironmentError, ex:
2008 return _relicensing_error(ex, original_path, _errors)
2010 # Add the license to the file.
2011 try:
2012 lines[startline:startline] = trilicense_lines
2014 fout = open(original_path, "w")
2015 try:
2016 fout.write(''.join(lines))
2017 finally:
2018 fout.close()
2019 results["added"] += 1
2020 print "... done adding license to '%s'" % original_path
2021 except:
2022 if backup:
2023 print "... error adding license, restoring original"
2024 if os.path.exists(original_path):
2025 os.remove(original_path)
2026 os.rename(backup_path, original_path)
2027 else:
2028 print "... error adding license, file may be corrupted"
2029 # fallback to type_ for string exceptions
2030 type_, value, tb = sys.exc_info()
2031 return _relicensing_error(value or type_,
2032 original_path, _errors)
2035 def _traverse_dir((file_handler, results), dirname, names):
2036 """os.path.walk target to traverse the give dir"""
2037 log.debug("_traverse_dir((file_handler, results), dirname='%s', "
2038 "names=%s)", dirname, names)
2039 # Iterate over names backwards because may modify it in-place.
2040 # Modifying it in-place ensures that removed entries are not
2041 # traversed by os.path.walk.
2042 for i in range(len(names)-1, -1, -1):
2043 path = os.path.join(dirname, names[i])
2044 if os.path.isdir(path):
2045 if _should_skip_dir(path):
2046 del names[i]
2047 continue
2048 if os.path.isfile(path):
2049 if _should_skip_file(path):
2050 del names[i]
2051 continue
2052 if file_handler is not None:
2053 file_handler(path, results)
def _traverse(paths, file_handler, arg):
    """Traverse the given path(s) and call the given callback for each.

        "paths" is either a list of files or directories, or it is an
            input stream with a path on each line.
        "file_handler" is a callable to be called on each file traversed.
            It is called with the following signature:
                file_handler(path, arg)
            It may be None, in which case files are only traversed.
        "arg" is some object passed to each callback. This is useful for
            recording results.

    Files and directories are filtered through _should_skip_file() and
    _should_skip_dir(). A path that does not exist is logged as a
    warning and skipped. This method does not return anything; it
    raises RelicError for a path that exists but is neither a file nor
    a directory.
    """
    log.debug("_traverse(paths=%s, file_handler=%s, arg=%s)",
              paths, file_handler, arg)

    for path in paths:
        # chomp if 'paths' is a stream; endswith() is used instead of
        # indexing so that an empty path string does not raise.
        if path.endswith("\n"):
            path = path[:-1]
        if not os.path.exists(path):
            log.warn("'%s' does not exist, skipping", path)
        elif os.path.isfile(path):
            if _should_skip_file(path):
                continue
            if file_handler is not None:
                file_handler(path, arg)
        elif os.path.isdir(path):
            if _should_skip_dir(path):
                continue
            os.path.walk(path, _traverse_dir, (file_handler, arg))
        else:
            raise RelicError("unexpected path type '%s'" % path)
2091 #---- public routines
2093 def relicense(paths,
2094 fallback_initial_copyright_date=None,
2095 fallback_initial_developer=None,
2096 fallback_original_code_is=None,
2097 fallback_original_code_date=None,
2098 switch_to_mpl=0,
2099 backup=0,
2100 force_relicensing=0,
2101 _errors=None):
2102 """Relicense the given file(s) (or files in the given dir).
2104 "paths" is either a list of files or directories, or it is an
2105 input stream with a path on each line.
2106 "fallback_initial_copyright_date"
2107 "fallback_initial_developer"
2108 "fallback_original_code_is"
2109 "fallback_original_code_date"
2110 User-specified fallback values to use for the new license
2111 block if they cannot be found in the original.
2112 "switch_to_mpl" (optional, default false) is a boolean
2113 indicating if an NPL-based license should be converted to
2114 MPL.
2115 "backup" (optional, default false)is a boolean indicating if
2116 backups should be made
2117 "force_relicensing" (option, default false) is a boolean
2118 indicating if relicensing should happen even if the license
2119 block looks complete
2120 "_errors" (optional) is a dictionary on which errors are reported
2121 (keyed by file path) when the force option is in effect.
2123 This method does not return anything. It will raise RelicError if
2124 there is a problem. Note that OSError/IOError may also be raised.
2126 log.debug("relicense(paths=%s, backup=%r)", paths, backup)
2127 results = {
2128 "relicensed": 0,
2129 "no license": 0,
2130 "good": 0,
2131 "binary": 0,
2133 _traverse(paths, _relicense_file,
2134 (fallback_initial_copyright_date,
2135 fallback_initial_developer,
2136 fallback_original_code_is,
2137 fallback_original_code_date,
2138 switch_to_mpl,
2139 backup,
2140 results,
2141 force_relicensing,
2142 _errors))
2143 print
2144 print "--------------------- Summary of Results ------------------------"
2145 print "Files skipped b/c they are binary:", results["binary"]
2146 print "Files skipped b/c they already had proper license:", results["good"]
2147 print "Files skipped b/c they had no license:", results["no license"]
2148 if _g_dry_run:
2149 print "Files re-licensed: %d (dry-run)" % results["relicensed"]
2150 else:
2151 print "Files re-licensed:", results["relicensed"]
2152 print "-----------------------------------------------------------------"
2155 def addlicense(paths,
2156 initial_copyright_date,
2157 initial_developer,
2158 original_code_is,
2159 original_code_date=None,
2160 backup=0,
2161 _errors=None):
2162 """Add a license to those of the given file(s) that do not appear to
2163 have one.
2165 "paths" is either a list of files or directories, or it is an
2166 input stream with a path on each line.
2167 "initial_copyright_date"
2168 "initial_developer"
2169 "original_code_is"
2170 "original_code_date"
2171 User-specified values to use for the new license block. All
2172 but "original_code_date" are required.
2173 "backup" (optional, default false) is a boolean indicating if
2174 backups should be made
2175 "_errors" (optional) is a dictionary on which errors are reported
2176 (keyed by file path) when the force option is in effect.
2178 This method does not return anything. It will raise RelicError if
2179 there is a problem. Note that OSError/IOError may also be raised.
2181 log.debug("addlicense(paths=%s, backup=%r)", paths, backup)
2182 if not initial_copyright_date:
2183 raise RelicError("no Initial Developer copyright year was "
2184 "specified -- use the -y option")
2185 if not initial_developer:
2186 raise RelicError("no 'Initial Developer' section was specified "
2187 "-- use the -i option")
2188 if not original_code_is:
2189 raise RelicError("no 'Original Code is' section was specified "
2190 "-- use the -o option")
2192 results = {
2193 "added": 0,
2194 "license": 0,
2195 "binary": 0,
2197 _traverse(paths, _add_license_to_file,
2198 (initial_copyright_date,
2199 initial_developer,
2200 original_code_is,
2201 original_code_date,
2202 backup,
2203 results,
2204 _errors))
2205 print
2206 print "--------------------- Summary of Results ------------------------"
2207 print "Files skipped b/c they are binary:", results["binary"]
2208 print "Files skipped b/c they already had a license:", results["license"]
2209 if _g_dry_run:
2210 print "Files to which a license was added: %d (dry-run)" % results["added"]
2211 else:
2212 print "Files to which a license was added: %d" % results["added"]
2213 print "-----------------------------------------------------------------"
def report(paths, switch_to_mpl=0, show_initial=1, quick=0, _errors=None):
    """Report on the existing licenses in the given file(s).

        "paths" is either a list of files or directories, or it is an
            input stream with a path on each line.
        "switch_to_mpl" (optional, default false) is a boolean
            indicating if an NPL-based license should be converted to
            MPL.
        "show_initial" (optional, default true) is a boolean indicating
            if the initial developer should be displayed for each file.
        "quick" (optional, default false) is a boolean indicating if only
            basic license checking should be applied.
        "_errors" (optional) is a dictionary on which errors are reported
            (keyed by file path) when the force option is in effect.

    This method does not return anything. It will raise RelicError if
    there is a problem.
    """
    log.debug("report(paths=%s)", paths)
    # The per-file handler receives all of the options, plus a fresh
    # results dictionary, as its single argument.
    results = {}
    handler_arg = (results, switch_to_mpl, show_initial, quick, _errors)
    _traverse(paths, _report_on_file, handler_arg)
2241 def statistics(paths, extended=0, quick=0, _errors=None):
2242 """Show a summary table of licenses in files in the given path(s).
2244 "paths" is either a list of files or directories, or it is an
2245 input stream with a path on each line.
2246 "extended" (optional) is a boolean indicating if extended
2247 statistics should be shown
2248 "quick" (optional) is a boolean indicating if quick scan mode should
2249 be enabled.
2250 "_errors" (optional) is a dictionary on which errors are reported
2251 (keyed by file path) when the force option is in effect.
2253 This method does not return anything.
2255 #XXX Info gathering returns a lot more info now. We might be able to
2256 # output more interesting stats.
2257 log.debug("statistics(paths=%s, extended=%s)",
2258 paths, extended)
2259 results = {}
2260 _traverse(paths, _gather_info_on_file, (results, _errors))
2262 # Process results and print out some stats.
2263 stats = {
2264 # <lic type>: [<number of hits>, [<files>...]]
2266 for file, info in results.items():
2267 lic_types = [p for p in info["parts"]
2268 if p not in ["block_begin", "block_end"]]
2269 if not lic_types:
2270 name = "<none found>"
2271 elif "unknown" in lic_types:
2272 name = "<unknown license>"
2273 # Distinguish between complete mpl/gpl/lgpl (i.e. with the block
2274 # begin and end tokens) and incomplete mpl/gpl/lgpl. Likewise
2275 # NPL.
2276 elif (info["parts"] == ["block_begin", "mpl", "gpl", "lgpl", "block_end"]):
2277 name = "mpl/gpl/lgpl (complete)"
2278 elif (info["parts"] == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
2279 name = "npl/gpl/lgpl (complete)"
2280 else:
2281 name = "/".join(lic_types)
2282 if name not in stats: stats[name] = [0, []]
2283 stats[name][0] += 1
2284 stats[name][1].append(file)
2286 statslist = [(hits, name, files) for name, (hits, files) in stats.items()]
2287 statslist.sort() # sort by number of hits
2288 statslist.reverse() # most common first
2289 print "Summary of Licenses in Files"
2290 print "============================"
2291 print " Number Percent License"
2292 print "------- -------- -----------"
2293 # 115 55.55% npl/gpl
2294 for hits, name, files in statslist:
2295 print "%7d %7.2f%% %s"\
2296 % (hits, (hits*100.0/len(results)), name)
2297 #XXX Removed for now because I am not clear if this is at all
2298 # useful.
2299 #if extended:
2300 # hits_per_ext = {}
2301 # for file in files:
2302 # ext = os.path.splitext(file)[1]
2303 # if ext not in hits_per_ext: hits_per_ext[ext] = 0
2304 # hits_per_ext[ext] += 1
2305 # hits_per_ext_list = [(h, e) for e, h in hits_per_ext.items()]
2306 # hits_per_ext_list.sort()
2307 # hits_per_ext_list.reverse()
2308 # for ext_hits, ext in hits_per_ext_list:
2309 # if not ext: ext = "<no extension>"
2310 # print " %7d %s" % (ext_hits, ext)
2311 print "----------------------------"
2312 print "%7d files processed" % len(results)
2314 # Print some other interesting statistics.
2315 no_original_code_is = []
2316 no_initial_developer = []
2317 unindented_contributor_lines = []
2318 for file, info in results.items():
2319 if "original_code_is" in info and info["original_code_is"] is None:
2320 no_original_code_is.append(file)
2321 if "initial_developer" in info and info["initial_developer"] is None:
2322 no_initial_developer.append(file)
2323 if info.get("unindented_contributor_lines"):
2324 unindented_contributor_lines.append(file)
2325 print
2326 print "Licensed files with no 'Initial Developer...' info: %d" % len(no_initial_developer)
2327 if extended:
2328 for f in no_initial_developer:
2329 print " %s" % f
2330 print "Licensed files with no 'Original Code is...' info: %d" % len(no_original_code_is)
2331 if extended:
2332 for f in no_original_code_is:
2333 print " %s" % f
2334 print "Licensed files with improperly indented 'Contributor(s):' line(s): %d" % len(unindented_contributor_lines)
2335 if extended:
2336 for f in unindented_contributor_lines:
2337 print " %s" % f
2338 if extended:
2339 for hits, name, files in statslist:
2340 print "Files in license category '%s'" % name
2341 sortedFiles = files[:]
2342 sortedFiles.sort()
2343 for file in sortedFiles:
2344 print " %s" % file
2345 print
2349 #---- mainline
2351 def main(argv):
2352 try:
2353 opts, args = getopt.getopt(argv[1:], "VvadhqfML:sxry:i:o:D:ARI",
2354 ["version", "verbose", "all", "help", "debug",
2355 "dry-run", "force", "MPL", "license=",
2356 "statistics", "relicense", "backup", "add", "defaults",
2357 "force-relicense", "initial-developers", "quick"])
2358 except getopt.GetoptError, ex:
2359 log.error(str(ex))
2360 log.error("Try `%s --help'.", argv[0])
2361 return 2
2363 debug = 0
2364 mode = "report"
2365 extended = 0
2366 backup = 0
2367 quick = 0
2368 force_relicensing = 0
2369 fallback_initial_copyright_date = None
2370 fallback_initial_developer = None
2371 fallback_original_code_is = None
2372 fallback_original_code_date = None
2373 switch_to_mpl = 0
2374 show_initial = 0
2375 for opt, optarg in opts:
2376 if opt in ("-h", "--help"):
2377 sys.stdout.write(__doc__)
2378 return
2379 elif opt in ("-V", "--version"):
2380 ver = '.'.join([str(part) for part in _version_])
2381 print "relic %s" % ver
2382 return
2383 elif opt in ("-v", "--verbose"):
2384 log.setLevel(logging.INFO)
2385 elif opt in ("-a", "--all"):
2386 global _g_check_all
2387 _g_check_all = 1
2388 elif opt in ("-M", "--MPL"):
2389 switch_to_mpl = 1
2390 elif opt in ("-d", "--debug"):
2391 log.setLevel(logging.DEBUG)
2392 debug = 1
2393 elif opt in ("--dry-run",):
2394 global _g_dry_run
2395 _g_dry_run = 1
2396 elif opt in ("-f", "--force"):
2397 global _g_force
2398 _g_force = 1
2399 elif opt in ("-s", "--statistics"):
2400 mode = "statistics"
2401 elif opt in ("-x",):
2402 extended = 1
2403 elif opt in ("-r", "--relicense"):
2404 mode = "relicense"
2405 elif opt in ("-R", "--force-relicense"):
2406 mode = "relicense"
2407 force_relicensing = 1
2408 elif opt in ("-A", "--add"):
2409 mode = "add"
2410 elif opt == "--backup":
2411 backup = 1
2412 elif opt == "-y":
2413 fallback_initial_copyright_date = optarg
2414 elif opt == "-i":
2415 fallback_initial_developer = optarg
2416 elif opt == "-o":
2417 fallback_original_code_is = optarg
2418 elif opt == "-D":
2419 fallback_original_code_date = optarg
2420 elif opt in ("-I", "--initial-developers"):
2421 show_initial = 1
2422 elif opt == "--defaults":
2423 fallback_original_code_is = "mozilla.org Code"
2424 fallback_initial_copyright_date = "2001"
2425 fallback_initial_developer = "Netscape Communications Corporation"
2426 elif opt in ("-q", "--quick"):
2427 quick = 1
2429 try:
2430 # Prepare the input.
2431 if not args:
2432 log.debug("no given files, trying stdin")
2433 paths = sys.stdin
2434 else:
2435 paths = args
2437 # Invoke the requested action.
2438 _errors = {}
2439 if mode == "relicense":
2440 relicense(paths,
2441 fallback_initial_copyright_date,
2442 fallback_initial_developer,
2443 fallback_original_code_is,
2444 fallback_original_code_date,
2445 switch_to_mpl,
2446 backup,
2447 force_relicensing,
2448 _errors=_errors)
2449 elif mode == "statistics":
2450 statistics(paths, extended, quick, _errors=_errors)
2451 elif mode == "report":
2452 report(paths, switch_to_mpl, show_initial, quick, _errors=_errors)
2453 elif mode == "add":
2454 addlicense(paths,
2455 fallback_initial_copyright_date,
2456 fallback_initial_developer,
2457 fallback_original_code_is,
2458 fallback_original_code_date,
2459 backup,
2460 _errors=_errors)
2461 else:
2462 raise RelicError("unexpected mode: '%s'" % mode)
2464 # Report any delayed errors.
2465 if _errors:
2466 print
2467 print "=================== Summary of Errors ==========================="
2468 print "Files with processing errors:", len(_errors)
2469 print "================================================================="
2470 for file, error in _errors.items():
2471 print "%s: %s" % (file, error)
2472 print
2473 print "================================================================="
2474 except RelicError, ex:
2475 log.error(str(ex) +
2476 " (the --force option can be used to skip problematic "
2477 "files and continue processing rather than aborting)")
2478 if debug:
2479 print
2480 import traceback
2481 traceback.print_exception(*sys.exc_info())
2482 return 1
2483 except KeyboardInterrupt:
2484 pass
# Run as a script: main()'s return value becomes the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))