tools/relic/relic

   1 #!/usr/bin/python
   2
   3 # ***** BEGIN LICENSE BLOCK *****
   4 # Version: MPL 1.1/GPL 2.0/LGPL 2.1
   5 #
   6 # The contents of this file are subject to the Mozilla Public License Version
   7 # 1.1 (the "License"); you may not use this file except in compliance with
   8 # the License. You may obtain a copy of the License at
   9 # http://www.mozilla.org/MPL/
  10 #
  11 # Software distributed under the License is distributed on an "AS IS" basis,
  12 # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  13 # for the specific language governing rights and limitations under the
  14 # License.
  15 #
  16 # The Original Code is the relic relicensing tool.
  17 #
  18 # The Initial Developer of the Original Code is
  19 # Trent Mick <TrentM@ActiveState.com>.
  20 # Portions created by the Initial Developer are Copyright (C) 2003-2005
  21 # the Initial Developer. All Rights Reserved.
  22 #
  23 # Contributor(s):
  24 #   Gervase Markham <gerv@gerv.net>
  25 #   Patrick Fey <bugzilla@nachtarbeiter.net>
  26 #
  27 # Alternatively, the contents of this file may be used under the terms of
  28 # either the GNU General Public License Version 2 or later (the "GPL"), or
  29 # the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30 # in which case the provisions of the GPL or the LGPL are applicable instead
  31 # of those above. If you wish to allow use of your version of this file only
  32 # under the terms of either the GPL or the LGPL, and not to allow others to
  33 # use your version of this file under the terms of the MPL, indicate your
  34 # decision by deleting the provisions above and replace them with the notice
  35 # and other provisions required by the GPL or the LGPL. If you do not delete
  36 # the provisions above, a recipient may use your version of this file under
  37 # the terms of any one of the MPL, the GPL or the LGPL.
  38 #
  39 # ***** END LICENSE BLOCK *****
  40
  41 # Adapted from the 'lick' and 'ripl' Python scripts. (See:
  42 # <http://bugzilla.mozilla.org/show_bug.cgi?id=98089>)
  43
  44 """
  45     relic - RE-LICense a given file, set of files, or directory of files
  46             from the Mozilla source tree
  47
  48     Usage:
  49         relic [options...] [files...]
  50         relic [options...] < files...
  51
  52     Options to Select Mode (use one):
  53         <none>              List the licenses in each file.
  54         -s, --statistics    Show a summary table of licenses in each file.
  55                             The -x, --extended option may be added to show
  56                             some additional detail to the stats.
  57         -r, --relicense     Modify the given files to include the
  58                             appropriate Mozilla license, where
  59                             "appropriate" is either the NPL/GPL/LGPL
  60                             tri-license if it was already under the NPL or
  61                             the MPL/LGPL/GPL license in all other cases.
  62         -R, --force-relicense
  63                             Relicenses files (as -r|--relicense), but
  64                             does NOT skip files that already appear to
  65                             have a complete license.
  66         -A, --add           Add a license to files that do not appear to
  67                             have one.
  68         -I, --initial-developers
  69                             Display initial developer for each file.
  70
  71     General Options:
  72         -h, --help          dump this help and exit
  73         -V, --version       dump this script's version and exit
  74         -v, --verbose       verbose output
  75         -d, --debug         more verbose output
  76
  77         -f, --force         Continue processing after an error. (Errors
  78                             are summarized at end.)
  79         -q, --quick         Quick scanning. Use only basic license checks
  80                             (only use in report mode).
  81         -M, --MPL           Replace NPL licenses with MPL ones.
  82         -a, --all           Check all files (only skip CVS directories).
  83         --dry-run           Go through motions but don't actually change
  84                             any files.
  85         --backup            Make backups of changed files when
  86                             relicensing. Backup filenames are the
  87                             original filename suffixed with a ~# where
  88                             "#" is the lowest number to avoid a file
  89                             conflict.
  90         -o <orig_code_is>   Provide fallback value for the "Original
  91                             Code is" block.
  92         -D <orig_code_date> Provide fallback value for the date
  93                             that is part of the "Original Code is" block.
  94         -i <initial_dev>    Provide fallback value for the "Initial
  95                             Developer of the Original Code is" block.
  96         -y <year>           Provide fallback value for "Initial
  97                             Developer" copyright year.
  98         --defaults          Use the following default fallback values:
  99                              original_code_is: "mozilla.org Code"
 100                              initial_copyright_date: "2001"
 101                              initial_developer: "Netscape Communications
 102                                                  Corporation"
 103                             Note: the "Original Code" date is generally
 104                             not required, so a default is not included
 105                             here.
 106
 107     Examples:
 108         # List license in files under mozilla/js/src.
 109         relic mozilla/js/src        # list licenses in files
 110         relic -s mozilla/js/src     # show summary stats on licenses
 111         relic -r mozilla/js/src     # re-license files
 112
 113 """
 114
 115 import os
 116 import sys
 117 import re
 118 import getopt
 119 import pprint
 120 import shutil
 121
 122
 123 class RelicError(Exception):
 124     pass
 125
 126
 127 #---- setup logging
 128
# Bind the name 'logging' to either the standard logging package or, when
# that is missing, to a local '_logging' module. If neither can be
# imported, print a hint to stderr and let the ImportError propagate.
try:
    # This package is standard as of Python 2.3, but many Python 2.2
    # installations will not have it.
    import logging
    # Note: basicConfig() is only called for the standard package, not for
    # the fallback module below.
    logging.basicConfig()
except ImportError:
    # Local fallback logging module.
    try:
        import _logging as logging
    except ImportError:
        sys.stderr.write("Your Python installation does not have the logging "
                         "package, nor could the fallback _logging module be "
                         "found. One of the two is required to run this "
                         "script.\n\n")
        # Re-raise the original ImportError after printing the hint.
        raise

# Module-level logger used by the functions in this script.
log = logging.getLogger("relic")
 146
 147 #---- globals
 148
# Version of this script.
_version_ = (0, 7, 2)

# When processing files, 'relic' skips files and directories according
# to these settings. Note: files identified in .cvsignore files are also
# skipped.

# File extensions (including the leading dot, as returned by
# os.path.splitext) whose files are skipped.
_g_skip_exts = [".mdp", ".order", ".dsp", ".dsw", ".uf"]

# File basenames that are skipped in whatever directory they occur. The
# comparison against os.path.basename() is exact (case-sensitive).
_g_skip_file_basenames = [
    # Used by CVS (and this script)
    ".cvsignore",

    # GPL with autoconf exception
    "config.guess",
    "config.sub",

    # Auto-generated from other files
    "configure",

    # license and readme files
    "license",
    "readme",
    "copyright",
    "LICENSE-MPL",
    "MPL-1.1.txt",
]
# Individual files to skip. Entries are written with '/' separators and are
# compared against the end of each candidate path (see _should_skip_file),
# so they are effectively paths relative to the top of the source tree.
_g_skip_files = [
    # TODO: update with MPL block - or CVS remove (check history)
    "tools/wizards/templates/licenses/MPL/lic.mak",
    "tools/wizards/templates/licenses/MPL/lic.pl",

###########################################################################
# Everything in _g_skip_files below this line needs no further work.
###########################################################################

    # Files containing copies of licence text which confuses the script
    "LICENSE",
    "js2/COPYING",
    "security/svrcore/LICENSE",
    "extensions/xmlterm/doc/MPL",
    "gfx/cairo/cairo/COPYING-LGPL-2.1",
    "gfx/cairo/cairo/COPYING-MPL-1.1",

    # Files containing global licensing information
    "toolkit/content/license.html",

    # Ben Bucksch - files are tri-licensed with an extra clause.
    "netwerk/streamconv/converters/mozTXTToHTMLConv.cpp",
    "netwerk/streamconv/converters/mozTXTToHTMLConv.h",
    "netwerk/streamconv/public/mozITXTToHTMLConv.idl",

    # GPLed build tools
    "config/preprocessor.pl",
    "intl/uconv/tools/parse-mozilla-encoding-table.pl",
    "intl/uconv/tools/gen-big5hkscs-2001-mozilla.pl",
    "js2/missing",

    # Files which the script doesn't handle well. All have been relicensed
    # manually.
    "xpinstall/wizard/windows/builder/readme.txt",
    "xpfe/bootstrap/icons/windows/readme.txt",
    "embedding/qa/testembed/README.TXT",
    "security/nss/lib/freebl/ecl/README.FP",
    "nsprpub/pkg/linux/sun-nspr.spec",
    "security/nss/pkg/linux/sun-nss.spec",
    "security/jss/pkg/linux/sun-jss.spec",
    "security/nss/lib/freebl/mpi/utils/README",
    "security/nss/lib/freebl/ecl/README",
    "security/nss/lib/freebl/mpi/README",
    "lib/mac/UserInterface/Tables/TableClasses.doc",
    "parser/htmlparser/tests/html/bug23680.html",
    "security/nss/lib/freebl/mpi/montmulfv9.s",
    "tools/performance/pageload/base/lxr.mozilla.org/index.html",
    "testing/performance/win32/page_load_test/" +\
                                            "base/lxr.mozilla.org/index.html",
    "testing/performance/win32/page_load_test/" +\
                                    "base/lxr.mozilla.org/20001028.html.orig",

    # Not sure what to do with this...
    "gfx/cairo/stdint.diff",

    # GPL with autoconf exception (same license as files distributed with)
    "build/autoconf/codeset.m4",
    "toolkit/airbag/airbag/autotools/depcomp",
    "toolkit/airbag/airbag/autotools/missing",
    "toolkit/airbag/airbag/autotools/ltmain.sh",
    "js/tamarin/pcre/ltmain.sh",
    "security/svrcore/compile",
    "security/svrcore/ltmain.sh",
    "security/svrcore/missing",
    "security/svrcore/depcomp",
    "security/svrcore/aclocal.m4",

    # Public domain or equivalent
    "nsprpub/config/nspr.m4",
    "toolkit/airbag/airbag/aclocal.m4",
    "security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s",

    # GSSAPI has BSD-like licence requiring some attribution
    "extensions/auth/gssapi.h",

    # This script
    "tools/relic/relic",
]
# Directory basenames that are skipped wherever they occur.
_g_skip_dir_basenames = [
    "CVS",
]

# Directory basenames that are skipped even when all files are being
# checked (i.e. when _g_check_all is set; see _should_skip_dir).
_g_skip_dir_basenames_cvs_only = [
    "CVS",
]

# Complete path from mozilla dir to a dir to skip.
# Entries use '/' separators and must equal the whole (normalised) directory
# path; a matching suffix is not enough (see _should_skip_dir).
_g_skip_dirs = [
     # Test files for this script, which cause it to crash!
    "tools/relic/test",

    # License template files (TODO: this directory may disappear)
    "tools/wizards/templates/licenses",

    # As per the "New Original Source Files" section of:
    #   http://www.mozilla.org/MPL/license-policy.html
    #   with obsolete or now-relicensed directories removed
    "apache",             # Obsolete mod_gzip code
    "cck",                # mkaply's baby; not core code anyway.
    "dbm",
    "js/rhino",           # Currently MPL/GPL - may end up BSD
    "webtools",           # Various MPLed webtools

    # These could be done, but no-one's clamouring for it, and it's a hassle
    # sorting it all out, so let sleeping dogs lie.
    "msgsdk",
    "java",
    "privacy",

    # These have their own BSD-like license
    "jpeg",
    "modules/libimg/mng",

    # The following are not supposed to be relicensed, but they do have a
    # few files in we care about (like makefiles)
    "modules/libimg/png",
    "modules/zlib",
    "gc/boehm",
    "other-licenses",

    # Copy of GPLed tool
    "tools/buildbot",

    # Other directories we want to exclude
    "embedding/tests",     # Agreed as BSD
    "calendar/libical",    # LGPL/MPL
    "gfx/cairo/cairo/src", # LGPL/MPL
]
 301
# Maps a file basename to a tuple of comment delimiter sets. Each set is a
# list of strings: one string (e.g. ["#"]) or three (e.g. ["/*", " *", "*/"]).
# NOTE(review): presumably the strings are the first-line prefix, the prefix
# for subsequent lines and the closing suffix of a comment block; the code
# that consumes this table is not shown here -- confirm there.
_g_basename_to_comment_info = {
    "configure": (["dnl"], ),

    "Makefile": (["#"], ),
    "makefile": (["#"], ),
    "nfspwd": (["#"], ),
    "typemap": (["#"], ),
    "xmplflt.conf": (["#"], ),
    "ldapfriendly": (["#"], ),
    "ldaptemplates.conf": (["#"], ),
    "ldapsearchprefs.conf": (["#"], ),
    "ldapfilter.conf": (["#"], ),
    "README.configure": (["#"], ),
    "Options.txt": (["#"], ),
    "fdsetsize.txt": (["#"], ),
    "prototype": (["#"], ),
    "prototype_i386": (["#"], ),
    "prototype3_i386": (["#"], ),
    "prototype_com": (["#"], ),
    "prototype3_com": (["#"], ),
    "prototype_sparc": (["#"], ),
    "prototype3_sparc": (["#"], ),
    "nglayout.mac": (["#"], ),
    "pkgdepend": (["#"], ),
    "Maketests": (["#"], ),
    "depend": (["#"], ),
    "csh-aliases": (["#"], ),
    "csh-env": (["#"], ),
    ".cshrc": (["#"], ),
    "MANIFEST": (["#"], ),
    "mozconfig": (["#"], ),
    "makecommon": (["#"], ),
    "bld_awk_pkginfo": (["#"], ),
    "prototype_i86pc": (["#"], ),
    "pkgdepend_5_6": (["#"], ),
    "awk_pkginfo-i386": (["#"], ),
    "awk_pkginfo-sparc": (["#"], ),
    "pkgdepend_64bit": (["#"], ),
    "WIN32": (["#"], ),
    "Makefile.linux": (["#"], ),

    # An empty string means "no comment prefix at all".
    "README": ([""], ["#"]),
    "copyright": ([""], ),

    "xptcstubs_asm_ppc_darwin.s.m4": (["/*", " *", "*/"], ),
    "xptcstubs_asm_mips.s.m4": (["/*", " *", "*/"], ),

    "nsIDocCharsetTest.txt": (["<!--", "   -", "-->"], ),
    "nsIFontListTest.txt": (["<!--", "   -", "-->"], ),
    "ComponentListTest.txt": (["<!--", "   -", "-->"], ),
    "nsIWebBrowserPersistTest1.txt": (["<!--", "   -", "-->"], ),
    "nsIWebBrowserPersistTest2.txt": (["<!--", "   -", "-->"], ),
    "nsIWebBrowserPersistTest3.txt": (["<!--", "   -", "-->"], ),
    "plugins.txt": (["<!--", "   -", "-->"], ),
    "NsISHistoryTestCase1.txt": (["<!--", "   -", "-->"], ),
    "EmbedSmokeTest.txt": (["<!--", "   -", "-->"], ),

    "lineterm_LICENSE": (["/*", " *", "*/"], ),
    "XMLterm_LICENSE": (["/*", " *", "*/"], ),
    "BrowserView.cpp.mod": (["/*", " *", "*/"], ),
    "header_template": (["/*", " *", "*/"], ),
    "cpp_template": (["/*", " *", "*/"], ),

    "abcFormat470.txt": (["//"], ),
    "opcodes.tbl": (["//"], ),
}
 368
 369 _g_ext_to_comment_info = {
 370     ".txt":   (["##", "#", ], ["#"]),
 371     ".TXT":   (["##", "#", ]),
 372
 373     ".doc":   (["", ]),
 374     ".build": (["", ]),
 375     ".1st":   (["", ]),
 376     ".lsm":   (["", ]),
 377     ".FP":    (["", ]),
 378     ".spec":  (["", ]),
 379
 380     ".CPP":    (["/*", " *", "*/"], ),
 381     ".cpp":    (["/*", " *", "*/"], ),
 382     ".H":      (["/*", " *", "*/"], ),
 383     ".h":      (["/*", " *", "*/"], ),
 384     ".hxx":    (["/*", " *", "*/"], ),
 385     ".c":      (["/*", " *", "*/"], ),
 386     ".css":    (["/*", " *", "*/"], ['#']),
 387     ".js":     (["/*", " *", "*/"], ['#']),
 388     ".idl":    (["/*", " *", "*/"], ),
 389     ".ut":     (["/*", " *", "*/"], ),
 390     ".rc":     (["/*", " *", "*/"], ),
 391     ".rc2":    (["/*", " *", "*/"], ),
 392     ".RC":     (["/*", " *", "*/"], ),
 393     ".Prefix": (["/*", " *", "*/"], ),
 394     ".prefix": (["/*", " *", "*/"], ),
 395     ".cfg":    (["/*", " *", "*/"], ["#"]),
 396     ".cp":     (["/*", " *", "*/"], ),
 397     ".cs":     (["/*", " *", "*/"], ),
 398     ".java":   (["/*", " *", "*/"], ),
 399     ".jst":    (["/*", " *", "*/"], ),
 400     ".tbl":    (["/*", " *", "*/"], ),
 401     ".tab":    (["/*", " *", "*/"], ),
 402     ".cc":     (["/*", " *", "*/"], ),
 403     ".msg":    (["/*", " *", "*/"], ),
 404     ".y":      (["/*", " *", "*/"], ),
 405     ".r":      (["/*", " *", "*/"], ),
 406     ".mm":     (["/*", " *", "*/"], ),
 407     ".x-ccmap":(["/*", " *", "*/"], ),
 408     ".ccmap":  (["/*", " *", "*/"], ),
 409     ".sql":    (["/*", " *", "*/"], ),
 410     ".pch++":  (["/*", " *", "*/"], ),
 411     ".xpm":    (["/*", " *", "*/"], ),
 412     ".uih":    (["/*", " *", "*/"], ),
 413     ".uil":    (["/*", " *", "*/"], ),
 414     ".ccmap":  (["/*", " *", "*/"], ),
 415     ".map":    (["/*", " *", "*/"], ),
 416     ".win98":  (["/*", " *", "*/"], ),
 417     ".php":    (["/*", " *", "*/"], ),
 418     ".m":      (["/*", " *", "*/"], ),
 419     ".jnot":   (["/*", " *", "*/"], ),
 420     ".l":      (["/*", " *", "*/"], ),
 421     ".htp":    (["/*", " *", "*/"], ),
 422     ".xs":     (["/*", " *", "*/"], ),
 423     ".as":     (["/*", " *", "*/"], ),
 424     ".api":    (["/*", " *", "*/"], ['#']),
 425
 426     ".html": (["<!--", "   -", "-->"], ["#"]),
 427     ".xml":  (["<!--", "   -", "-->"], ["#"]),
 428     ".xbl":  (["<!--", "   -", "-->"], ["#"]),
 429     ".xsl":  (["<!--", "   -", "-->"], ),
 430     ".xul":  (["<!--", "   -", "-->"], ["#"]),
 431     ".dtd":  (["<!--", "   -", "-->"], ["#"]),
 432     ".rdf":  (["<!--", "   -", "-->"], ["#"]),
 433     ".htm":  (["<!--", "   -", "-->"], ),
 434     ".out":  (["<!--", "   -", "-->"], ),
 435     ".resx": (["<!--", "   -", "-->"], ),
 436     ".bl":   (["<!--", "   -", "-->"], ),
 437     ".xif":  (["<!--", "   -", "-->"], ),
 438     ".xhtml":(["<!--", "   -", "-->"], ["#"]),
 439
 440     ".inc":  (["<!--", "   -", "-->"],
 441               ["#"],
 442               ["@!"],
 443               ["/*", " *", "*/"]),
 444
 445     ".properties": (["#"], ),
 446     ".win":        (["#"], ),
 447     ".dsp":        (["#"], ),
 448     ".exp":        (["#"], ),
 449     ".mk":         (["#"], ),
 450     ".mn":         (["#"], ),
 451     ".mak":        (["#"], ),
 452     ".MAK":        (["#"], ),
 453     ".perl":       (["#"], ),
 454     ".pl":         (["#"], ),
 455     ".PL":         (["#"], ),
 456     ".sh":         (["#"], ),
 457     ".dsw":        (["#"], ),
 458     ".cgi":        (["#"], ),
 459     ".pm":         (["#"], ),
 460     ".pod":        (["#"], ),
 461     ".src":        (["#"], ),
 462     ".csh":        (["#"], ),
 463     ".DLLs":       (["#"], ),
 464     ".ksh":        (["#"], ),
 465     ".toc":        (["#"], ),
 466     ".am":         (["#"], ),
 467     ".df":         (["#"], ),
 468     ".client":     (["#"], ),
 469     ".ref":        (["#"], ), # all of them "Makefile.ref"
 470     ".ldif":       (["#"], ),
 471     ".ex":         (["#"], ),
 472     ".reg":        (["#"], ),
 473     ".py":         (["#"], ),
 474     ".adb":        (["#"], ),
 475     ".dtksh":      (["#"], ),
 476     ".pkg":        (["#"], ),
 477     ".et":         (["#"], ),
 478     ".stub":       (["#"], ),
 479     ".nss":        (["#"], ),
 480     ".os2":        (["#"], ),
 481     ".Solaris":    (["#"], ),
 482     ".rep":        (["#"], ),
 483     ".NSS":        (["#"], ),
 484     ".server":     (["#"], ),
 485     ".awk":        (["#"], ),
 486     ".targ":       (["#"], ),
 487     ".gnuplot":    (["#"], ),
 488     ".bash":       (["#"], ),
 489     ".tmpl":       (["#"], ),
 490     ".com":        (["#"], ),
 491     ".dat":        (["#"], ),
 492     ".rpm":        (["#"], ),
 493     ".nsi":        (["#"], ),
 494     ".nsh":        (["#"], ),
 495     ".template":   (["#"], ),
 496     ".ldkd":       (["#"], ),
 497     ".ldku":       (["#"], ),
 498     ".arm":        (["#"], ),
 499
 500     ".tdf":  ([";"], ),
 501     ".def":  ([";+#"], [";"]),
 502     ".DEF":  ([";+#"], [";"]),
 503     ".ini":  ([";"], ),
 504     ".it":   ([";"], ),
 505     ".lisp": ([";;;"], ),
 506
 507     ".cmd": (["rem"], ["REM"]),
 508     ".bat": (["rem"], ["REM"]),
 509
 510     ".tex":  (["%"], ),
 511     ".texi": (["%"], ),
 512
 513     ".m4":  (["dnl"], ),
 514
 515     ".asm": ([";"], ),
 516     ".vbs": (["'"], ),
 517     ".il":  (["!"], ),
 518     ".ad":  (["!"], ),
 519
 520     ".script": (["(*", " *", "*)"], ),
 521
 522     ".3x":  (['.\\"'], ),
 523
 524     # What a mess...
 525     ".s": (["#"], ["//"], ["/*", " *", "*/"], ["!"], [";"], ["/"]),
 526 }
 527 _g_shebang_pattern_to_comment_info = [
 528     (re.compile(ur'\A#!.*/bin/(ba)?sh.*$', re.IGNORECASE), (["#"], )),
 529     (re.compile(ur'\A#!.*perl.*$', re.IGNORECASE), (["#"], )),
 530     (re.compile(ur'\A#!.*php.*$', re.IGNORECASE), (["#"], )),
 531     (re.compile(ur'\A#!.*python.*$', re.IGNORECASE), (["#"], )),
 532     (re.compile(ur'\A#!.*ruby.*$', re.IGNORECASE), (["#"], )),
 533     (re.compile(ur'\A#!.*tclsh.*$', re.IGNORECASE), (["#"], )),
 534     (re.compile(ur'\A#!.*wish.*$', re.IGNORECASE), (["#"], )),
 535     (re.compile(ur'\A#!.*expect.*$', re.IGNORECASE), (["#"], )),
 536 ]
 537
 538
# Text fragments of the license block, keyed by name. The fragments are
# plain text without any comment delimiters.
# NOTE(review): presumably they are concatenated (opening part, "Original
# Code"/"Initial Developer"/"Contributor(s)" parts, closing part) and then
# wrapped in the file's comment syntax; the code doing so is not shown
# here -- confirm there.
_g_trilicense_parts = {
    # Opening part for MPL-based files (includes the BEGIN marker).
    "mpl": """\
***** BEGIN LICENSE BLOCK *****
Version: MPL 1.1/GPL 2.0/LGPL 2.1

The contents of this file are subject to the Mozilla Public License Version
1.1 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.mozilla.org/MPL/

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.

""",
    # Opening part for NPL-based files (includes the BEGIN marker).
    "npl": """\
***** BEGIN LICENSE BLOCK *****
Version: NPL 1.1/GPL 2.0/LGPL 2.1

The contents of this file are subject to the Netscape Public License
Version 1.1 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.mozilla.org/NPL/

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
for the specific language governing rights and limitations under the
License.

""",
    # Template; expects the mapping key 'original_code_is'.
    "original_code_is": """\
The Original Code is %(original_code_is)s.

""",
    # Template; expects the mapping keys 'original_code_is' and
    # 'original_code_date'.
    "original_code_is_with_date": """\
The Original Code is %(original_code_is)s, released
%(original_code_date)s.

""",
    # Template; expects the mapping keys 'initial_developer' and
    # 'initial_copyright_date'.
    "initial_developer": """\
The Initial Developer of the Original Code is
%(initial_developer)s.
Portions created by the Initial Developer are Copyright (C) %(initial_copyright_date)s
the Initial Developer. All Rights Reserved.

""",
    # Template; takes a single positional value (%s).
    "contributors": """\
Contributor(s):
%s
""",
    # Closing parts (each ends with the END marker, without a trailing
    # newline): GPL-only and GPL/LGPL variants for MPL and for NPL.
    "gpl for mpl": """\
Alternatively, the contents of this file may be used under the terms of
the GNU General Public License Version 2 or later (the "GPL"), in which
case the provisions of the GPL are applicable instead of those above. If
you wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under the
MPL, indicate your decision by deleting the provisions above and replacing
them with the notice and other provisions required by the GPL. If you do
not delete the provisions above, a recipient may use your version of this
file under either the MPL or the GPL.

***** END LICENSE BLOCK *****""",
    "gpl for npl": """\
Alternatively, the contents of this file may be used under the terms of
the GNU General Public License Version 2 or later (the "GPL"), in which
case the provisions of the GPL are applicable instead of those above. If
you wish to allow use of your version of this file only under the terms of
the GPL and not to allow others to use your version of this file under the
NPL, indicate your decision by deleting the provisions above and replacing
them with the notice and other provisions required by the GPL. If you do
not delete the provisions above, a recipient may use your version of this
file under either the NPL or the GPL.

***** END LICENSE BLOCK *****""",
    "gpl/lgpl for mpl": """\
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the MPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the MPL, the GPL or the LGPL.

***** END LICENSE BLOCK *****""",
    "gpl/lgpl for npl": """\
Alternatively, the contents of this file may be used under the terms of
either the GNU General Public License Version 2 or later (the "GPL"), or
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
in which case the provisions of the GPL or the LGPL are applicable instead
of those above. If you wish to allow use of your version of this file only
under the terms of either the GPL or the LGPL, and not to allow others to
use your version of this file under the terms of the NPL, indicate your
decision by deleting the provisions above and replace them with the notice
and other provisions required by the GPL or the LGPL. If you do not delete
the provisions above, a recipient may use your version of this file under
the terms of any one of the NPL, the GPL or the LGPL.

***** END LICENSE BLOCK *****""",
}
 643
# Global mode flags (0 = off); all are off by default.
_g_dry_run = 0 # iff true, don't modify any files
# NOTE(review): presumably set by -f/--force ("Continue processing after an
# error" in the usage text); the code that reads it is not shown here.
_g_force = 0
# iff true, _should_skip_file() skips nothing and _should_skip_dir() only
# skips the directories named in _g_skip_dir_basenames_cvs_only
_g_check_all = 0
 647
 648
 649 #---- internal support routines
 650
 651 def _is_binary(filename):
 652     """Return true iff the given filename is binary.
 653
 654     Raises an EnvironmentError if the file does not exist or cannot be
 655     accessed.
 656     """
 657     fin = open(filename, 'rb')
 658     try:
 659         CHUNKSIZE = 1024
 660         while 1:
 661             chunk = fin.read(CHUNKSIZE)
 662             if '\0' in chunk: # found null byte
 663                 return 1
 664             if len(chunk) < CHUNKSIZE:
 665                 break # done
 666     finally:
 667         fin.close()
 668
 669     return 0
 670
 671
 672 _g_cvsignore_cache = {} # optimization: keep a cache of .cvsignore content
 673 def _should_skip_according_to_cvsignore(path):
 674     dirname, basename = os.path.split(path)
 675     cvsignore = os.path.join(dirname, ".cvsignore")
 676     if not os.path.exists(cvsignore):
 677         return 0
 678     elif cvsignore not in _g_cvsignore_cache:
 679         fin = open(cvsignore, 'r')
 680         to_ignore = []
 681         try:
 682             for f in fin:
 683                 if f[-1] == "\n": f = f[:-1] # chomp
 684                 if not f: continue # skip empty lines
 685                 to_ignore.append(f)
 686         finally:
 687             fin.close()
 688         _g_cvsignore_cache[cvsignore] = to_ignore
 689
 690     # At this point .cvsignore exists and its contents are in the cache.
 691     to_ignore = _g_cvsignore_cache[cvsignore]
 692     if basename in to_ignore:
 693         return 1
 694     else:
 695         return 0
 696
 697 _g_backup_pattern = re.compile("~\d+$")
 698 def _should_skip_file(path):
 699     log.debug("_should_skip_file(path='%s')", path)
 700     if _g_check_all:
 701         return 0
 702     ext = os.path.splitext(path)[1]
 703     if ext in _g_skip_exts:
 704         log.info("Skipping '%s' (according to '_g_skip_exts').", path)
 705         return 1
 706     xpath = '/'.join(path.split(os.sep)) # use same sep as in _g_skip_files
 707     for sf in _g_skip_files:
 708         if xpath.endswith(sf):
 709             log.info("Skipping '%s' (according to '_g_skip_files').", path)
 710             return 1
 711     if os.path.basename(path) in _g_skip_file_basenames:
 712         log.info("Skipping '%s' (according to '_g_skip_file_basenames').", path)
 713         return 1
 714     if _should_skip_according_to_cvsignore(path):
 715         log.info("Skipping '%s' (according to .cvsignore).", path)
 716         return 1
 717     if _g_backup_pattern.search(path):
 718         log.info("Skipping '%s' (looks like backup file).", path)
 719         return 1
 720     return 0
 721
 722 def _should_skip_dir(path):
 723     log.debug("_should_skip_dir(path='%s')", path)
 724     if _g_check_all:
 725         if os.path.basename(path) in _g_skip_dir_basenames_cvs_only:
 726             return 1
 727         return 0
 728     if os.path.basename(path) in _g_skip_dir_basenames:
 729         log.info("Skipping '%s' (according to _g_skip_dir_basenames).", path)
 730         return 1
 731     xpath = '/'.join(path.split(os.sep)) # use same sep as in _g_skip_dirs
 732     # These could do with being a proper path canonicalisation function...
 733     if xpath[-1] == '/': xpath = xpath[:-1]  # treat "calendar/" the same as "calendar"
 734     if xpath[0:2] == './': xpath = xpath[2:]  # treat "./calendar" the same as "calendar"
 735     for sd in _g_skip_dirs:
 736         # Changed by gerv to make skip_dirs require whole path
 737         if xpath == sd:
 738             log.info("Skipping '%s' (according to _g_skip_dirs).", path)
 739             return 1
 740     if _should_skip_according_to_cvsignore(path):
 741         log.info("Skipping '%s' (according to .cvsignore).", path)
 742         return 1
 743     return 0
 744
 745
 746 def _get_license_info(filename, show_initial=0, quick=0):
 747     """Return license block information for the given file.
 748
 749         "filename" is the path to the file to scan.
 750         "show_initial" is a boolean that indicates if initial developer info
 751           should be displayed.
 752         "quick" is a boolean that can be set for a quick scan. In this
 753             case, only the "parts" field of the return dictionary will
 754             be filled out.
 755
 756     Returns a dictionary adequately describing the license block in the
 757     given file for the purpose of determining whether to patch the
 758     license block and how. Returns a dictionary of the following form:
 759         {"parts": <list of zero or more of "mpl", "npl", "gpl", "lgpl",
 760                    "unknown", "block_begin", "block_end" in the
 761                    order in which they were found>,
 762          # if necessary, the following keys are included as well
 763          "begin_line": <(0-based) index at which license block starts>,
 764          "end_line": <(0-based) index at which license block ends>,
 765          "first_prefix": <prefix to use for new license block first line>,
 766          "subsequent_prefix": <prefix to use for subsequent lines>,
 767          "last_suffix": <suffix to use for last line>,
 768          # The following fields are correspond to the file specific
 769          # portions of the license template as described here:
 770          #   http://www.mozilla.org/MPL/relicensing-faq.html#new-license
 771          # If the associated block is not found, then the value is None.
 772          "original_code_is": ...,
 773          "original_code_date": ...,
 774          "initial_developer": ...,
 775          "initial_copyright_date": ...,
 776          "contributors": ...,
 777         }
 778
 779     precondition: should not be called on binary files
 780     """
 781     lic_info = {
 782         "parts": [],
 783     }
 784
 785     fin = open(filename, 'r')
 786     try:
 787         content = fin.read()
 788     finally:
 789         fin.close()
 790
 791     # Help me find filena
 792     log.info("Next file is: %s", filename)
 793
 794     # do quick search to see if any of the desired licenses is in here
 795     # - if it looks like all the parts are there, good, done
 796     # - if some but not all parts, continue
 797     parts_pattern = re.compile("""(
 798         (?P<block_begin>\*\*\*\*\*\ BEGIN\ LICENSE\ BLOCK\ \*\*\*\*\*)
 799         | (?P<mpl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla)
 800         | (?P<npl>The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape)
 801         | (?P<gpl>GNU\ (General\ )?Public\ License)
 802         | (?P<lgpl>(Library|Lesser)\ General\ Public\ License)
 803         | (?P<block_end>\*\*\*\*\*\ END\ LICENSE\ BLOCK\ \*\*\*\*\*)
 804         )""",
 805         re.VERBOSE)
 806     parts = [] # found license parts in this file
 807     start = 0
 808     blocks = 0
 809     while 1:
 810         match = parts_pattern.search(content, start)
 811         if match:
 812             # Skip this block, if the last license block is more than 10 lines
 813             # away (file is probably used for autogeneration of files then).
 814             if blocks == 1 and (match.start()-start) > 10:
 815                 break
 816             else:
 817                 parts = match.groupdict()
 818                 for part in parts:
 819                     if parts[part]:
 820                         lic_info["parts"].append(part)
 821                         log.info("%s license/delimeter found", part)
 822                         start = match.end()
 823                         if part == "block_end":
 824                             blocks = blocks + 1
 825                         else:
 826                             blocks = 0
 827                         break
 828                 else:
 829                     raise RelicError("unexpected license part: %r" % parts)
 830         else:
 831             break
 832
 833     # no license block at all
 834     if not parts:
 835         # - if not, check to see if License or Copyright shows up in the
 836         #   file; if so, then error out; if not, skip out
 837         any_lic_pattern = re.compile("(Copyright|Licen[sc]e)", re.IGNORECASE)
 838         match = any_lic_pattern.search(content)
 839         if match:
 840             lic_info["parts"].append("unknown")
 841             log.info("unknown license found: %r",
 842                      content[max(match.start()-20,0):match.end()+20])
 843         else:
 844             log.info("no license found")
 845         return lic_info
 846
 847     # license block with non-tri-license version headers
 848     elif lic_info["parts"] == ["block_begin", "block_end"]:
 849         lic_info["parts"].append("unknown")
 850         log.info("unknown license found (license block with non-tri-license)")
 851         return lic_info
 852
 853     # license block with tri-license version headers
 854     elif (lic_info["parts"] == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
 855           lic_info["parts"] == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
 856         log.info("license looks good, no changes necessary")
 857     if quick:
 858         return lic_info
 859
 860     # Otherwise, the license needs to be fixed, so gather more detailed
 861     # information. Here is the algorithm we will use:
 862     # - find first license line
 863     # - find the end of this comment block (assumption: from the first
 864     #   license line to the end of the comment block is the full
 865     #   license block)
 866     #   This is a bad assumption in two cases and steps have been taken
 867     #   to try to deal with those cases:
 868     #   - There could be a trailing part bit of comment that is
 869     #     NOT part of the license but is part of the same comment
 870     #     block. A common example are the:
 871     #       This Original Code has been modified by IBM...
 872     #     files (about 130 of them in the moz tree).
 873     #     (c.f. test_relicense_ibm_copyright_suffix.c)
 874     #   - Some files have split up the license paragraphs into
 875     #     multiple comment blocks, e.g.
 876     #     "mozilla/build/unix/abs2rel.pl":
 877     #        # The contents of this file are subject to the
 878     #        # ...
 879     #        # the License at http://www.mozilla.org/MPL/
 880     #
 881     #        # The Initial Developer of the Original Code
 882     #        # ...
 883     #        # Rights Reserved.
 884     #     (c.f. test_relicense_separated_license_comment_blocks.pl)
 885     # - these are the lines to replace
 886     # - gather embedded lic data
 887     # - use second line to determine line prefix
 888     # ? Should we only allow processing of unknown-delimiter-files with
 889     #   an option?
 890
 891     # Get comment delimiter info for this file.
 892     comment_delim_sets = _get_comment_delim_sets(filename)
 893
 894     # - find first license line (and determine which set of comment
 895     #   delimiters are in use)
 896     lines = content.splitlines()
 897     for comment_delims in comment_delim_sets:
 898         if len(comment_delims) == 3:
 899             prefix_pattern = "%s|%s|" % (re.escape(comment_delims[0]),
 900                                          re.escape(comment_delims[1]))
 901             suffix_pattern = "%s" % re.escape(comment_delims[2])
 902         elif len(comment_delims) == 2:
 903             prefix_pattern = "%s|" % re.escape(comment_delims[0])
 904             suffix_pattern = "%s" % re.escape(comment_delims[1])
 905         elif len(comment_delims) == 1:
 906             prefix_pattern = re.escape(comment_delims[-1])
 907             suffix_pattern = ""
 908         else: # len(comment_delims) == 0
 909             prefix_pattern = ""
 910             suffix_pattern = ""
 911
 912         lic_begin_pattern = re.compile("""
 913             ^(?P<prefix>%s)
 914             (?P<space>\s*)
 915             (\*+\ BEGIN\ LICENSE\ BLOCK\ \*+
 916              |\-+\ BEGIN\ LICENSE\ BLOCK\ \-+
 917              | Version:\ MPL\ \d+\.\d+/GPL\ \d+\.\d+/LGPL\ \d+\.\d+
 918              | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Mozilla[\w ]*
 919              | The\ contents\ of\ this\ file\ are\ subject\ to\ the\ Netscape[\w ]*
 920              | Alternatively,\ the\ contents\ of\ this\ file\ may\ be\ used\ under\ the[\w ]*)
 921             (?P<suffix>%s|)\s*?$
 922             """ % (prefix_pattern, suffix_pattern), re.VERBOSE)
 923
 924         for i in range(len(lines)):
 925             match = lic_begin_pattern.search(lines[i])
 926             if match:
 927                 beginline = {
 928                     "content": lines[i],
 929                     "linenum": i,
 930                     "prefix": match.group("prefix"),
 931                     "space": match.group("space"),
 932                     "suffix": match.group("suffix")
 933                 }
 934                 # Optimization: If the line before the "beginline" is simply
 935                 # a block comment open the include that line in parsed out
 936                 # license block. E.g.,
 937                 #    <!--
 938                 #       - ***** BEGIN LICENSE BLOCK *****
 939                 #       ...
 940                 if (len(comment_delims) > 1  # only for block comments
 941                     and beginline["prefix"] != comment_delims[0]
 942                     and i-1 >= 0
 943                     and lines[i-1].strip() == comment_delims[0]):
 944                     beginline["linenum"] -= 1
 945                     beginline["prefix"] = comment_delims[0]
 946                 break
 947         if match: break
 948     else:
 949         raise RelicError("couldn't find start line with this pattern (even "
 950                          "though it looks like there is a license block in "
 951                          "%s): %s" % (filename, lic_begin_pattern.pattern))
 952     log.info("comment delimiters: %s", comment_delims)
 953     log.debug("beginline dict: %s", beginline)
 954     lic_info["comment_delims"] = comment_delims
 955     lic_info["begin_line"] = beginline["linenum"]
 956     lic_info["first_prefix"] = beginline["prefix"]
 957     log.info("prefix for first line: '%s'", beginline["prefix"])
 958
 959     # - get second license line
 960     lic_middle_pattern = re.compile("""
 961         ^(?P<prefix>%s|)
 962         (?P<space>\s*)
 963         (?P<content>.*)
 964         (?P<suffix>%s|)\s*?$
 965         """ % (prefix_pattern, suffix_pattern),
 966         re.VERBOSE)
 967     # skip empty lines which might result in bogus scanning later, e.g.:
 968     #   mozilla/layout/html/tests/table/marvin/x_thead_align_center.xml
 969     second_linenum = beginline["linenum"]+1
 970     while second_linenum < len(lines):
 971         if lines[second_linenum].strip():
 972             break
 973         log.debug("skip blank 'second' line: %d", second_linenum)
 974         second_linenum +=1
 975     else:
 976         raise RelicError("all lines after the first license block line (%d) "
 977                          "were empty" % (beginline["linenum"]+1))
 978     match = lic_middle_pattern.search(lines[second_linenum])
 979     if match:
 980         secondline = {
 981             "content": lines[second_linenum],
 982             "linenum": second_linenum,
 983             "prefix": match.group("prefix"),
 984             "space": match.group("space"),
 985             "suffix": match.group("suffix")
 986         }
 987     else:
 988         raise RelicError("didn't find second line with pattern: %s"
 989                          % lic_middle_pattern.pattern)
 990     log.debug("secondline dict: %s", secondline)
 991     lic_info["subsequent_prefix"] = secondline["prefix"]
 992     log.info("prefix for subsequent lines: '%s'", secondline["prefix"])
 993
 994     # - find block comment end
 995     orig_code_modified_pattern = re.compile("This Original Code has been "
 996                                             "modified", re.I)
 997     non_lic_content_in_same_comment_block = 0
 998     if len(comment_delims) == 1:
 999         # line-style comments: The comment block "end" is defined as the
1000         # last line before a line NOT using the block comment delimiter.
1001         #XXX:BUG: This is not good enough for:
1002         #           test/inputs/separated_license_comment_blocks.pl
1003         if comment_delims[0] == "":
1004             raise RelicError(
1005                 "Don't know how to find the end of a line-style comment "
1006                 "block when the delimiter is the empty string. (Basically "
1007                 "this script cannot handle this type of file.)")
1008         for i in range(beginline["linenum"]+1, len(lines)):
1009             if not lines[i].startswith(comment_delims[0]):
1010                 endlinenum = i-1
1011                 break
1012             elif lines[i].find("END LICENSE BLOCK") != -1:
1013                 endlinenum = i
1014                 break
1015             # As per "test_relicense_trailing_orig_code_modified.pl", a
1016             # paragraph starting with:
1017             #     This Original Code has been modified
1018             # is deemed to be OUTside the license block, i.e. it is not
1019             # replaced for relicensing.
1020             if orig_code_modified_pattern.search(lines[i]):
1021                 non_lic_content_in_same_comment_block = 1
1022                 # The endline is the first non-blank line before this one.
1023                 endlinenum = i-1
1024                 while 1:
1025                     line = lines[endlinenum]
1026                     match = lic_middle_pattern.search(line)
1027                     if not match:
1028                         raise RelicError("Line did not match lic_middle_pattern "
1029                                          "unexpectedly: %r" % line)
1030                     if match.group("content").strip(): # non-empty line
1031                         break
1032                     endlinenum -= 1
1033                 break
1034         else:
1035             raise RelicError("Could not find license comment block end "
1036                              "line in '%s'." % filename)
1037     elif len(comment_delims) >= 2:   # block-style comments
1038         for i in range(beginline["linenum"]+1, len(lines)):
1039             if lines[i].find(comment_delims[-1]) != -1:
1040                 endlinenum = i
1041                 break
1042             elif lines[i].find("END LICENSE BLOCK") != -1:
1043                 endlinenum = i
1044                 non_lic_content_in_same_comment_block = 1
1045                 break
1046             # As per "test_relicense_ibm_copyright_suffix.c", a
1047             # paragraph starting with:
1048             #     This Original Code has been modified
1049             # is deemed to be OUTside the license block, i.e. it is not
1050             # replaced for relicensing.
1051             if orig_code_modified_pattern.search(lines[i]):
1052                 non_lic_content_in_same_comment_block = 1
1053                 # The endline is the first non-blank line before this one.
1054                 endlinenum = i-1
1055                 while 1:
1056                     line = lines[endlinenum]
1057                     match = lic_middle_pattern.search(line)
1058                     if not match:
1059                         raise RelicError("Line did not match lic_middle_pattern "
1060                                          "unexpectedly: %r" % line)
1061                     if match.group("content").strip(): # non-empty line
1062                         break
1063                     endlinenum -= 1
1064                 break
1065         else:
1066             raise RelicError("Could not find license comment block end "
1067                              "line in '%s'." % filename)
1068         if not non_lic_content_in_same_comment_block\
1069            and not lines[endlinenum].strip().endswith(comment_delims[-1]):
1070             raise RelicError(
1071                 "There is text AFTER the license block comment end "
1072                 "delimiter, but on the SAME LINE. This is unexpected. "
1073                 "Bailing.\n%s:%s:%r"
1074                 % (filename, endlinenum, lines[endlinenum]))
1075     else: # len(comment_delims) == 0
1076         # For files without a comment character to help out, we ONLY
1077         # successfully break one the full correct "END LICENSE BLOCK"
1078         # token.
1079         for i in range(beginline["linenum"]+1, len(lines)):
1080             if lines[i].find("END LICENSE BLOCK") != -1:
1081                 endlinenum = i
1082                 break
1083             elif i > beginline["linenum"]+1+50:
1084                 raise RelicError("Haven't found 'END LICENSE BLOCK' marker "
1085                                  "within 50 lines of the start of the "
1086                                  "license block on line %d. Aborting."
1087                                  % (beginline["linenum"]+1))
1088             # As per "test_relicense_trailing_orig_code_modified.pl", a
1089             # paragraph starting with:
1090             #     This Original Code has been modified
1091             # is deemed to be OUTside the license block, i.e. it is not
1092             # replaced for relicensing.
1093             if orig_code_modified_pattern.search(lines[i]):
1094                 non_lic_content_in_same_comment_block = 1
1095                 # The endline is the first non-blank line before this one.
1096                 endlinenum = i-1
1097                 while 1:
1098                     line = lines[endlinenum]
1099                     match = lic_middle_pattern.search(line)
1100                     if not match:
1101                         raise RelicError("Line did not match lic_middle_pattern "
1102                                          "unexpectedly: %r" % line)
1103                     if match.group("content").strip(): # non-empty line
1104                         break
1105                     endlinenum -= 1
1106                 break
1107         else:
1108             raise RelicError("Could not find license comment block end "
1109                              "line in '%s'." % filename)
1110
1111     # Test case: test_relicense_separated_license_comment_blocks.pl
1112     # It is possible that a separate comment block immediately following
1113     # the license block we just parsed should be included in the license
1114     # block.
1115     if (not non_lic_content_in_same_comment_block
1116         and len(comment_delims) == 1): # only do this for line-style comments
1117         lic_indicators = [
1118             re.compile("^The content of this file are subject to", re.I),
1119             re.compile("^Software distributed under the License", re.I),
1120             re.compile("^The Original Code is", re.I),
1121             re.compile("^The Initial Developer", re.I),
1122             re.compile("^Contributor", re.I),
1123             re.compile("^Alternatively, the content of this file", re.I),
1124         ]
1125         comment_line_pattern = re.compile("""
1126             ^(?P<prefix>%s|)
1127             (?P<space>\s*)
1128             (?P<content>.*)
1129             (?P<suffix>%s|)\s*?$
1130             """ % (prefix_pattern, suffix_pattern),
1131             re.VERBOSE)
1132         i = endlinenum
1133         while i+1 < len(lines):
1134             i += 1; line = lines[i]
1135             comment_index = line.find(comment_delims[0])
1136             if comment_index != -1:
1137                 content = line[:comment_index].strip()
1138                 comment = line[comment_index+len(comment_delims[0]):].strip()
1139             else:
1140                 content = line.strip()
1141                 comment = ""
1142             if content: # if non-comment content, then skip out
1143                 break
1144             if not comment:
1145                 continue
1146             for indicator in lic_indicators:
1147                 if indicator.search(comment):
1148                     # include this paragraph in the lic block
1149                     while i < len(lines):
1150                         i += 1; line = lines[i]
1151                         if not line.strip().startswith(comment_delims[0]):
1152                             break
1153                         if not line.strip()[len(comment_delims[0]):]:
1154                             break
1155                     endlinenum = i-1
1156                     break
1157             else:
1158                 break # this is a non-lic-related comment
1159
1160     # Get the end-line data.
1161     if non_lic_content_in_same_comment_block:
1162         lic_end_pattern = re.compile(
1163             "^(?P<prefix>%s)(?P<space>\s*).*?\s*?$"
1164             % prefix_pattern)
1165     else:
1166         lic_end_pattern = re.compile(
1167             "^(?P<prefix>%s)(?P<space>\s*).*?(?P<suffix>%s)\s*?$"
1168             % (prefix_pattern, suffix_pattern))
1169     match = lic_end_pattern.match(lines[endlinenum])
1170     if match:
1171         endline = {
1172             "content": lines[endlinenum],
1173             "linenum": endlinenum,
1174             "prefix": match.group("prefix"),
1175             "space": match.group("space"),
1176             "suffix": match.groupdict().get("suffix", ""),
1177         }
1178     else:
1179         raise RelicError("license block end line did not match: line='%s', "
1180                          "pattern='%s'"
1181                          % (lines[endlinenum], lic_end_pattern.pattern))
1182     log.debug("endline dict: %s", endline)
1183     lic_info["last_suffix"] = endline["suffix"]
1184     log.info("suffix for last line: '%s'", endline["suffix"])
1185     lic_info["end_line"] = endline["linenum"]
1186     log.info("license lines: %d-%d", beginline["linenum"], endline["linenum"])
1187
1188     # So at this point we have the beginline, secondline, and endline
1189     # dicts describing and bounding the license block.
1190
1191     # - gather embedded lic data
1192     # As described here:
1193     #   http://www.mozilla.org/MPL/relicensing-faq.html#new-license
1194     # we have to parse out the following possible fields:
1195     #   original_code_is
1196     #   original_code_date
1197     #   initial_developer
1198     #   initial_copyright_date
1199     #   contributors
1200     lic_line_pattern = re.compile( # regex to parse out the line _body_
1201         "^(?P<prefix>%s)(?P<space>\s*)(?P<body>.*?)(?P<suffix>%s|)\s*?$"
1202         % (prefix_pattern, suffix_pattern))
1203
1204     original_code_is = None
1205     original_code_date = None
1206     # Parse out the "The Original Code is ..." paragraph _content_.
1207     paragraph = ""
1208     in_paragraph = 0
1209     for i in range(beginline["linenum"], endline["linenum"]+1):
1210         body = lic_line_pattern.match(lines[i]).group("body")
1211         if (not in_paragraph and body.startswith("The Original Code is")):
1212             in_paragraph = 1
1213         if in_paragraph:
1214             if not body.strip(): # i.e. a blank line, end of paragraph
1215                 break
1216             # ensure one space btwn lines
1217             if paragraph: paragraph = paragraph.rstrip() + " "
1218             paragraph += body
1219     if paragraph:
1220         pattern1 = re.compile('^The Original Code is (.*), released (.*)\.')
1221         match = pattern1.search(paragraph)
1222         if match:
1223             original_code_is = match.group(1)
1224             original_code_date = match.group(2)
1225         else:
1226              pattern2 = re.compile('^The Original Code is (.*?)\.?$')
1227              match = pattern2.search(paragraph)
1228              if match:
1229                  original_code_is = match.group(1)
1230              else:
1231                 raise RelicError(
1232                     "%s: 'The Original Code is' paragraph did not match the "
1233                     "expected patterns. paragraph=\n\t%r\n"
1234                     "pattern1=\n\t%r\npattern2=\n\t%r"
1235                     % (filename, paragraph, pattern1.pattern, pattern2.pattern))
1236     lic_info["original_code_is"] = original_code_is
1237     lic_info["original_code_date"] = original_code_date
1238     log.info("original code is: %s", original_code_is)
1239     log.info("original_code_date: %s", original_code_date)
1240
1241     initial_developer = None
1242     initial_copyright_date = None
1243     # Parse out the "The Initial Developer..." paragraph _content_.
1244     paragraph = ""
1245     in_paragraph = 0
1246     for i in range(beginline["linenum"], endline["linenum"]+1):
1247         body = lic_line_pattern.match(lines[i]).group("body")
1248         if (not in_paragraph and
1249             (body.startswith("The Initial Developer of") or
1250              body.startswith("The Initial Developers of"))):
1251             in_paragraph = 1
1252         if in_paragraph:
1253             if not body.strip(): # i.e. a blank line, end of paragraph
1254                 # Catch the possible case where there is an empty line
1255                 # but the paragraph picks up on the next line with
1256                 # "Portions created by"
1257                 # (test_relicense_no_period_after_origcodeis.cpp).
1258                 try:
1259                     nextlinebody = lic_line_pattern.match(lines[i+1]).group("body")
1260                 except:
1261                     nextlinebody = ""
1262                 if not nextlinebody.startswith("Portions created by"):
1263                     break
1264             # ensure one space btwn lines
1265             if paragraph: paragraph = paragraph.rstrip() + " "
1266             paragraph += body
1267     if paragraph:
1268         pattern = re.compile("""^
1269             The\ Initial\ Developers?\ of\
1270             (the\ Original\ Code\ (is\ |are\ |is\.)|this\ code\ under\ the\ [MN]PL\ (is|are)\ )
1271             (?P<developer>.*?)
1272             \.?         # maybe a trailing period
1273             (
1274                 \s+Portions\ created\ by\ .*?
1275                 are\ Copyright\ \(C\)\[?\ (?P<date>[\d-]+)
1276                 .*?     # maybe a trailing period
1277                 (\s+All\ Rights\ Reserved\.)?
1278             )?
1279             $""", re.VERBOSE)
1280         match = pattern.search(paragraph)
1281         if not match:
1282             raise RelicError(
1283                 "%s: 'This Initial Developer' paragraph did not match the "
1284                 "expected pattern. paragraph=\n\t%r\npattern=\n\t%s"
1285                 % (filename, paragraph, pattern.pattern))
1286         initial_developer = match.group("developer")
1287         initial_copyright_date = match.group("date")
1288     lic_info["initial_developer"] = initial_developer
1289     lic_info["initial_copyright_date"] = initial_copyright_date
1290     log.info("initial developer paragraph: %r", paragraph)
1291     log.info("initial developer: %r", initial_developer)
1292     log.info("initial copyright date: %r", initial_copyright_date)
1293
1294     contributors = []
1295     normal_leading_space = None
1296     in_contributors_block = 0
1297     contrib_end = endline["linenum"]
1298     # If line-style comment, include the last line in the block in the
1299     # range we examine; if block-style comment, we only allow it if the
1300     # comment-block doesn't end on the endline. On top of these
1301     # conditions we don't search the last line if it includes the
1302     # special end-of-license marker.
1303     if len(comment_delims) == 1 or not endline["suffix"]:
1304         if endline["content"].find("END LICENSE BLOCK") == -1:
1305             contrib_end += 1
1306     for i in range(beginline["linenum"], contrib_end):
1307         match = lic_line_pattern.match(lines[i])
1308         body = match.group("body")
1309         space = match.group("space").replace('\t', ' '*8)
1310         if not in_contributors_block \
1311            and body.startswith("Contributor"):
1312             in_contributors_block = 1
1313             normal_leading_space = space
1314             # Try to pickup "foo@bar.org" as a contributor for a
1315             # possible line like this:
1316             #   Contributor(s): foo@bar.org
1317             pivot = body.find(':')
1318             if pivot != -1:
1319                 remainder = body[pivot+1:].strip()
1320                 if remainder:
1321                     contributors.append(remainder)
1322         elif in_contributors_block:
1323             if not body.strip():
1324                 # i.e. a blank line, end of paragraph
1325                 #XXX:BUG This condition causes the latter two
1326                 #        contributor lines to be lost from, e.g.,
1327                 #        test/x_thead_align_center.xml.
1328                 break
1329             if len(space) <= len(normal_leading_space):
1330                 # A line in the "Contributor(s) paragraph is not
1331                 # indented. This is considered an error.  Likely this is
1332                 # a (not indented) contributor, but it might also be the
1333                 # start of another paragraph (i.e. no blank line
1334                 # terminating the "Contributor(s):" paragraph). We could
1335                 # just error out here, but this is very common in the
1336                 # Moz tree (~500) so lets try to deal with it.
1337                 # - Heuristic #1: if the line contains what looks like
1338                 #   an email address then this it is a contributor.
1339                 # - Heuristic #2 (to accomodate js/rhino): if the line
1340                 #   looks like just a person's name.
1341                 # Otherwise, error out.
1342                 words = body.split()
1343                 if '@' in body:
1344                     lic_info["unindented_contributor_lines"] = 1
1345                 elif (2 <= len(words) <= 3 and
1346                       words == [word[0].upper()+word[1:] for word in words]):
1347                     # Try to accept the following names:
1348                     #    Norris Boyd
1349                     #    Mike McCabe
1350                     #    George C. Scott
1351                     lic_info["unindented_contributor_lines"] = 1
1352                 else:
1353                     raise RelicError("This line is part of the "
1354                         "'Contributor(s):' paragraph but (1) is not indented "
1355                         "and (2) does not look like it contains an email "
1356                         "address: %s:%s: %r" % (filename, i, lines[i]))
1357             contributors.append(body.strip())
1358     log.info("contributors: %s", contributors)
1359     lic_info["contributors"] = contributors
1360
1361     ## Optimization: The only content in the remain license block lines
1362     ## (i.e. after the contributors block) should really be the GPL/LGPL
1363     ## or nothing. Trapping this will avoid losing the latter two
1364     ## contributor lines in test/x_thead_align_center.xml.
1365     #gpl_lgpl_lines = _g_trilicense_parts["gpl/lgpl"].splitlines(0)
1366     #gpl_lgpl = " ".join(gpl_lgpl_lines)
1367     #for i in range(i, endline["linenum"]):
1368     #    match = lic_line_pattern.match(lines[i])
1369     #    body = match.group("body")
1370     #    space = match.group("space").replace('\t', ' '*8)
1371     #    if not body.strip():
1372     #        continue
1373     #    #XXX This test is no robust enough to use.
1374     #    if (gpl_lgpl.find(body) == -1 and
1375     #        body.find(gpl_lgpl) == -1):
1376     #        print "QQQ: bogus following text: %r" % body
1377
1378     return lic_info
1379
1380
1381 def _report_on_file(path, (results, switch_to_mpl, show_initial, quick, _errors)):
1382     log.debug("_report_on_file(path='%s', results)", path)
1383     output = path + "\n"
1384     lic_info = {}
1385
1386     if _is_binary(path):
1387         output += "... binary, skipping this file\n"
1388     else:
1389         try:
1390             lic_info = _get_license_info(path, show_initial, quick)
1391         except RelicError, ex:
1392             return _relicensing_error(ex, path, _errors)
1393
1394         if log.isEnabledFor(logging.DEBUG):
1395             pprint.pprint(lic_info)
1396         parts = lic_info["parts"]
1397         if not parts:
1398             output += "... no license found\n"
1399         elif "unknown" in parts:
1400             output += "... unknown license (possibly) found\n"
1401         elif ((parts == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
1402                parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]) and
1403               not lic_info.get("unindented_contributor_lines")):
1404             if (switch_to_mpl and
1405                 parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
1406                 output += "... %s found (looks complete, but is not MPL)"\
1407                           % "/".join(parts) + "\n"
1408             else:
1409                 output += "... %s found (looks complete)"\
1410                           % "/".join(parts) + "\n"
1411         else:
1412             output += "... %s found" % "/".join(parts) + "\n"
1413
1414         if not quick:
1415             if "begin_line" in lic_info and "end_line" in lic_info:
1416                 output += "... license block lines: %(begin_line)d-%(end_line)d"\
1417                   % lic_info + "\n"
1418             if "original_code_is" in lic_info:
1419                 output += "... original code is: %(original_code_is)s"\
1420                       % lic_info + "\n"
1421             if "original_code_date" in lic_info:
1422                 output += "... original code date: %(original_code_date)s"\
1423                       % lic_info + "\n"
1424             if "initial_developer" in lic_info:
1425                 output += "... initial developer: %(initial_developer)s"\
1426                       % lic_info + "\n"
1427             if "initial_copyright_date" in lic_info:
1428                 output += "... initial copyright date: %(initial_copyright_date)s"\
1429                       % lic_info + "\n"
1430             if "contributors" in lic_info:
1431                 output += "... contributors: %s"\
1432                       % ", ".join(lic_info["contributors"]) + "\n"
1433             if lic_info.get("unindented_contributor_lines"):
1434                 output += "... one or more contributor lines were not indented properly"\
1435                       + "\n"
1436
1437     if show_initial:
1438       if "initial_developer" in lic_info:
1439         print lic_info["initial_developer"]
1440     else:
1441       print output;
1442
1443 def _gather_info_on_file(path, (results, _errors)):
1444     log.debug("_gather_info_on_file(path='%s', results)", path)
1445     # Skip binary files.
1446     try:
1447         if _is_binary(path):
1448             log.debug("Skipping binary file '%s'.", path)
1449             return
1450     except Exception, ex:
1451         return _relicensing_error(
1452             "error determining if file is binary: %s" % ex,
1453             path, _errors)
1454
1455     try:
1456         results[path] = _get_license_info(path)
1457     except RelicError, ex:
1458         return _relicensing_error(ex, path, _errors, 1)
1459
1460
1461 def _make_backup_path(path):
1462     for n in range(100):
1463         backup_path = "%s~%d" % (path, n)
1464         if not os.path.exists(backup_path):
1465             return backup_path
1466     raise RelicError("Could not find an unused backup path for '%s'." % path)
1467
1468
1469 def _relicensing_error(err, path, cache=None, quiet=0):
1470     """Handle an error during relicensing.
1471
1472         "err" may be an error string or an exception instance.
1473         "path" is the path of the file on which this error occured.
1474         "cache" is a mapping of path to errors on which errors may be
1475             stored for later reporting.
1476         "quiet" optionally allows one to silence the stdout output when
1477             force is in effect.
1478
1479     If the --force option is in-effect then errors may be remembered and
1480     processing continues, rather than halting the whole process.
1481     """
1482     if _g_force:
1483         if not quiet:
1484             print "...", err
1485         if cache is not None:
1486             cache[path] = err
1487     elif isinstance(err, Exception):
1488         raise
1489     else:
1490         raise RelicError("%s: %s" % (path, err))
1491
1492
def _get_comment_delim_sets(filename):
    """Return the comment delimiter info registered for the given file.

    The lookup is tried in this order, stopping at the first hit:
      1. the basename in '_g_basename_to_comment_info',
      2. the extension in '_g_ext_to_comment_info',
      3. the first line of the file, if it is a shebang line, against
         the patterns in '_g_shebang_pattern_to_comment_info'.
    A trailing ".in" on the filename is dropped for steps 1 and 2.

    Raises RelicError if none of the lookups yields anything.
    """
    comment_delims = None
    if os.path.splitext(filename)[1] == ".in":
        # "<foo>.in" is generally a precursor for a filetype
        # identifiable without the ".in". Drop it.
        xfilename = os.path.splitext(filename)[0]
    else:
        xfilename = filename
    # special cases for some basenames
    basename = os.path.basename(xfilename)
    try:
        comment_delims = _g_basename_to_comment_info[basename]
    except KeyError:
        pass
    if not comment_delims: # use the file extension
        ext = os.path.splitext(xfilename)[1]
        try:
            comment_delims = _g_ext_to_comment_info[ext]
        except KeyError:
            pass
    if not comment_delims: # try to use the shebang line, if any
        fin = open(filename, 'r')
        try:
            # Close the file even if reading from it fails.
            firstline = fin.readline()
        finally:
            fin.close()
        if firstline.startswith("#!"):
            for pattern, cds in _g_shebang_pattern_to_comment_info:
                if pattern.match(firstline):
                    comment_delims = cds
                    break
    if not comment_delims:
        # Note: "ext" is always bound here because reaching this point
        # means the extension lookup above was attempted.
        raise RelicError("%s: couldn't determine file type (and "
            "comment delimiter info) from basename '%s' or "
            "extension '%s'): you may need to add to "
            "'_g_basename_to_comment_info', "
            "'_g_ext_to_comment_info', "
            "'_g_shebang_pattern_to_comment_info' "
            "or one of the '_g_skip_*' globals"
            % (filename, basename, ext))
    return comment_delims
1532
1533
1534 def _relicense_file(original_path,
1535                     (fallback_initial_copyright_date,
1536                      fallback_initial_developer,
1537                      fallback_original_code_is,
1538                      fallback_original_code_date,
1539                      switch_to_mpl,
1540                      backup,
1541                      results,
1542                      force_relicensing,
1543                      _errors)):
1544     """Relicense the given file.
1545
1546         "original_path" is the file to relicense
1547         "fallback_initial_copyright_date"
1548         "fallback_initial_developer"
1549         "fallback_original_code_is"
1550         "fallback_original_code_date"
1551             User-specified fallback values to use for the new license
1552             block if they cannot be found in the original.
1553         "switch_to_mpl" is a boolean indicating if an NPL-based license
1554             should be converted to MPL.
1555         "backup" (optional, default false) is a boolean indicating if
1556             backups should be made
1557         "results" is a dictionary in which to store statistics and errors.
1558             See relicense() for schema.
1559         "force_relicensing" is a boolean indicating if relicensing
1560             should be done even if the license block looks complete.
1561         "_errors" is a dictionary on which errors are reported
1562             (keyed by file path) when the force option is in effect.
1563
1564     The function does not return anything.
1565     """
1566     log.debug("_relicense_file(original_path='%s')", original_path)
1567     print original_path
1568
1569     # Ensure can access file.
1570     if not os.access(original_path, os.R_OK|os.W_OK):
1571         return _relicensing_error("cannot access", original_path, _errors)
1572     else:
1573         log.info("have read/write access")
1574
1575     # Skip binary files.
1576     try:
1577         if _is_binary(original_path):
1578             print "... binary, skipping this file"
1579             results["binary"] += 1
1580             return
1581     except Exception, ex:
1582         return _relicensing_error(
1583             "error determining if file is binary: %s" % ex,
1584             original_path, _errors)
1585
1586     try:
1587         lic_info = _get_license_info(original_path, 0)
1588     except RelicError, ex:
1589         return _relicensing_error(ex, original_path, _errors)
1590
1591     # Load fallback info if necessary.
1592     if not lic_info.get("initial_copyright_date"):
1593         lic_info["initial_copyright_date"] = fallback_initial_copyright_date
1594     if not lic_info.get("initial_developer"):
1595         lic_info["initial_developer"] = fallback_initial_developer
1596     if not lic_info.get("original_code_is"):
1597         lic_info["original_code_is"] = fallback_original_code_is
1598     if not lic_info.get("original_code_date"):
1599         lic_info["original_code_date"] = fallback_original_code_date
1600
1601     # Return/abort if cannot or do not need to re-license.
1602     parts = lic_info["parts"]
1603     if not parts:
1604         results["no license"] += 1
1605         print "... no license found, skipping this file"
1606         return
1607     elif "unknown" in parts:
1608         return _relicensing_error("unknown license (possibly) found",
1609                                   original_path, _errors)
1610     elif parts.count("block_begin") > 1: # sanity check
1611         return _relicensing_error(
1612             "'BEGIN LICENSE BLOCK' delimiter found more than once",
1613             original_path, _errors)
1614     elif parts.count("block_end") > 1: # sanity check
1615         return _relicensing_error(
1616             "'END LICENSE BLOCK' delimiter found more than once",
1617             original_path, _errors)
1618     elif not lic_info["initial_developer"]:
1619         return _relicensing_error(
1620             "no 'Initial Developer' section was found -- use "
1621             "the -i option to specify your own",
1622             original_path, _errors)
1623     elif not lic_info["initial_copyright_date"]:
1624         return _relicensing_error(
1625             "no initial copyright year was found -- use "
1626             "the -y option to specify your own",
1627             original_path, _errors)
1628     elif not lic_info["original_code_is"]:
1629         return _relicensing_error(
1630             "no 'Original Code is' section was found -- use "
1631             "the -o option to specify your own",
1632             original_path, _errors)
1633     elif ((parts == ["block_begin", "mpl", "gpl", "lgpl", "block_end"] or
1634            parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]) and
1635           not lic_info.get("unindented_contributor_lines")):
1636         #XXX Should add an option to relicense anyway because matching
1637         #    is not super-strict. E.g. nsWidgetFactory.cpp.
1638         if (switch_to_mpl and
1639             parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]):
1640             print "... %s found (looks complete, but is not MPL)"\
1641                   % "/".join(parts)
1642         elif force_relicensing:
1643             print "... %s found (looks complete, but forcing relicensing)"\
1644                   % "/".join(parts)
1645         else:
1646             results["good"] += 1
1647             print "... %s found (looks complete), nothing to do"\
1648                   % "/".join(parts)
1649             return
1650
1651     # We need to re-license this file.
1652     print "... %s found, need to relicense" % "/".join(parts)
1653     if lic_info["original_code_is"]:
1654         print "... original code is: %(original_code_is)s" % lic_info
1655     if lic_info["original_code_date"]:
1656         print "... original code date: %(original_code_date)s" % lic_info
1657     if lic_info["initial_developer"]:
1658         print "... initial developer: %(initial_developer)s" % lic_info
1659     if lic_info["initial_copyright_date"]:
1660         print "... initial copyright date: %(initial_copyright_date)s" % lic_info
1661     if lic_info["contributors"]:
1662         print "... contributors: %s" % ", ".join(lic_info["contributors"])
1663
1664     # Put the license block together.
1665     # - build up the license block from the appropriate parts
1666     trilicense = ""
1667     if (not switch_to_mpl) and ( "npl" in parts ):
1668         trilicense_name = "NPL/GPL/LGPL"
1669         trilicense += _g_trilicense_parts["npl"]
1670     else:
1671         trilicense_name = "MPL/GPL/LGPL"
1672         trilicense += _g_trilicense_parts["mpl"]
1673     print "... replacing lines %d-%d with %s tri-license"\
1674           % (lic_info["begin_line"], lic_info["end_line"], trilicense_name)
1675     if lic_info["original_code_is"] is not None:
1676         if lic_info["original_code_date"] is not None:
1677             trilicense += _g_trilicense_parts["original_code_is_with_date"] % lic_info
1678         else:
1679             trilicense += _g_trilicense_parts["original_code_is"] % lic_info
1680     #else:
1681     #    raise RelicError("Gerv, how should the new license block handle no "
1682     #                     "'Originial Code is...' information? --TM")
1683     if (lic_info["initial_developer"] is not None
1684         and lic_info["initial_copyright_date"] is not None):
1685         trilicense += _g_trilicense_parts["initial_developer"] % lic_info
1686     #else:
1687     #    raise RelicError("Gerv, how should the new license block handle no "
1688     #                     "'Initial Developer is...' information? --TM")
1689     if lic_info["contributors"]:
1690         contributors = "  " + "\n  ".join(lic_info["contributors"]) + "\n"
1691     else:
1692         contributors = ""
1693     trilicense += _g_trilicense_parts["contributors"] % contributors
1694     if trilicense_name == "NPL/GPL/LGPL":
1695         trilicense += _g_trilicense_parts["gpl/lgpl for npl"]
1696     else: # trilicense_name == "MPL/GPL/LGPL"
1697         trilicense += _g_trilicense_parts["gpl/lgpl for mpl"]
1698
1699     # get fallback comment subsequent prefix
1700     fallback_prefix = _get_comment_delim_sets(original_path)
1701
1702     # - add the comment delimiters
1703     lines = trilicense.splitlines()
1704     for i in range(len(lines)):
1705         if i == 0:
1706             prefix = lic_info["first_prefix"]
1707         else:
1708             if lic_info["subsequent_prefix"]:
1709                 prefix = lic_info["subsequent_prefix"]
1710             else:
1711                 prefix = fallback_prefix[0][1]
1712         if lines[i]:
1713             if len(lic_info["comment_delims"]) == 0:
1714                 lines[i] = prefix + lines[i]
1715             else:
1716                 lines[i] = prefix + ' ' + lines[i]
1717         else: # don't add trailing whitespace
1718             lines[i] = prefix
1719     if lic_info["last_suffix"]: # don't add that ' ' if there is no suffix
1720         lines[-1] += ' ' + lic_info["last_suffix"]
1721     for i in range(len(lines)): lines[i] += '\n'
1722     trilicense_lines = lines
1723
1724     ##### uncomment to debug license block
1725     # pprint.pprint(lines)
1726     # return
1727
1728     # Skip out now if doing a dry-run.
1729     if _g_dry_run:
1730         results["relicensed"] += 1
1731         return
1732
1733     # Make a backup.
1734     if backup:
1735         backup_path = _make_backup_path(original_path)
1736         print "... backing up to '%s'" % backup_path
1737         try:
1738             shutil.copy(original_path, backup_path)
1739         except EnvironmentError, ex:
1740             return _relicensing_error(ex, original_path, _errors)
1741
1742     # Re-license the file.
1743     try:
1744         fin = open(original_path, "r")
1745         try:
1746             lines = fin.readlines()
1747         finally:
1748             fin.close()
1749
1750         lines[lic_info["begin_line"]:lic_info["end_line"]+1] = trilicense_lines
1751
1752         fout = open(original_path, "w")
1753         try:
1754             fout.write(''.join(lines))
1755         finally:
1756             fout.close()
1757         results["relicensed"] += 1
1758         print "... done relicensing '%s'" % original_path
1759     except:
1760         if backup:
1761             print "... error relicensing, restoring original"
1762             if os.path.exists(original_path):
1763                 os.remove(original_path)
1764             os.rename(backup_path, original_path)
1765         else:
1766             print "... error relicensing, file may be corrupted"
1767         # fallback to type_ for string exceptions
1768         type_, value, tb = sys.exc_info()
1769         return _relicensing_error(value or type_,
1770                                   original_path, _errors)
1771
1772
1773 def _add_license_to_file(original_path,
1774                          (initial_copyright_date,
1775                           initial_developer,
1776                           original_code_is,
1777                           original_code_date,
1778                           backup,
1779                           results,
1780                           _errors)):
1781     """Relicense the given file.
1782
1783         "original_path" is the file to relicense
1784         "initial_copyright_date"
1785         "initial_developer"
1786         "original_code_is"
1787         "original_code_date"
1788             User-specified values to use for the new license. All but
1789             "original_code_date" are required.
1790         "backup" (optional, default false) is a boolean indicating if
1791             backups should be made
1792         "results" is a dictionary in which to store statistics and errors.
1793             See relicense() for schema.
1794         "_errors" is a dictionary on which errors are reported
1795             (keyed by file path) when the force option is in effect.
1796
1797     The function does not return anything.
1798     """
1799     log.debug("_add_license_to_file(original_path='%s')", original_path)
1800     print original_path
1801
1802     # Ensure can access file.
1803     if not os.access(original_path, os.R_OK|os.W_OK):
1804         return _relicensing_error("cannot access", original_path, _errors)
1805     else:
1806         log.info("have read/write access")
1807
1808     # Skip binary files.
1809     try:
1810         if _is_binary(original_path):
1811             print "... binary, skipping this file"
1812             results["binary"] += 1
1813             return
1814     except Exception, ex:
1815         return _relicensing_error(
1816             "error determining if file is binary: %s" % ex,
1817             original_path, _errors)
1818
1819     try:
1820         lic_info = _get_license_info(original_path, 0)
1821     except RelicError, ex:
1822         return _relicensing_error(ex, original_path, _errors)
1823
1824     # Return/abort if cannot or do not need to re-license.
1825     parts = lic_info["parts"]
1826     if lic_info["parts"]: # has a license
1827         results["license"] += 1
1828         print "... license found, skipping this file"
1829         return
1830     #... else we need to add a license to this file.
1831     print "... no license found, need to add one"
1832
1833     # Load license info.
1834     lic_info["initial_developer"] = initial_developer
1835     print "... initial developer: %(initial_developer)s" % lic_info
1836     lic_info["initial_copyright_date"] = initial_copyright_date
1837     print "... initial copyright date: %(initial_copyright_date)s" % lic_info
1838     lic_info["original_code_is"] = original_code_is
1839     print "... original code is: %(original_code_is)s" % lic_info
1840     if original_code_date:
1841         lic_info["original_code_date"] = original_code_date
1842         print "... original code date: %(original_code_date)s" % lic_info
1843     else:
1844         lic_info["original_code_date"] = None
1845
1846     # Determine what line we can start putting the license block on.
1847     # Typically this would be line 0, but for the following exceptions:
1848     # - Shebang (#!) lines
1849     # - Emacs local variables line:
1850     #     /* -*- Mode: C++; ... -*- */
1851     #   This line does not HAVE to be first, but that seems to be a
1852     #   trend, so might as well honour it.
1853     # - XML magic "number":  <?xml version="2.0" ... ?>
1854     #   where "..." might include newlines
1855     startline = 0
1856     try:
1857         comment_delim_sets = _get_comment_delim_sets(original_path)
1858     except RelicError, ex:
1859         return _relicensing_error(ex, original_path, _errors, 1)
1860     fin = open(original_path, 'r')
1861     try:
1862         lines = fin.readlines()
1863     finally:
1864         fin.close()
1865     # If this is an XML file, advance past the magic number tag.
1866     if lines and lines[0].find("<?xml") != -1:
1867         line = lines[0]
1868         if (line.find('encoding="utf-8"') != -1
1869             and line.startswith("\xef\xbb\xbf")):
1870             # remove UTF-8 BOM
1871             # Note: this is hardly robust Unicode XML handling :)
1872             line = line[3:]
1873         if line.startswith("<?xml"):
1874             end_index = lines[startline].find("?>")
1875             while startline < len(lines):
1876                 startline += 1
1877                 if end_index != -1: # found end of tag
1878                     break
1879             # Note: this does not catch something like this:
1880             #   <?xml version="2.0"?> <?stylesheet ...
1881             #   ...?>
1882             # but that is just crazy.
1883     # else, advance past a possible shebang line.
1884     else:
1885         for comment_delims in comment_delim_sets:
1886             if (len(comment_delims) == 1 and comment_delims[0] == "#"
1887                 and lines[0].startswith("#!")):
1888                 startline += 1
1889     # Advance past an Emacs local variable line.
1890     comment_delims = None
1891     if lines[startline].find("-*-") != -1:
1892         for comment_delims in comment_delim_sets:
1893             if lines[startline].find(comment_delims[0]) != -1:
1894                 break
1895         else:
1896             # We were hoping to be able to determine which of the set of
1897             # possible commenting styles was in use by finding the
1898             # comment start token on the same line as the -*-
1899             # Emacs-modeline signifier, but could not. This likely means
1900             # that this file uses a block-style comment but the block
1901             # doesn't start on the same line. Fallback to the
1902             # block-style comment delimiter set, if there is one.
1903             for comment_delims in comment_delim_sets:
1904                 if len(comment_delims) == 3:
1905                     break
1906             else:
1907                 comment_delims = comment_delim_sets[0]
1908
1909         if len(comment_delims) == 1: # line-style comments
1910             startline += 1
1911         else: # block-style comments
1912             in_comment = 0
1913             while startline < len(lines):
1914                 line = lines[startline]
1915                 linepos = 0
1916                 while linepos < len(line):
1917                     if not in_comment:
1918                         i = line.find(comment_delims[0], linepos)
1919                         if i == -1:
1920                             break
1921                         else:
1922                             in_comment = 1
1923                             linepos = i+1
1924                     else:
1925                         i = line.find(comment_delims[-1], linepos)
1926                         if i == -1:
1927                             break
1928                         else:
1929                             in_comment = 0
1930                             linepos = i+1
1931                 startline += 1
1932                 if not in_comment:
1933                     break
1934
1935     # Put the license block together.
1936     # - build up the license block from the appropriate parts
1937     trilicense_name = "MPL/GPL/LGPL"
1938     print "... adding %s tri-license starting at line %s (zero-based)"\
1939           % (trilicense_name, startline)
1940     trilicense =  _g_trilicense_parts["mpl"]
1941     if lic_info["original_code_date"] is not None:
1942         trilicense += _g_trilicense_parts["original_code_is_with_date"] % lic_info
1943     else:
1944         trilicense += _g_trilicense_parts["original_code_is"] % lic_info
1945     trilicense += _g_trilicense_parts["initial_developer"] % lic_info
1946     if lic_info.get("contributors"):
1947         contributors = "  " + "\n  ".join(lic_info["contributors"]) + "\n"
1948     else:
1949         contributors = ""
1950     trilicense += _g_trilicense_parts["contributors"] % contributors
1951     trilicense += _g_trilicense_parts["gpl/lgpl for mpl"]
1952     # - add the comment delimiters
1953     if comment_delims is None:
1954         for comment_delims in comment_delim_sets:
1955             if lines[startline].find(comment_delims[0]) != -1:
1956                 break
1957             elif len(comment_delims) == 3 and lines[startline].find(comment_delims[1]) != -1:
1958                 break
1959         else:
1960             # We were hoping to be able to determine which of the set of
1961             # possible commenting styles was in use by finding the
1962             # comment start token on the same line as the -*-
1963             # Emacs-modeline signifier, but could not. This likely means
1964             # that this file uses a block-style comment but the block
1965             # doesn't start on the same line. Fallback to the
1966             # block-style comment delimiter set, if there is one.
1967             for comment_delims in comment_delim_sets:
1968                 if len(comment_delims) == 3:
1969                     break
1970             else:
1971                 comment_delims = comment_delim_sets[0]
1972         print "comment delims were none: %r" % comment_delims
1973     t_lines = trilicense.splitlines()
1974     if len(comment_delims) == 1: # line-style comments
1975         for i in range(len(t_lines)):
1976             if t_lines[i]:
1977                 t_lines[i] = comment_delims[0] + ' ' + t_lines[i]
1978             else: # don't add trailing whitespace
1979                 t_lines[i] = comment_delims[0]
1980     else: # block-style comments
1981         if t_lines[0]:
1982             t_lines[0] = comment_delims[0] + ' ' + t_lines[0]
1983         else: # don't add trailing whitespace
1984             t_lines[0] = comment_delims[0]
1985         for i in range(1, len(t_lines)):
1986             if t_lines[i]:
1987                 t_lines[i] = comment_delims[1] + ' ' + t_lines[i]
1988             else: # don't add trailing whitespace
1989                 t_lines[i] = comment_delims[1]
1990         t_lines[-1] += ' ' + comment_delims[-1]
1991     for i in range(len(t_lines)): t_lines[i] += '\n'
1992     t_lines[-1] += '\n' # add a blank line at end of lic block
1993     trilicense_lines = t_lines
1994     #pprint.pprint(t_lines)
1995
1996     # Skip out now if doing a dry-run.
1997     if _g_dry_run:
1998         results["added"] += 1
1999         return
2000
2001     # Make a backup.
2002     if backup:
2003         backup_path = _make_backup_path(original_path)
2004         print "... backing up to '%s'" % backup_path
2005         try:
2006             shutil.copy(original_path, backup_path)
2007         except EnvironmentError, ex:
2008             return _relicensing_error(ex, original_path, _errors)
2009
2010     # Add the license to the file.
2011     try:
2012         lines[startline:startline] = trilicense_lines
2013
2014         fout = open(original_path, "w")
2015         try:
2016             fout.write(''.join(lines))
2017         finally:
2018             fout.close()
2019         results["added"] += 1
2020         print "... done adding license to '%s'" % original_path
2021     except:
2022         if backup:
2023             print "... error adding license, restoring original"
2024             if os.path.exists(original_path):
2025                 os.remove(original_path)
2026             os.rename(backup_path, original_path)
2027         else:
2028             print "... error adding license, file may be corrupted"
2029         # fallback to type_ for string exceptions
2030         type_, value, tb = sys.exc_info()
2031         return _relicensing_error(value or type_,
2032                                   original_path, _errors)
2033
2034
2035 def _traverse_dir((file_handler, results), dirname, names):
2036     """os.path.walk target to traverse the give dir"""
2037     log.debug("_traverse_dir((file_handler, results), dirname='%s', "
2038               "names=%s)", dirname, names)
2039     # Iterate over names backwards because may modify it in-place.
2040     # Modifying it in-place ensures that removed entries are not
2041     # traversed by os.path.walk.
2042     for i in range(len(names)-1, -1, -1):
2043         path = os.path.join(dirname, names[i])
2044         if os.path.isdir(path):
2045             if _should_skip_dir(path):
2046                 del names[i]
2047                 continue
2048         if os.path.isfile(path):
2049             if _should_skip_file(path):
2050                 del names[i]
2051                 continue
2052             if file_handler is not None:
2053                 file_handler(path, results)
2054
def _traverse(paths, file_handler, arg):
    """Traverse the given path(s) and call the given callback for each.

        "paths" is either a list of files or directories, or it is an
            input stream with a path on each line.
        "file_handler" is a callable to be called on each file traversed.
            It is called with the following signature:
                file_handler(path, arg)
        "arg" is some object passed to each callback. This is useful for
            recording results.

    This method takes care of skipping files and directories that should
    be skipped according to .cvsignore files and the configured skip
    paths.  This method does not return anything. Paths that do not
    exist are skipped with a warning; RelicError is raised for a path
    that exists but is neither a file nor a directory.
    """
    log.debug("_traverse(paths=%s, file_handler=%s, arg=%s)",
              paths, file_handler, arg)

    for path in paths:
        # chomp if 'paths' is a stream (endswith, rather than indexing,
        # so that an empty path string does not blow up here)
        if path.endswith("\n"):
            path = path[:-1]
        if not os.path.exists(path):
            log.warn("'%s' does not exist, skipping", path)
        elif os.path.isfile(path):
            if _should_skip_file(path):
                continue
            if file_handler is not None:
                file_handler(path, arg)
        elif os.path.isdir(path):
            if _should_skip_dir(path):
                continue
            os.path.walk(path, _traverse_dir, (file_handler, arg))
        else:
            raise RelicError("unexpected path type '%s'" % path)
2088
2089
2090
2091 #---- public routines
2092
def relicense(paths,
              fallback_initial_copyright_date=None,
              fallback_initial_developer=None,
              fallback_original_code_is=None,
              fallback_original_code_date=None,
              switch_to_mpl=0,
              backup=0,
              force_relicensing=0,
              _errors=None):
    """Re-license the given file(s), or the files under the given dir(s).

        "paths" is a list of files and/or directories, or an input
            stream that yields one path per line.
        "fallback_initial_copyright_date"
        "fallback_initial_developer"
        "fallback_original_code_is"
        "fallback_original_code_date"
            User-supplied values for the new license block, used when
            they cannot be found in the original block.
        "switch_to_mpl" (optional, default false) is a boolean
            indicating whether an NPL-based license should be converted
            to MPL.
        "backup" (optional, default false) is a boolean indicating
            whether backups should be made.
        "force_relicensing" (optional, default false) is a boolean
            indicating whether relicensing should happen even if the
            license block looks complete.
        "_errors" (optional) is a dictionary on which errors are
            reported (keyed by file path) when the force option is in
            effect.

    Once the traversal is finished a summary of the per-category counts
    is printed to stdout. Nothing is returned. RelicError is raised if
    there is a problem; note that OSError/IOError may be raised as well.
    """
    log.debug("relicense(paths=%s, backup=%r)", paths, backup)

    # Tallies that travel with the handler argument and are read back
    # for the summary below.
    results = {"binary": 0, "good": 0, "no license": 0, "relicensed": 0}
    handler_arg = (fallback_initial_copyright_date,
                   fallback_initial_developer,
                   fallback_original_code_is,
                   fallback_original_code_date,
                   switch_to_mpl,
                   backup,
                   results,
                   force_relicensing,
                   _errors)
    _traverse(paths, _relicense_file, handler_arg)

    # Every print below takes exactly one parenthesised expression, so
    # it yields the same text whether print is a statement or a function.
    print("")
    print("--------------------- Summary of Results ------------------------")
    skipped = (
        ("Files skipped b/c they are binary:", "binary"),
        ("Files skipped b/c they already had proper license:", "good"),
        ("Files skipped b/c they had no license:", "no license"),
    )
    for label, key in skipped:
        print("%s %s" % (label, results[key]))
    if not _g_dry_run:
        print("%s %s" % ("Files re-licensed:", results["relicensed"]))
    else:
        print("Files re-licensed: %d (dry-run)" % results["relicensed"])
    print("-----------------------------------------------------------------")
2153
2154
def addlicense(paths,
               initial_copyright_date,
               initial_developer,
               original_code_is,
               original_code_date=None,
               backup=0,
               _errors=None):
    """Add a license block to each given file that does not seem to
    have one yet.

        "paths" is a list of files and/or directories, or an input
            stream that yields one path per line.
        "initial_copyright_date"
        "initial_developer"
        "original_code_is"
        "original_code_date"
            User-supplied values for the new license block. Only
            "original_code_date" may be left out; a false value for any
            of the other three raises RelicError.
        "backup" (optional, default false) is a boolean indicating
            whether backups should be made.
        "_errors" (optional) is a dictionary on which errors are
            reported (keyed by file path) when the force option is in
            effect.

    Once the traversal is finished a summary of the per-category counts
    is printed to stdout. Nothing is returned. RelicError is raised if
    there is a problem; note that OSError/IOError may be raised as well.
    """
    log.debug("addlicense(paths=%s, backup=%r)", paths, backup)

    # Required values, checked in this order, each paired with the
    # complaint raised when it is missing.
    required = (
        (initial_copyright_date,
         "no Initial Developer copyright year was "
         "specified -- use the -y option"),
        (initial_developer,
         "no 'Initial Developer' section was specified "
         "-- use the -i option"),
        (original_code_is,
         "no 'Original Code is' section was specified "
         "-- use the -o option"),
    )
    for given, complaint in required:
        if not given:
            raise RelicError(complaint)

    # Tallies that travel with the handler argument and are read back
    # for the summary below.
    results = {"binary": 0, "license": 0, "added": 0}
    handler_arg = (initial_copyright_date,
                   initial_developer,
                   original_code_is,
                   original_code_date,
                   backup,
                   results,
                   _errors)
    _traverse(paths, _add_license_to_file, handler_arg)

    # Every print below takes exactly one parenthesised expression, so
    # it yields the same text whether print is a statement or a function.
    print("")
    print("--------------------- Summary of Results ------------------------")
    print("%s %s" % ("Files skipped b/c they are binary:",
                     results["binary"]))
    print("%s %s" % ("Files skipped b/c they already had a license:",
                     results["license"]))
    if not _g_dry_run:
        print("Files to which a license was added: %d" % results["added"])
    else:
        print("Files to which a license was added: %d (dry-run)" % results["added"])
    print("-----------------------------------------------------------------")
2214
2215
def report(paths, switch_to_mpl=0, show_initial=1, quick=0, _errors=None):
    """Report on the licenses currently found in the given file(s).

        "paths" is a list of files and/or directories, or an input
            stream that yields one path per line.
        "switch_to_mpl" (optional, default false) is a boolean
            indicating whether an NPL-based license should be converted
            to MPL.
        "show_initial" (optional, default true) is a boolean indicating
            whether the initial developer should be shown for each file.
        "quick" (optional, default false) is a boolean indicating
            whether only basic license checking should be applied.
        "_errors" (optional) is a dictionary on which errors are
            reported (keyed by file path) when the force option is in
            effect.

    Nothing is returned. RelicError is raised if there is a problem.
    """
    log.debug("report(paths=%s)", paths)
    # The leading dict is a fresh results mapping for the handler; this
    # function never reads it back.
    handler_arg = ({}, switch_to_mpl, show_initial, quick, _errors)
    _traverse(paths, _report_on_file, handler_arg)
2239
2240
def statistics(paths, extended=0, quick=0, _errors=None):
    """Print a summary table of the licenses found in the given path(s).

        "paths" is a list of files and/or directories, or an input
            stream that yields one path per line.
        "extended" (optional) is a boolean indicating whether extended
            statistics (the file names behind each count) should be
            shown.
        "quick" (optional) is a boolean indicating whether quick scan
            mode should be enabled.
            NOTE(review): this function accepts the value but does not
            pass it on to the file handler -- confirm whether that is
            intended.
        "_errors" (optional) is a dictionary on which errors are
            reported (keyed by file path) when the force option is in
            effect.

    Nothing is returned.
    """
    #XXX Info gathering returns a lot more info these days, so more
    #    interesting stats could probably be printed here.
    log.debug("statistics(paths=%s, extended=%s)",
              paths, extended)
    results = {}
    _traverse(paths, _gather_info_on_file, (results, _errors))

    # Bucket the files by license category:
    #   <category name>: [<number of hits>, [<files>...]]
    wrappers = ["block_begin", "block_end"]
    stats = {}
    for path, info in results.items():
        parts = info["parts"]
        lic_types = [part for part in parts if part not in wrappers]
        if not lic_types:
            category = "<none found>"
        elif "unknown" in lic_types:
            category = "<unknown license>"
        # A tri-license wrapped in the block begin/end tokens counts as
        # "complete"; anything else is named after the parts found.
        elif parts == ["block_begin", "mpl", "gpl", "lgpl", "block_end"]:
            category = "mpl/gpl/lgpl (complete)"
        elif parts == ["block_begin", "npl", "gpl", "lgpl", "block_end"]:
            category = "npl/gpl/lgpl (complete)"
        else:
            category = "/".join(lic_types)
        bucket = stats.setdefault(category, [0, []])
        bucket[0] += 1
        bucket[1].append(path)

    # Order the categories so that the most common one comes first.
    statslist = []
    for category, bucket in stats.items():
        statslist.append((bucket[0], category, bucket[1]))
    statslist.sort()
    statslist.reverse()

    # Every print below takes exactly one parenthesised expression, so
    # it yields the same text whether print is a statement or a function.
    total = len(results)
    print("Summary of Licenses in Files")
    print("============================")
    print(" Number  Percent License")
    print("------- -------- -----------")
    # e.g. "    115   55.55% npl/gpl"
    for hits, category, members in statslist:
        print("%7d %7.2f%% %s"
              % (hits, (hits*100.0/total), category))
        #XXX A per-file-extension breakdown (for "extended") used to be
        #    printed here; it was taken out because its usefulness was
        #    unclear.
    print("----------------------------")
    print("%7d files processed" % total)

    # Some further statistics on the licensed files.
    no_original_code_is = []
    no_initial_developer = []
    unindented_contributor_lines = []
    for path, info in results.items():
        if "original_code_is" in info and info["original_code_is"] is None:
            no_original_code_is.append(path)
        if "initial_developer" in info and info["initial_developer"] is None:
            no_initial_developer.append(path)
        if info.get("unindented_contributor_lines"):
            unindented_contributor_lines.append(path)

    print("")
    sections = (
        ("Licensed files with no 'Initial Developer...' info: %d",
         no_initial_developer),
        ("Licensed files with no 'Original Code is...' info: %d",
         no_original_code_is),
        ("Licensed files with improperly indented 'Contributor(s):' line(s): %d",
         unindented_contributor_lines),
    )
    for heading, flagged in sections:
        print(heading % len(flagged))
        if extended:
            for path in flagged:
                print("    %s" % path)
    if extended:
        for hits, category, members in statslist:
            print("Files in license category '%s'" % category)
            # Sort a copy so the list held in "stats" stays untouched.
            ordered = members[:]
            ordered.sort()
            for path in ordered:
                print("    %s" % path)
    print("")
2346
2347
2348
2349 #---- mainline
2350
def main(argv):
    """Command-line entry point: parse "argv" and run the chosen mode.

        "argv" is the full argument vector. The options are read from
            argv[1:]; argv[0] is only used in the usage hint.

    Returns 2 if the options cannot be parsed and 1 if a RelicError
    aborts processing. In every other case (including after --help or
    --version, and after a KeyboardInterrupt) nothing is returned.
    """
    global _g_check_all, _g_dry_run, _g_force

    # NOTE(review): "-L"/"--license=" is accepted here, but no branch of
    # the option loop below handles it -- confirm whether that is
    # intended.
    try:
        opts, args = getopt.getopt(argv[1:], "VvadhqfML:sxry:i:o:D:ARI",
            ["version", "verbose", "all", "help", "debug",
             "dry-run", "force", "MPL", "license=",
             "statistics", "relicense", "backup", "add", "defaults",
             "force-relicense", "initial-developers", "quick"])
    except getopt.GetoptError:
        # The exception is fetched via sys.exc_info() instead of being
        # bound in the except clause, because the binding syntax differs
        # between Python versions.
        problem = sys.exc_info()[1]
        log.error(str(problem))
        log.error("Try `%s --help'.", argv[0])
        return 2

    # Defaults, possibly overridden by the options below.
    mode = "report"
    debug = extended = backup = quick = 0
    force_relicensing = switch_to_mpl = show_initial = 0
    fallback_initial_copyright_date = None
    fallback_initial_developer = None
    fallback_original_code_is = None
    fallback_original_code_date = None

    # Options whose only effect is to select the mode of operation.
    mode_switches = {
        "-s": "statistics", "--statistics": "statistics",
        "-r": "relicense", "--relicense": "relicense",
        "-A": "add", "--add": "add",
    }

    # Every print in this function takes exactly one parenthesised
    # expression, so it yields the same text whether print is a
    # statement or a function.
    for flag, value in opts:
        # --help and --version end the run on the spot; options seen
        # before them have already been applied by then.
        if flag in ("-h", "--help"):
            sys.stdout.write(__doc__)
            return
        if flag in ("-V", "--version"):
            print("relic %s" % ".".join([str(n) for n in _version_]))
            return

        if flag in mode_switches:
            mode = mode_switches[flag]
        elif flag in ("-R", "--force-relicense"):
            mode = "relicense"
            force_relicensing = 1
        elif flag in ("-v", "--verbose"):
            log.setLevel(logging.INFO)
        elif flag in ("-d", "--debug"):
            log.setLevel(logging.DEBUG)
            debug = 1
        elif flag in ("-a", "--all"):
            _g_check_all = 1
        elif flag == "--dry-run":
            _g_dry_run = 1
        elif flag in ("-f", "--force"):
            _g_force = 1
        elif flag in ("-M", "--MPL"):
            switch_to_mpl = 1
        elif flag == "-x":
            extended = 1
        elif flag in ("-q", "--quick"):
            quick = 1
        elif flag in ("-I", "--initial-developers"):
            show_initial = 1
        elif flag == "--backup":
            backup = 1
        elif flag == "-y":
            fallback_initial_copyright_date = value
        elif flag == "-i":
            fallback_initial_developer = value
        elif flag == "-o":
            fallback_original_code_is = value
        elif flag == "-D":
            fallback_original_code_date = value
        elif flag == "--defaults":
            fallback_original_code_is = "mozilla.org Code"
            fallback_initial_copyright_date = "2001"
            fallback_initial_developer = "Netscape Communications Corporation"

    try:
        # Without path arguments, the paths are read from stdin.
        if args:
            paths = args
        else:
            log.debug("no given files, trying stdin")
            paths = sys.stdin

        # Run the requested action; "_errors" is handed to it so that
        # delayed errors can be reported afterwards.
        _errors = {}
        if mode == "relicense":
            relicense(paths,
                      fallback_initial_copyright_date=fallback_initial_copyright_date,
                      fallback_initial_developer=fallback_initial_developer,
                      fallback_original_code_is=fallback_original_code_is,
                      fallback_original_code_date=fallback_original_code_date,
                      switch_to_mpl=switch_to_mpl,
                      backup=backup,
                      force_relicensing=force_relicensing,
                      _errors=_errors)
        elif mode == "statistics":
            statistics(paths, extended=extended, quick=quick,
                       _errors=_errors)
        elif mode == "report":
            report(paths, switch_to_mpl=switch_to_mpl,
                   show_initial=show_initial, quick=quick, _errors=_errors)
        elif mode == "add":
            addlicense(paths,
                       initial_copyright_date=fallback_initial_copyright_date,
                       initial_developer=fallback_initial_developer,
                       original_code_is=fallback_original_code_is,
                       original_code_date=fallback_original_code_date,
                       backup=backup,
                       _errors=_errors)
        else:
            raise RelicError("unexpected mode: '%s'" % mode)

        # Report any delayed errors.
        if _errors:
            print("")
            print("=================== Summary of Errors ===========================")
            print("%s %s" % ("Files with processing errors:", len(_errors)))
            print("=================================================================")
            for failed_path, error in _errors.items():
                print("%s: %s" % (failed_path, error))
                print("")
            print("=================================================================")
    except RelicError:
        problem = sys.exc_info()[1]
        log.error(str(problem) +
                  " (the --force option can be used to skip problematic "
                  "files and continue processing rather than aborting)")
        if debug:
            print("")
            import traceback
            traceback.print_exception(*sys.exc_info())
        return 1
    except KeyboardInterrupt:
        # An interrupt ends the run quietly; nothing is returned.
        pass
2485
2486
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)