admin/copyright.py

   1 #!/usr/bin/env python
   2 #
   3 # This file is part of the GROMACS molecular simulation package.
   4 #
   5 # Copyright (c) 2013,2014,2015,2016,2018, by the GROMACS development team, led by
   6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7 # and including many others, as listed in the AUTHORS file in the
   8 # top-level source directory and at http://www.gromacs.org.
   9 #
  10 # GROMACS is free software; you can redistribute it and/or
  11 # modify it under the terms of the GNU Lesser General Public License
  12 # as published by the Free Software Foundation; either version 2.1
  13 # of the License, or (at your option) any later version.
  14 #
  15 # GROMACS is distributed in the hope that it will be useful,
  16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 # Lesser General Public License for more details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public
  21 # License along with GROMACS; if not, see
  22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24 #
  25 # If you want to redistribute modifications to GROMACS, please
  26 # consider that scientific software is very special. Version
  27 # control is crucial - bugs must be traceable. We will be happy to
  28 # consider code for inclusion in the official distribution, but
  29 # derived work must not be called official GROMACS. Details are found
  30 # in the README & COPYING files - if they are missing, get the
  31 # official version at http://www.gromacs.org.
  32 #
  33 # To help us fund GROMACS development, we humbly ask that you cite
  34 # the research papers on the package. Check out http://www.gromacs.org.
  35
  36 """Checks and/or updates copyright headers in GROMACS source files.
  37
  38 It is used internally by several bash scripts to do copyright-related tasks,
  39 but can also be invoked directly for some rare use cases.
  40
  41 See docs/dev-manual/uncrustify.rst for more details.
  42 """
  43
  44 import datetime
  45 import os.path
  46 import re
  47 import sys
  48
  49 from optparse import OptionParser
  50
  51 class CopyrightState(object):
  52
  53     """Information about an existing (or non-existing) copyright header."""
  54
  55     def __init__(self, has_copyright, is_correct, is_newstyle, years, other_copyrights):
  56         self.has_copyright = has_copyright
  57         self.is_correct = is_correct
  58         self.is_newstyle = is_newstyle
  59         self.years = years
  60         self.other_copyrights = other_copyrights
  61
  62 class CopyrightChecker(object):
  63
  64     """Logic for analyzing existing copyright headers and generating new ones."""
  65
  66     _header = ["", "This file is part of the GROMACS molecular simulation package.", ""]
  67     _copyright = "Copyright (c) {0}, by the GROMACS development team, led by"
  68     _footer = """
  69 Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  70 and including many others, as listed in the AUTHORS file in the
  71 top-level source directory and at http://www.gromacs.org.
  72
  73 GROMACS is free software; you can redistribute it and/or
  74 modify it under the terms of the GNU Lesser General Public License
  75 as published by the Free Software Foundation; either version 2.1
  76 of the License, or (at your option) any later version.
  77
  78 GROMACS is distributed in the hope that it will be useful,
  79 but WITHOUT ANY WARRANTY; without even the implied warranty of
  80 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  81 Lesser General Public License for more details.
  82
  83 You should have received a copy of the GNU Lesser General Public
  84 License along with GROMACS; if not, see
  85 http://www.gnu.org/licenses, or write to the Free Software Foundation,
  86 Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  87
  88 If you want to redistribute modifications to GROMACS, please
  89 consider that scientific software is very special. Version
  90 control is crucial - bugs must be traceable. We will be happy to
  91 consider code for inclusion in the official distribution, but
  92 derived work must not be called official GROMACS. Details are found
  93 in the README & COPYING files - if they are missing, get the
  94 official version at http://www.gromacs.org.
  95
  96 To help us fund GROMACS development, we humbly ask that you cite
  97 the research papers on the package. Check out http://www.gromacs.org.
  98 """.strip().splitlines()
  99
 100     def check_copyright(self, comment_block):
 101         """Analyze existing copyright header for correctness and extract information."""
 102         copyright_re = r'Copyright \(c\) (([0-9]{4}[,-])*[0-9]{4}),? by the GROMACS development team,'
 103         has_copyright = False
 104         is_newstyle = True
 105         is_correct = True
 106         next_header_line = 0
 107         next_footer_line = 0
 108         append_next_line_to_other_copyrights = False
 109         existing_years = ''
 110         other_copyrights = []
 111         for line in comment_block:
 112             if append_next_line_to_other_copyrights:
 113                 other_copyrights[-1] += ' ' + line
 114                 append_next_line_to_other_copyrights = False
 115                 continue
 116             if 'Copyright' in line:
 117                 has_copyright = True
 118                 match = re.match(copyright_re, line)
 119                 if match:
 120                     existing_years = match.group(1)
 121                     new_line = self._copyright.format(existing_years)
 122                     if line != new_line:
 123                         is_correct = False
 124                 else:
 125                     other_copyrights.append(line[line.find('Copyright'):])
 126                     if not line.startswith('Copyright'):
 127                         append_next_line_to_other_copyrights = True
 128                 if next_header_line != -1 or next_footer_line != 0:
 129                     is_correct = False
 130                 continue
 131             if line.startswith('Written by the Gromacs development team'):
 132                 has_copyright = True
 133             if next_header_line >= 0:
 134                 if line == self._header[next_header_line]:
 135                     next_header_line += 1
 136                     if next_header_line >= len(self._header):
 137                         next_header_line = -1
 138                 else:
 139                     is_correct = False
 140                     is_newstyle = False
 141             elif next_footer_line >= 0:
 142                 if line == self._footer[next_footer_line]:
 143                     next_footer_line += 1
 144                     if next_footer_line >= len(self._footer):
 145                         next_footer_line = -1
 146                 else:
 147                     is_correct = False
 148             else:
 149                 is_correct = False
 150         if next_header_line != -1 or next_footer_line != -1:
 151             is_correct = False
 152
 153         return CopyrightState(has_copyright, is_correct, is_newstyle, existing_years, other_copyrights)
 154
 155     def process_copyright(self, state, options, current_years, reporter):
 156         """Determine whether a copyrigth header needs to be updated and report issues."""
 157         need_update = False
 158
 159         if state.years:
 160             if options.replace_years:
 161                 if state.years != current_years:
 162                     need_update = True
 163                     reporter.report('copyright years replaced')
 164                 new_years = current_years
 165             else:
 166                 new_years = state.years
 167                 if not new_years.endswith(current_years):
 168                     if options.update_year:
 169                         need_update = True
 170                         new_years += ',' + current_years
 171                     if options.check or not need_update:
 172                         reporter.report('copyright year outdated')
 173                     else:
 174                         reporter.report('copyright year added')
 175         else:
 176             new_years = current_years
 177
 178         if not state.has_copyright:
 179             if options.add_missing:
 180                 need_update = True
 181             if options.check or not need_update:
 182                 reporter.report('copyright header missing')
 183             elif options.add_missing:
 184                 reporter.report('copyright header added')
 185         else:
 186             if not state.is_newstyle:
 187                 if options.replace_header:
 188                     need_update = True
 189                 if options.check or not need_update:
 190                     reporter.report('copyright header incorrect')
 191                 else:
 192                     reporter.report('copyright header replaced')
 193             elif not state.is_correct:
 194                 if options.update_header:
 195                     need_update = True
 196                 if options.check or not need_update:
 197                     reporter.report('copyright header outdated')
 198                 else:
 199                     reporter.report('copyright header updated')
 200
 201         return need_update, new_years
 202
 203     def get_copyright_text(self, years, other_copyrights):
 204         """Construct a new copyright header."""
 205         output = []
 206         output.extend(self._header)
 207         if other_copyrights:
 208             for line in other_copyrights:
 209                 outline = line.rstrip()
 210                 if outline.endswith(','):
 211                     outline = outline[:-1]
 212                 if not outline.endswith('.'):
 213                     outline += '.'
 214                 output.append(outline)
 215         output.append(self._copyright.format(years))
 216         output.extend(self._footer)
 217         return output
 218
 219 class Reporter(object):
 220
 221     """Wrapper for reporting issues in a file."""
 222
 223     def __init__(self, reportfile, filename):
 224         self._reportfile = reportfile
 225         self._filename = filename
 226
 227     def report(self, text):
 228         self._reportfile.write(self._filename + ': ' + text + '\n');
 229
 230 class CommentHandlerC(object):
 231
 232     """Handler for extracting and creating C-style comments."""
 233
 234     def extract_first_comment_block(self, content_lines):
 235         if not content_lines or not content_lines[0].startswith('/*'):
 236             return ([], 0)
 237         comment_block = [content_lines[0][2:].strip()]
 238         line_index = 1
 239         while line_index < len(content_lines):
 240             line = content_lines[line_index]
 241             if '*/' in content_lines[line_index]:
 242                 break
 243             comment_block.append(line.lstrip('* ').rstrip())
 244             line_index += 1
 245         return (comment_block, line_index + 1)
 246
 247     def create_comment_block(self, lines):
 248         output = []
 249         output.append(('/* ' + lines[0]).rstrip())
 250         output.extend([(' * ' + x).rstrip() for x in lines[1:]])
 251         output.append(' */')
 252         return output
 253
 254 class CommentHandlerSimple(object):
 255
 256     """Handler for extracting and creating sh-style comments.
 257
 258     Also other comments of the same type, but with a different comment
 259     character are supported."""
 260
 261     def __init__(self, comment_char):
 262         self._comment_char = comment_char
 263
 264     def extract_first_comment_block(self, content_lines):
 265         if not content_lines or not content_lines[0].startswith(self._comment_char):
 266             return ([], 0)
 267         comment_block = []
 268         line_index = 0
 269         while line_index < len(content_lines):
 270             line = content_lines[line_index]
 271             if not line.startswith(self._comment_char):
 272                 break
 273             comment_block.append(line.lstrip(self._comment_char + ' ').rstrip())
 274             line_index += 1
 275             if line == self._comment_char + ' the research papers on the package. Check out http://www.gromacs.org.':
 276                 break
 277         while line_index < len(content_lines):
 278             line = content_lines[line_index].rstrip()
 279             if len(line) > 0 and line != self._comment_char:
 280                 break
 281             line_index += 1
 282         return (comment_block, line_index)
 283
 284     def create_comment_block(self, lines):
 285         output = []
 286         output.extend([(self._comment_char + ' ' + x).rstrip() for x in lines])
 287         output.append('')
 288         return output
 289
 290 comment_handlers = {
 291         'c': CommentHandlerC(),
 292         'tex': CommentHandlerSimple('%'),
 293         'sh': CommentHandlerSimple('#')
 294         }
 295
 296 def select_comment_handler(override, filename):
 297     """Select comment handler for a file based on file name and input options."""
 298     filetype = override
 299     if not filetype and filename != '-':
 300         basename = os.path.basename(filename)
 301         root, ext = os.path.splitext(basename)
 302         if ext == '.cmakein':
 303             dummy, ext2 = os.path.splitext(root)
 304             if ext2:
 305                 ext = ext2
 306         if ext in ('.c', '.cu', '.cpp', '.cl', '.h', '.cuh', '.clh', '.y', '.l', '.pre', '.bm'):
 307             filetype = 'c'
 308         elif ext in ('.tex',):
 309             filetype = 'tex'
 310         elif basename in ('CMakeLists.txt', 'GMXRC', 'git-pre-commit') or \
 311                 ext in ('.cmake', '.cmakein', '.py', '.sh', '.bash', '.csh', '.zsh'):
 312             filetype = 'sh'
 313     if filetype in comment_handlers:
 314         return comment_handlers[filetype]
 315     if filetype:
 316         sys.stderr.write("Unsupported input format: {0}\n".format(filetype))
 317     elif filename != '-':
 318         sys.stderr.write("Unsupported input format: {0}\n".format(filename))
 319     else:
 320         sys.stderr.write("No file name or file type provided.\n")
 321     sys.exit(1)
 322
 323 def create_copyright_header(years, other_copyrights=None, language='c'):
 324     if language not in comment_handlers:
 325         sys.strerr.write("Unsupported language: {0}\n".format(language))
 326         sys.exit(1)
 327     copyright_checker = CopyrightChecker()
 328     comment_handler = comment_handlers[language]
 329     copyright_lines = copyright_checker.get_copyright_text(years, other_copyrights)
 330     comment_lines = comment_handler.create_comment_block(copyright_lines)
 331     return '\n'.join(comment_lines) + '\n'
 332
 333 def process_options():
 334     """Process input options."""
 335     parser = OptionParser()
 336     parser.add_option('-l', '--lang',
 337                       help='Comment type to use (c or sh)')
 338     parser.add_option('-y', '--years',
 339                       help='Comma-separated list of years')
 340     parser.add_option('-F', '--files',
 341                       help='File to read list of files from')
 342     parser.add_option('--check', action='store_true',
 343                       help='Do not modify the files, only check the copyright (default action). ' +
 344                            'If specified together with --update, do the modifications ' +
 345                            'but produce output as if only --check was provided.')
 346     parser.add_option('--update-year', action='store_true',
 347                       help='Update the copyright year if outdated')
 348     parser.add_option('--replace-years', action='store_true',
 349                       help='Replace the copyright years with those given with --years')
 350     parser.add_option('--update-header', action='store_true',
 351                       help='Update the copyright header if outdated')
 352     parser.add_option('--replace-header', action='store_true',
 353                       help='Replace any copyright header with the current one')
 354     parser.add_option('--remove-old-copyrights', action='store_true',
 355                       help='Remove copyright statements not in the new format')
 356     parser.add_option('--add-missing', action='store_true',
 357                       help='Add missing copyright headers')
 358     options, args = parser.parse_args()
 359
 360     filenames = args
 361     if options.files:
 362         with open(options.files, 'r') as filelist:
 363             filenames = [x.strip() for x in filelist.read().splitlines()]
 364     elif not filenames:
 365         filenames = ['-']
 366
 367     # Default is --check if nothing provided.
 368     if not options.check and not options.update_year and \
 369             not options.update_header and not options.replace_header and \
 370             not options.add_missing:
 371         options.check = True
 372
 373     return options, filenames
 374
 375 def main():
 376     """Do processing as a stand-alone script."""
 377     options, filenames = process_options()
 378     years = options.years
 379     if not years:
 380         years = str(datetime.date.today().year)
 381     if years.endswith(','):
 382         years = years[:-1]
 383
 384     checker = CopyrightChecker()
 385
 386     # Process each input file in turn.
 387     for filename in filenames:
 388         comment_handler = select_comment_handler(options.lang, filename)
 389
 390         # Read the input file.  We are doing an in-place operation, so can't
 391         # operate in pass-through mode.
 392         if filename == '-':
 393             contents = sys.stdin.read().splitlines()
 394             reporter = Reporter(sys.stderr, '<stdin>')
 395         else:
 396             with open(filename, 'r') as inputfile:
 397                 contents = inputfile.read().splitlines()
 398             reporter = Reporter(sys.stdout, filename)
 399
 400         output = []
 401         # Keep lines that must be at the beginning of the file and skip them in
 402         # the check.
 403         if contents and (contents[0].startswith('#!/') or \
 404                 contents[0].startswith('%code requires') or \
 405                 contents[0].startswith('/* #if')):
 406             output.append(contents[0])
 407             contents = contents[1:]
 408         # Remove and skip empty lines at the beginning.
 409         while contents and len(contents[0]) == 0:
 410             contents = contents[1:]
 411
 412         # Analyze the first comment block in the file.
 413         comment_block, line_count = comment_handler.extract_first_comment_block(contents)
 414         state = checker.check_copyright(comment_block)
 415         need_update, file_years = checker.process_copyright(state, options, years, reporter)
 416         if state.other_copyrights and options.remove_old_copyrights:
 417             need_update = True
 418             state.other_copyrights = []
 419             reporter.report('old copyrights removed')
 420
 421         if need_update:
 422             # Remove the original comment if it was a copyright comment.
 423             if state.has_copyright:
 424                 contents = contents[line_count:]
 425             new_block = checker.get_copyright_text(file_years, state.other_copyrights)
 426             output.extend(comment_handler.create_comment_block(new_block))
 427
 428         # Write the output file if required.
 429         if need_update or filename == '-':
 430             # Append the rest of the input file as it was.
 431             output.extend(contents)
 432             output = '\n'.join(output) + '\n'
 433             if filename == '-':
 434                 sys.stdout.write(output)
 435             else:
 436                 with open(filename, 'w') as outputfile:
 437                     outputfile.write(output)
 438
# Run the checker only when this file is executed as a script; importing it
# as a module just makes the definitions above available.
if __name__ == "__main__":
    main()