admin/copyright.py

   1 #!/usr/bin/env python3
   2 #
   3 # This file is part of the GROMACS molecular simulation package.
   4 #
   5 # Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
   6 # Copyright (c) 2019,2020, by the GROMACS development team, led by
   7 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8 # and including many others, as listed in the AUTHORS file in the
   9 # top-level source directory and at http://www.gromacs.org.
  10 #
  11 # GROMACS is free software; you can redistribute it and/or
  12 # modify it under the terms of the GNU Lesser General Public License
  13 # as published by the Free Software Foundation; either version 2.1
  14 # of the License, or (at your option) any later version.
  15 #
  16 # GROMACS is distributed in the hope that it will be useful,
  17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19 # Lesser General Public License for more details.
  20 #
  21 # You should have received a copy of the GNU Lesser General Public
  22 # License along with GROMACS; if not, see
  23 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25 #
  26 # If you want to redistribute modifications to GROMACS, please
  27 # consider that scientific software is very special. Version
  28 # control is crucial - bugs must be traceable. We will be happy to
  29 # consider code for inclusion in the official distribution, but
  30 # derived work must not be called official GROMACS. Details are found
  31 # in the README & COPYING files - if they are missing, get the
  32 # official version at http://www.gromacs.org.
  33 #
  34 # To help us fund GROMACS development, we humbly ask that you cite
  35 # the research papers on the package. Check out http://www.gromacs.org.
  36
  37 """Checks and/or updates copyright headers in GROMACS source files.
  38
  39 It is used internally by several bash scripts to do copyright-related tasks,
  40 but can also be invoked directly for some rare use cases.
  41
  42 See docs/dev-manual/code-formatting.rst for more details.
  43 """
  44
  45 import datetime
  46 import os.path
  47 import re
  48 import sys
  49
  50 from optparse import OptionParser
  51
  52 class CopyrightState(object):
  53
  54     """Information about an existing (or non-existing) copyright header."""
  55
  56     def __init__(self, has_copyright, is_correct, is_newstyle, years, other_copyrights):
  57         self.has_copyright = has_copyright
  58         self.is_correct = is_correct
  59         self.is_newstyle = is_newstyle
  60         self.years = years
  61         self.other_copyrights = other_copyrights
  62
  63 class CopyrightChecker(object):
  64
  65     """Logic for analyzing existing copyright headers and generating new ones."""
  66
  67     _header = ["", "This file is part of the GROMACS molecular simulation package.", ""]
  68     _copyright = "Copyright (c) {0}, by the GROMACS development team, led by"
  69     _footer = """
  70 Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  71 and including many others, as listed in the AUTHORS file in the
  72 top-level source directory and at http://www.gromacs.org.
  73
  74 GROMACS is free software; you can redistribute it and/or
  75 modify it under the terms of the GNU Lesser General Public License
  76 as published by the Free Software Foundation; either version 2.1
  77 of the License, or (at your option) any later version.
  78
  79 GROMACS is distributed in the hope that it will be useful,
  80 but WITHOUT ANY WARRANTY; without even the implied warranty of
  81 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  82 Lesser General Public License for more details.
  83
  84 You should have received a copy of the GNU Lesser General Public
  85 License along with GROMACS; if not, see
  86 http://www.gnu.org/licenses, or write to the Free Software Foundation,
  87 Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  88
  89 If you want to redistribute modifications to GROMACS, please
  90 consider that scientific software is very special. Version
  91 control is crucial - bugs must be traceable. We will be happy to
  92 consider code for inclusion in the official distribution, but
  93 derived work must not be called official GROMACS. Details are found
  94 in the README & COPYING files - if they are missing, get the
  95 official version at http://www.gromacs.org.
  96
  97 To help us fund GROMACS development, we humbly ask that you cite
  98 the research papers on the package. Check out http://www.gromacs.org.
  99 """.strip().splitlines()
 100
 101     def check_copyright(self, comment_block):
 102         """Analyze existing copyright header for correctness and extract information."""
 103         copyright_re = r'Copyright \(c\) (([0-9]{4}[,-])*[0-9]{4}),? by the GROMACS development team,'
 104         has_copyright = False
 105         is_newstyle = True
 106         is_correct = True
 107         next_header_line = 0
 108         next_footer_line = 0
 109         append_next_line_to_other_copyrights = False
 110         existing_years = ''
 111         other_copyrights = []
 112         for line in comment_block:
 113             if append_next_line_to_other_copyrights:
 114                 other_copyrights[-1] += ' ' + line
 115                 append_next_line_to_other_copyrights = False
 116                 continue
 117             if 'Copyright' in line:
 118                 has_copyright = True
 119                 match = re.match(copyright_re, line)
 120                 if match:
 121                     existing_years = match.group(1)
 122                     new_line = self._copyright.format(existing_years)
 123                     if line != new_line:
 124                         is_correct = False
 125                 else:
 126                     other_copyrights.append(line[line.find('Copyright'):])
 127                     if not line.startswith('Copyright'):
 128                         append_next_line_to_other_copyrights = True
 129                 if next_header_line != -1 or next_footer_line != 0:
 130                     is_correct = False
 131                 continue
 132             if line.startswith('Written by the Gromacs development team'):
 133                 has_copyright = True
 134             if next_header_line >= 0:
 135                 if line == self._header[next_header_line]:
 136                     next_header_line += 1
 137                     if next_header_line >= len(self._header):
 138                         next_header_line = -1
 139                 else:
 140                     is_correct = False
 141                     is_newstyle = False
 142             elif next_footer_line >= 0:
 143                 if line == self._footer[next_footer_line]:
 144                     next_footer_line += 1
 145                     if next_footer_line >= len(self._footer):
 146                         next_footer_line = -1
 147                 else:
 148                     is_correct = False
 149             else:
 150                 is_correct = False
 151         if next_header_line != -1 or next_footer_line != -1:
 152             is_correct = False
 153
 154         return CopyrightState(has_copyright, is_correct, is_newstyle, existing_years, other_copyrights)
 155
 156     def process_copyright(self, state, options, current_years, reporter):
 157         """Determine whether a copyrigth header needs to be updated and report issues."""
 158         need_update = False
 159
 160         if state.years:
 161             if options.replace_years:
 162                 if state.years != current_years:
 163                     need_update = True
 164                     reporter.report('copyright years replaced')
 165                 new_years = current_years
 166             else:
 167                 new_years = state.years
 168                 if not new_years.endswith(current_years):
 169                     if options.update_year:
 170                         need_update = True
 171                         new_years += ',' + current_years
 172                     if options.check or not need_update:
 173                         reporter.report('copyright year outdated')
 174                     else:
 175                         reporter.report('copyright year added')
 176         else:
 177             new_years = current_years
 178
 179         if not state.has_copyright:
 180             if options.add_missing:
 181                 need_update = True
 182             if options.check or not need_update:
 183                 reporter.report('copyright header missing')
 184             elif options.add_missing:
 185                 reporter.report('copyright header added')
 186         else:
 187             if not state.is_newstyle:
 188                 if options.replace_header:
 189                     need_update = True
 190                 if options.check or not need_update:
 191                     reporter.report('copyright header incorrect')
 192                 else:
 193                     reporter.report('copyright header replaced')
 194             elif not state.is_correct:
 195                 if options.update_header:
 196                     need_update = True
 197                 if options.check or not need_update:
 198                     reporter.report('copyright header outdated')
 199                 else:
 200                     reporter.report('copyright header updated')
 201
 202         return need_update, new_years
 203
 204     def get_copyright_text(self, years, other_copyrights):
 205         """Construct a new copyright header."""
 206         output = []
 207         output.extend(self._header)
 208         if other_copyrights:
 209             for line in other_copyrights:
 210                 outline = line.rstrip()
 211                 if outline.endswith(','):
 212                     outline = outline[:-1]
 213                 if not outline.endswith('.'):
 214                     outline += '.'
 215                 output.append(outline)
 216         output.append(self._copyright.format(years))
 217         output.extend(self._footer)
 218         return output
 219
 220 class Reporter(object):
 221
 222     """Wrapper for reporting issues in a file."""
 223
 224     def __init__(self, reportfile, filename):
 225         self._reportfile = reportfile
 226         self._filename = filename
 227
 228     def report(self, text):
 229         self._reportfile.write(self._filename + ': ' + text + '\n');
 230
 231 class CommentHandlerC(object):
 232
 233     """Handler for extracting and creating C-style comments."""
 234
 235     def extract_first_comment_block(self, content_lines):
 236         if not content_lines or not content_lines[0].startswith('/*'):
 237             return ([], 0)
 238         comment_block = [content_lines[0][2:].strip()]
 239         line_index = 1
 240         while line_index < len(content_lines):
 241             line = content_lines[line_index]
 242             if '*/' in content_lines[line_index]:
 243                 break
 244             comment_block.append(line.lstrip('* ').rstrip())
 245             line_index += 1
 246         return (comment_block, line_index + 1)
 247
 248     def create_comment_block(self, lines):
 249         output = []
 250         output.append(('/* ' + lines[0]).rstrip())
 251         output.extend([(' * ' + x).rstrip() for x in lines[1:]])
 252         output.append(' */')
 253         return output
 254
 255 class CommentHandlerSimple(object):
 256
 257     """Handler for extracting and creating sh-style comments.
 258
 259     Also other comments of the same type, but with a different comment
 260     character are supported."""
 261
 262     def __init__(self, comment_char):
 263         self._comment_char = comment_char
 264
 265     def extract_first_comment_block(self, content_lines):
 266         if not content_lines or not content_lines[0].startswith(self._comment_char):
 267             return ([], 0)
 268         comment_block = []
 269         line_index = 0
 270         while line_index < len(content_lines):
 271             line = content_lines[line_index]
 272             if not line.startswith(self._comment_char):
 273                 break
 274             comment_block.append(line.lstrip(self._comment_char + ' ').rstrip())
 275             line_index += 1
 276             if line == self._comment_char + ' the research papers on the package. Check out http://www.gromacs.org.':
 277                 break
 278         while line_index < len(content_lines):
 279             line = content_lines[line_index].rstrip()
 280             if len(line) > 0 and line != self._comment_char:
 281                 break
 282             line_index += 1
 283         return (comment_block, line_index)
 284
 285     def create_comment_block(self, lines):
 286         output = []
 287         output.extend([(self._comment_char + ' ' + x).rstrip() for x in lines])
 288         output.append('')
 289         return output
 290
 291 comment_handlers = {
 292         'c': CommentHandlerC(),
 293         'tex': CommentHandlerSimple('%'),
 294         'sh': CommentHandlerSimple('#')
 295         }
 296
 297 def select_comment_handler(override, filename):
 298     """Select comment handler for a file based on file name and input options."""
 299     filetype = override
 300     if not filetype and filename != '-':
 301         basename = os.path.basename(filename)
 302         root, ext = os.path.splitext(basename)
 303         if ext == '.cmakein':
 304             dummy, ext2 = os.path.splitext(root)
 305             if ext2:
 306                 ext = ext2
 307         if ext in ('.c', '.cu', '.cpp', '.cl', '.h', '.cuh', '.clh', '.y', '.l', '.pre', '.bm'):
 308             filetype = 'c'
 309         elif ext in ('.tex',):
 310             filetype = 'tex'
 311         elif basename in ('CMakeLists.txt', 'GMXRC', 'git-pre-commit') or \
 312                 ext in ('.cmake', '.cmakein', '.py', '.sh', '.bash', '.csh', '.zsh'):
 313             filetype = 'sh'
 314     if filetype in comment_handlers:
 315         return comment_handlers[filetype]
 316     if filetype:
 317         sys.stderr.write("Unsupported input format: {0}\n".format(filetype))
 318     elif filename != '-':
 319         sys.stderr.write("Unsupported input format: {0}\n".format(filename))
 320     else:
 321         sys.stderr.write("No file name or file type provided.\n")
 322     sys.exit(1)
 323
 324 def create_copyright_header(years, other_copyrights=None, language='c'):
 325     if language not in comment_handlers:
 326         sys.strerr.write("Unsupported language: {0}\n".format(language))
 327         sys.exit(1)
 328     copyright_checker = CopyrightChecker()
 329     comment_handler = comment_handlers[language]
 330     copyright_lines = copyright_checker.get_copyright_text(years, other_copyrights)
 331     comment_lines = comment_handler.create_comment_block(copyright_lines)
 332     return '\n'.join(comment_lines) + '\n'
 333
 334 def process_options():
 335     """Process input options."""
 336     parser = OptionParser()
 337     parser.add_option('-l', '--lang',
 338                       help='Comment type to use (c or sh)')
 339     parser.add_option('-y', '--years',
 340                       help='Comma-separated list of years')
 341     parser.add_option('-F', '--files',
 342                       help='File to read list of files from')
 343     parser.add_option('--check', action='store_true',
 344                       help='Do not modify the files, only check the copyright (default action). ' +
 345                            'If specified together with --update, do the modifications ' +
 346                            'but produce output as if only --check was provided.')
 347     parser.add_option('--update-year', action='store_true',
 348                       help='Update the copyright year if outdated')
 349     parser.add_option('--replace-years', action='store_true',
 350                       help='Replace the copyright years with those given with --years')
 351     parser.add_option('--update-header', action='store_true',
 352                       help='Update the copyright header if outdated')
 353     parser.add_option('--replace-header', action='store_true',
 354                       help='Replace any copyright header with the current one')
 355     parser.add_option('--remove-old-copyrights', action='store_true',
 356                       help='Remove copyright statements not in the new format')
 357     parser.add_option('--add-missing', action='store_true',
 358                       help='Add missing copyright headers')
 359     options, args = parser.parse_args()
 360
 361     filenames = args
 362     if options.files:
 363         with open(options.files, 'r') as filelist:
 364             filenames = [x.strip() for x in filelist.read().splitlines()]
 365     elif not filenames:
 366         filenames = ['-']
 367
 368     # Default is --check if nothing provided.
 369     if not options.check and not options.update_year and \
 370             not options.update_header and not options.replace_header and \
 371             not options.add_missing:
 372         options.check = True
 373
 374     return options, filenames
 375
 376 def main():
 377     """Do processing as a stand-alone script."""
 378     options, filenames = process_options()
 379     years = options.years
 380     if not years:
 381         years = str(datetime.date.today().year)
 382     if years.endswith(','):
 383         years = years[:-1]
 384
 385     checker = CopyrightChecker()
 386
 387     # Process each input file in turn.
 388     for filename in filenames:
 389         comment_handler = select_comment_handler(options.lang, filename)
 390
 391         # Read the input file.  We are doing an in-place operation, so can't
 392         # operate in pass-through mode.
 393         if filename == '-':
 394             contents = sys.stdin.read().splitlines()
 395             reporter = Reporter(sys.stderr, '<stdin>')
 396         else:
 397             with open(filename, 'r', encoding='utf-8') as inputfile:
 398                 contents = inputfile.read().splitlines()
 399             reporter = Reporter(sys.stdout, filename)
 400
 401         output = []
 402         # Keep lines that must be at the beginning of the file and skip them in
 403         # the check.
 404         if contents and (contents[0].startswith('#!/') or \
 405                 contents[0].startswith('%code requires') or \
 406                 contents[0].startswith('/* #if')):
 407             output.append(contents[0])
 408             contents = contents[1:]
 409         # Remove and skip empty lines at the beginning.
 410         while contents and len(contents[0]) == 0:
 411             contents = contents[1:]
 412
 413         # Analyze the first comment block in the file.
 414         comment_block, line_count = comment_handler.extract_first_comment_block(contents)
 415         state = checker.check_copyright(comment_block)
 416         need_update, file_years = checker.process_copyright(state, options, years, reporter)
 417         if state.other_copyrights and options.remove_old_copyrights:
 418             need_update = True
 419             state.other_copyrights = []
 420             reporter.report('old copyrights removed')
 421
 422         if need_update:
 423             # Remove the original comment if it was a copyright comment.
 424             if state.has_copyright:
 425                 contents = contents[line_count:]
 426             new_block = checker.get_copyright_text(file_years, state.other_copyrights)
 427             output.extend(comment_handler.create_comment_block(new_block))
 428
 429         # Write the output file if required.
 430         if need_update or filename == '-':
 431             # Append the rest of the input file as it was.
 432             output.extend(contents)
 433             output = '\n'.join(output) + '\n'
 434             if filename == '-':
 435                 sys.stdout.write(output)
 436             else:
 437                 with open(filename, 'w') as outputfile:
 438                     outputfile.write(output)
 439
# Run the checker only when executed as a script, so that importing this
# module has no side effects.
if __name__ == "__main__":
    main()