contrib/gcc-changelog/git_commit.py

   1 #!/usr/bin/env python3
   2
   3 # Copyright (C) 2020-2024 Free Software Foundation, Inc.
   4 #
   5 # This file is part of GCC.
   6 #
   7 # GCC is free software; you can redistribute it and/or modify it under
   8 # the terms of the GNU General Public License as published by the Free
   9 # Software Foundation; either version 3, or (at your option) any later
  10 # version.
  11 #
  12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 # for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with GCC; see the file COPYING3.  If not see
  19 # <http://www.gnu.org/licenses/>.
  20
  21 import difflib
  22 import os
  23 import re
  24 import sys
  25 from collections import defaultdict
  26
# Directories, relative to the repository root, that are accepted as ChangeLog
# locations.  GitCommit.init_changelog_locations copies this set into a
# per-commit list and drops 'gcc/brig' and 'libhsail-rt' for GCC 12 and newer.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/m2',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/rust',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgm2',
    'libgomp',
    'libgrust',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}
  86
# Component names accepted in 'PR component/number' references, both in the
# subject line (GitCommit.__init__) and in ChangeLog bodies
# (GitCommit.parse_changelog).  Any other component is reported as an error.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libcc1',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgdiagnostics',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'rust',
    'sanitizer',
    'sarif-replay',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}
 141
# Path prefixes for which GitCommit.in_ignored_location returns True.
# Changed files under these prefixes need not be mentioned in a ChangeLog
# (see GitCommit.check_mentioned_files), and a commit touching only such
# files is not required to carry a ChangeLog at all (see GitCommit.__init__).
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes consulted by GitCommit.check_file_patterns for ChangeLog
# items whose file name ends with a '*' wildcard.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that GitCommit.__init__ treats like ChangeLog files: a commit may
# consist solely of them, but they must not be mixed with other changes.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
 165
# 'YYYY-MM-DD', two spaces, then 'name  <email>' (two spaces before '<').
author_line_regex = \
        re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# Tab-indented 'name  <email>'; 'spaces' captures the blanks after the tab
# (GitCommit.parse_changelog insists on exactly four).
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# '<path>ChangeLog' with an optional trailing ':' and an optional leading
# 'for '/'For '; group 1 is the path in front of 'ChangeLog'.
# NOTE(review): '+-/' inside the character class is a range (from '+' to
# '/'), so ',' and '.' are accepted as well -- confirm this is intended.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# 'PR component/NNNN' (4 to 7 digits) preceded by a non-word character or the
# start of the string; applied to the subject line.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z0-9+-]+)/(?P<pr>\d{4,7})')
# '(PRNNNN)' or '[PR NNNN]' without a component; applied to the subject line.
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# Tab, 'PR ', an optional 'component/' (the captured group keeps the slash)
# and the number, up to the end of the line.
pr_regex = re.compile(r'\tPR (?P<component>[a-z0-9+-]+\/)?(?P<pr>[0-9]+)$')
# Tab, 'DR ' and a number, up to the end of the line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# Tab and asterisk; 'spaces' captures the blanks after the asterisk and
# 'content' the remainder of the line.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# First character that ends the file list of an item: '[', '<', '(' or ':'.
end_of_location_regex = re.compile(r'[\[<(:]')
# An item that stops right after '(name):', i.e. one without a description.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# A line that opens a new item: tab followed by '*' or by '(name):'.
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# 'This reverts commit <hash>.' -- GitCommit.__init__ applies it with
# fullmatch to every line of the message.
revert_regex = re.compile(r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# 'cherry picked from commit <hash>' anywhere in a line.
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')
 180
# Maximum width of a ChangeLog line, measured after every tab has been
# replaced by TAB_WIDTH spaces (see GitCommit.parse_changelog).
LINE_LIMIT = 100
TAB_WIDTH = 8
# Compared against the lower-cased line, hence written in lower case.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# Lines starting with one of these prefixes (compared case-insensitively) are
# skipped by GitCommit.parse_changelog.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
# strftime/strptime-style date format (YYYY-MM-DD).
# NOTE(review): not referenced in the portion of the file reviewed here;
# presumably used when ChangeLog output is formatted -- confirm.
DATE_FORMAT = '%Y-%m-%d'
 189
 190
 191 def decode_path(path):
 192     # When core.quotepath is true (default value), utf8 chars are encoded like:
 193     # "b/ko\304\215ka.txt"
 194     #
 195     # The upstream bug is fixed:
 196     # https://github.com/gitpython-developers/GitPython/issues/1099
 197     #
 198     # but we still need a workaround for older versions of the library.
 199     # Please take a look at the explanation of the transformation:
 200     # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string
 201
 202     if path.startswith('"') and path.endswith('"'):
 203         return (path.strip('"').encode('utf8').decode('unicode-escape')
 204                 .encode('latin-1').decode('utf8'))
 205     else:
 206         return path
 207
 208
 209 class Error:
 210     def __init__(self, message, line=None, details=None):
 211         self.message = message
 212         self.line = line
 213         self.details = details
 214
 215     def __repr__(self):
 216         s = self.message
 217         if self.line:
 218             s += ': "%s"' % self.line
 219         return s
 220
 221
 222 class ChangeLogEntry:
 223     def __init__(self, folder, authors, prs):
 224         self.folder = folder
 225         # The 'list.copy()' function is not available before Python 3.3
 226         self.author_lines = list(authors)
 227         self.initial_prs = list(prs)
 228         self.prs = list(prs)
 229         self.lines = []
 230         self.files = []
 231         self.file_patterns = []
 232         self.parentheses_stack = []
 233
 234     def parse_file_names(self):
 235         # Whether the content currently processed is between a star prefix the
 236         # end of the file list: a colon or an open paren.
 237         in_location = False
 238
 239         for line in self.lines:
 240             # If this line matches the star prefix, start the location
 241             # processing on the information that follows the star.
 242             # Note that we need to skip macro names that can be in form of:
 243             #
 244             # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
 245             # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
 246             # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
 247             #
 248             m = star_prefix_regex.match(line)
 249             if m and len(m.group('spaces')) == 1:
 250                 in_location = True
 251                 line = m.group('content')
 252
 253             if in_location:
 254                 # Strip everything that is not a filename in "line":
 255                 # entities "(NAME)", cases "<PATTERN>", conditions
 256                 # "[COND]", entry text (the colon, if present, and
 257                 # anything that follows it).
 258                 m = end_of_location_regex.search(line)
 259                 if m:
 260                     line = line[:m.start()]
 261                     in_location = False
 262
 263                 # At this point, all that's left is a list of filenames
 264                 # separated by commas and whitespaces.
 265                 for file in line.split(','):
 266                     file = file.strip()
 267                     if file:
 268                         if file.endswith('*'):
 269                             self.file_patterns.append(file[:-1])
 270                         else:
 271                             self.files.append(file)
 272
 273     @property
 274     def datetime(self):
 275         for author in self.author_lines:
 276             if author[1]:
 277                 return author[1]
 278         return None
 279
 280     @property
 281     def authors(self):
 282         return [author_line[0] for author_line in self.author_lines]
 283
 284     @property
 285     def is_empty(self):
 286         return not self.lines and self.prs == self.initial_prs
 287
 288     def contains_author(self, author):
 289         for author_lines in self.author_lines:
 290             if author_lines[0] == author:
 291                 return True
 292         return False
 293
 294
 295 class GitInfo:
 296     def __init__(self, hexsha, date, author, lines, modified_files):
 297         self.hexsha = hexsha
 298         self.date = date
 299         self.author = author
 300         self.lines = lines
 301         self.modified_files = modified_files
 302
 303
 304 class GitCommit:
 305     def __init__(self, info, commit_to_info_hook=None, ref_name=None):
 306         self.original_info = info
 307         self.info = info
 308         self.message = None
 309         self.changes = None
 310         self.changelog_entries = []
 311         self.errors = []
 312         self.warnings = []
 313         self.top_level_authors = []
 314         self.co_authors = []
 315         self.top_level_prs = []
 316         self.subject_prs = set()
 317         self.cherry_pick_commit = None
 318         self.revert_commit = None
 319         self.commit_to_info_hook = commit_to_info_hook
 320         self.init_changelog_locations(ref_name)
 321
 322         # Skip Update copyright years commits
 323         if self.info.lines and self.info.lines[0] == 'Update copyright years.':
 324             return
 325
 326         if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
 327             self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))
 328
 329         # Identify first if the commit is a Revert commit
 330         for line in self.info.lines:
 331             m = revert_regex.fullmatch(line)
 332             if m:
 333                 self.revert_commit = m.group('hash')
 334                 break
 335         if self.revert_commit:
 336             # The following happens for get_email.py:
 337             if not self.commit_to_info_hook:
 338                 self.warnings.append(f"Invoked script can not obtain info about "
 339                                      f"reverted commits such as '{self.revert_commit}'")
 340                 return
 341             self.info = self.commit_to_info_hook(self.revert_commit)
 342             if not self.info:
 343                 self.errors.append(Error('Cannot find to-be-reverted commit', self.revert_commit))
 344                 return
 345
 346         self.check_commit_email()
 347
 348         # Extract PR numbers form the subject line
 349         # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
 350         if self.info.lines and not self.revert_commit:
 351             self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
 352             for m in subject_pr_regex.finditer(info.lines[0]):
 353                 if not m.group('component') in bug_components:
 354                     self.errors.append(Error('invalid PR component in subject', info.lines[0]))
 355                 self.subject_prs.add(m.group('pr'))
 356
 357         # Allow complete deletion of ChangeLog files in a commit
 358         project_files = [f for f in self.info.modified_files
 359                          if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
 360                          or f[0] in misc_files]
 361         ignored_files = [f for f in self.info.modified_files
 362                          if self.in_ignored_location(f[0])]
 363         if len(project_files) == len(self.info.modified_files):
 364             # All modified files are only MISC files
 365             return
 366         elif project_files:
 367             err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
 368                   'should be done separately from normal commits\n' \
 369                   '(note: ChangeLog entries will be automatically ' \
 370                   'added by a cron job)'
 371             self.errors.append(Error(err))
 372             return
 373
 374         all_are_ignored = (len(project_files) + len(ignored_files)
 375                            == len(self.info.modified_files))
 376         self.parse_lines(all_are_ignored)
 377         if self.changes:
 378             self.parse_changelog()
 379             self.parse_file_names()
 380             self.check_for_empty_description()
 381             self.check_for_broken_parentheses()
 382             self.deduce_changelog_locations()
 383             self.check_file_patterns()
 384             self.check_line_start()
 385             if not self.errors:
 386                 self.check_mentioned_files()
 387                 self.check_for_correct_changelog()
 388         if self.subject_prs:
 389             self.errors.append(Error('PR %s in subject but not in changelog' %
 390                                      ', '.join(self.subject_prs), self.info.lines[0]))
 391
 392     @property
 393     def success(self):
 394         return not self.errors
 395
 396     @property
 397     def new_files(self):
 398         return [x[0] for x in self.info.modified_files if x[1] == 'A']
 399
 400     @classmethod
 401     def is_changelog_filename(cls, path, allow_suffix=False):
 402         basename = os.path.basename(path)
 403         if basename == 'ChangeLog':
 404             return True
 405         elif allow_suffix and basename.startswith('ChangeLog'):
 406             return True
 407         else:
 408             return False
 409
 410     def find_changelog_location(self, name):
 411         if name.startswith('\t'):
 412             name = name[1:]
 413         if name.endswith(':'):
 414             name = name[:-1]
 415         if name.endswith('/'):
 416             name = name[:-1]
 417         return name if name in self.changelog_locations else None
 418
 419     @classmethod
 420     def format_git_author(cls, author):
 421         assert '<' in author
 422         return author.replace('<', ' <')
 423
 424     @classmethod
 425     def parse_git_name_status(cls, string):
 426         modified_files = []
 427         for entry in string.split('\n'):
 428             parts = entry.split('\t')
 429             t = parts[0]
 430             if t == 'A' or t == 'D' or t == 'M':
 431                 modified_files.append((parts[1], t))
 432             elif t.startswith('R'):
 433                 modified_files.append((parts[1], 'D'))
 434                 modified_files.append((parts[2], 'A'))
 435         return modified_files
 436
 437     def init_changelog_locations(self, ref_name):
 438         self.changelog_locations = list(default_changelog_locations)
 439         if ref_name:
 440             version = sys.maxsize
 441             if 'releases/gcc-' in ref_name:
 442                 version = int(ref_name.split('-')[-1])
 443             if version >= 12:
 444                 # HSA and BRIG were removed in GCC 12
 445                 self.changelog_locations.remove('gcc/brig')
 446                 self.changelog_locations.remove('libhsail-rt')
 447
 448     def parse_lines(self, all_are_ignored):
 449         body = self.info.lines
 450
 451         for i, b in enumerate(body):
 452             if not b:
 453                 continue
 454             if (changelog_regex.match(b) or self.find_changelog_location(b)
 455                     or star_prefix_regex.match(b) or pr_regex.match(b)
 456                     or dr_regex.match(b) or author_line_regex.match(b)
 457                     or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
 458                 self.changes = body[i:]
 459                 return
 460         if not all_are_ignored:
 461             self.errors.append(Error('cannot find a ChangeLog location in '
 462                                      'message'))
 463
    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Every non-empty line is checked for trailing whitespace and length
        and then classified as: a ChangeLog location header, an author
        line, a PR/DR line, a co-author/review/cherry-pick trailer, or a
        body line of the current entry.  Entries are appended to
        self.changelog_entries; authors and PRs seen before the first entry
        go to self.top_level_authors / self.top_level_prs.  Problems are
        appended to self.errors.
        """
        # Entry that currently receives body lines, if any.
        last_entry = None
        # True once an entry without an explicit location has been created.
        will_deduce = False
        for line in self.changes:
            if not line:
                # In deduce mode an empty line closes the current entry.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                # Explicit '<path>/ChangeLog:' header.
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                # The line is a bare ChangeLog location such as 'gcc/cp:'.
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    # The captured component ends with '/', hence [:-1].
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # A PR found in the body no longer counts as 'in the
                    # subject only'.
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    # Review trailers are not part of the ChangeLog.
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        # Body text without a preceding header: open an
                        # entry whose folder is None for now.
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    # Author line inside an entry: add it once per name.
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to the body of last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # Inside an open parenthesis a '*' may be followed
                        # by any number of spaces; otherwise exactly one
                        # space is required.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                # Look for empty groups in the text before
                                # the first colon.
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
 592                             self.process_parentheses(last_entry, line)
 593
 594     def process_parentheses(self, last_entry, line):
 595         for c in line:
 596             if c == '(':
 597                 last_entry.parentheses_stack.append(line)
 598             elif c == ')':
 599                 if not last_entry.parentheses_stack:
 600                     msg = 'bad wrapping of parenthesis'
 601                     self.errors.append(Error(msg, line))
 602                 else:
 603                     del last_entry.parentheses_stack[-1]
 604
 605     def parse_file_names(self):
 606         for entry in self.changelog_entries:
 607             entry.parse_file_names()
 608
 609     def check_file_patterns(self):
 610         for entry in self.changelog_entries:
 611             for pattern in entry.file_patterns:
 612                 name = os.path.join(entry.folder, pattern)
 613                 if not [name.startswith(pr) for pr in wildcard_prefixes]:
 614                     msg = 'unsupported wildcard prefix'
 615                     self.errors.append(Error(msg, name))
 616
 617     def check_for_empty_description(self):
 618         for entry in self.changelog_entries:
 619             for i, line in enumerate(entry.lines):
 620                 if (item_empty_regex.match(line) and
 621                     (i == len(entry.lines) - 1
 622                      or not entry.lines[i+1].strip()
 623                      or item_parenthesis_regex.match(entry.lines[i+1]))):
 624                     msg = 'missing description of a change'
 625                     self.errors.append(Error(msg, line))
 626
 627     def check_for_broken_parentheses(self):
 628         for entry in self.changelog_entries:
 629             if entry.parentheses_stack:
 630                 msg = 'bad parentheses wrapping'
 631                 self.errors.append(Error(msg, entry.parentheses_stack[-1]))
 632
 633     def check_line_start(self):
 634         for entry in self.changelog_entries:
 635             for line in entry.lines:
 636                 if line.startswith('\t '):
 637                     msg = 'extra space after tab'
 638                     self.errors.append(Error(msg, line))
 639
 640     def get_file_changelog_location(self, changelog_file):
 641         for file in self.info.modified_files:
 642             if file[0] == changelog_file:
 643                 # root ChangeLog file
 644                 return ''
 645             index = file[0].find('/' + changelog_file)
 646             if index != -1:
 647                 return file[0][:index]
 648         return None
 649
 650     def deduce_changelog_locations(self):
 651         for entry in self.changelog_entries:
 652             if entry.folder is None:
 653                 changelog = None
 654                 for file in entry.files:
 655                     location = self.get_file_changelog_location(file)
 656                     if (location == ''
 657                        or (location and location in self.changelog_locations)):
 658                         if changelog and changelog != location:
 659                             msg = 'could not deduce ChangeLog file, ' \
 660                                   'not unique location'
 661                             self.errors.append(Error(msg))
 662                             return
 663                         changelog = location
 664                 if changelog is not None:
 665                     entry.folder = changelog
 666                 else:
 667                     msg = 'could not deduce ChangeLog file'
 668                     self.errors.append(Error(msg))
 669
 670     @classmethod
 671     def in_ignored_location(cls, path):
 672         for ignored in ignored_prefixes:
 673             if path.startswith(ignored):
 674                 return True
 675         return False
 676
 677     def get_changelog_by_path(self, path):
 678         components = path.split('/')
 679         while components:
 680             if '/'.join(components) in self.changelog_locations:
 681                 break
 682             components = components[:-1]
 683         return '/'.join(components)
 684
    def check_mentioned_files(self):
        """Cross-check the files named in the ChangeLog with the changed files.

        Errors are recorded for entries that mention no file, for files
        mentioned more than once, for mentioned files that were not
        changed, for changed files that are not mentioned and for wildcard
        patterns that match nothing.  An unmentioned new file outside the
        ignored locations and outside the top-level folder is not an
        error: a 'New file.' line is added to the matching entry (which is
        created if needed) and a warning is recorded.
        """
        # NOTE(review): this assertion compares the length of a list built
        # from the entries with the number of entries, so it always holds;
        # presumably it was meant to check that every folder is set.
        folder_count = len([x.folder for x in self.changelog_entries])
        assert folder_count == len(self.changelog_entries)

        # Full paths (folder + name) of the files named in the entries.
        mentioned_files = set()
        # Full-path prefixes of the wildcard items, and those that matched.
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        # Changed files, ChangeLog files themselves excluded.
        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            # Suggest the closest changed file name, if there is one.
            candidates = difflib.get_close_matches(file, changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))
        # Maps an entry folder to the new files added to it automatically.
        auto_add_warnings = defaultdict(list)
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        # No entry for that location yet: create one.
                        prs = self.top_level_prs
                        if not prs:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            if self.changelog_entries:
                                prs = self.changelog_entries[0].prs
                                for entry in self.changelog_entries:
                                    if entry.prs != prs:
                                        prs = []
                                        break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                        auto_add_warnings[entry.folder].append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    # A changed file is accepted when a wildcard item
                    # covers it; the first matching pattern is marked used.
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))
        for entry, val in auto_add_warnings.items():
            if len(val) == 1:
                self.warnings.append(f"Auto-added new file '{entry}/{val[0]}'")
            else:
                self.warnings.append(f"Auto-added {len(val)} new files in '{entry}'")
 775
 776     def check_for_correct_changelog(self):
 777         for entry in self.changelog_entries:
 778             for file in entry.files:
 779                 full_path = os.path.join(entry.folder, file)
 780                 changelog_location = self.get_changelog_by_path(full_path)
 781                 if changelog_location != entry.folder:
 782                     msg = 'wrong ChangeLog location "%s", should be "%s"'
 783                     err = Error(msg % (entry.folder, changelog_location), file)
 784                     self.errors.append(err)
 785
 786     @classmethod
 787     def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
 788         output = ''
 789         for i, author in enumerate(authors):
 790             if i == 0:
 791                 output += '%s%s  %s\n' % (prefix, timestamp, author)
 792             else:
 793                 output += '%s\t    %s\n' % (prefix, author)
 794         output += '\n'
 795         return output
 796
 797     def to_changelog_entries(self, use_commit_ts=False):
 798         current_timestamp = self.info.date.strftime(DATE_FORMAT)
 799         for entry in self.changelog_entries:
 800             output = ''
 801             timestamp = entry.datetime
 802             if self.revert_commit:
 803                 timestamp = current_timestamp
 804                 orig_date = self.original_info.date
 805                 current_timestamp = orig_date.strftime(DATE_FORMAT)
 806             elif self.cherry_pick_commit:
 807                 info = (self.commit_to_info_hook
 808                         and self.commit_to_info_hook(self.cherry_pick_commit))
 809                 # it can happen that it is a cherry-pick for a different
 810                 # repository
 811                 if info:
 812                     timestamp = info.date.strftime(DATE_FORMAT)
 813                 else:
 814                     if self.commit_to_info_hook:
 815                         self.warnings.append(f"Cherry-picked commit not found: '{self.cherry_pick_commit}'")
 816                     else:
 817                         self.warnings.append(f"Invoked script can not obtain info about "
 818                                              f"cherry-picked commits such as '{self.revert_commit}'")
 819                     timestamp = current_timestamp
 820             elif not timestamp or use_commit_ts:
 821                 timestamp = current_timestamp
 822             authors = entry.authors if entry.authors else [self.info.author]
 823             # add Co-Authored-By authors to all ChangeLog entries
 824             for author in self.co_authors:
 825                 if author not in authors:
 826                     authors.append(author)
 827
 828             if self.cherry_pick_commit or self.revert_commit:
 829                 original_author = self.original_info.author
 830                 output += self.format_authors_in_changelog([original_author],
 831                                                            current_timestamp)
 832                 if self.revert_commit:
 833                     output += '\tRevert:\n'
 834                 else:
 835                     output += '\tBackported from master:\n'
 836                 output += self.format_authors_in_changelog(authors,
 837                                                            timestamp, '\t')
 838             else:
 839                 output += self.format_authors_in_changelog(authors, timestamp)
 840             for pr in entry.prs:
 841                 output += '\t%s\n' % pr
 842             for line in entry.lines:
 843                 output += line + '\n'
 844             yield (entry.folder, output.rstrip())
 845
 846     def print_output(self):
 847         for entry, output in self.to_changelog_entries():
 848             print('------ %s/ChangeLog ------ ' % entry)
 849             print(output)
 850
 851     def print_errors(self):
 852         print('Errors:')
 853         for error in self.errors:
 854             print(error)
 855
 856     def print_warnings(self):
 857         if self.warnings:
 858             print('Warnings:')
 859             for warning in self.warnings:
 860                 print(warning)
 861
 862     def check_commit_email(self):
 863         # Parse 'Martin Liska  <mliska@suse.cz>'
 864         email = self.info.author.split(' ')[-1].strip('<>')
 865
 866         # Verify that all characters are ASCII
 867         # TODO: Python 3.7 provides a nicer function: isascii
 868         if len(email) != len(email.encode()):
 869             self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))