# devscripts/make_changelog.py (yt-dlp, Release 2024.12.13)
from __future__ import annotations

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import enum
import itertools
import json
import logging
import re
from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path

from devscripts.utils import read_file, run_process, write_file

BASE_URL = 'https://github.com'
LOCATION_PATH = Path(__file__).parent
HASH_LENGTH = 7

logger = logging.getLogger(__name__)


class CommitGroup(enum.Enum):
    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'

    @classmethod
    @lru_cache
    def subgroup_lookup(cls):
        return {
            name: group
            for group, names in {
                cls.MISC: {
                    'build',
                    'ci',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'test',
                },
                cls.NETWORKING: {
                    'rh',
                },
            }.items()
            for name in names
        }

    @classmethod
    @lru_cache
    def group_lookup(cls):
        result = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        result.update({item.name.lower(): item for item in iter(cls)})
        return result

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))

        if result := cls.group_lookup().get(group):
            return result, subgroup or None

        if subgroup:
            return None, value

        return cls.subgroup_lookup().get(group), group or None
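

# Illustrative resolution of commit-message prefixes by CommitGroup.get() above.
# The results follow from the lookup tables; the prefixes themselves are hypothetical examples:
#   CommitGroup.get('ie/youtube') -> (CommitGroup.EXTRACTOR, 'youtube')
#   CommitGroup.get('cleanup')    -> (CommitGroup.MISC, 'cleanup')
#   CommitGroup.get('unknown/x')  -> (None, 'unknown/x')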


@dataclass
class Commit:
    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        result = f'{self.short!r}'

        if self.hash:
            result += f' ({self.hash[:HASH_LENGTH]})'

        if self.authors:
            authors = ', '.join(self.authors)
            result += f' by {authors}'

        return result


@dataclass
class CommitInfo:
    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        return ((self.details or '').lower(), self.sub_details, self.message)


def unique(items):
    return sorted({item.strip().lower(): item for item in items if item}.values())
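

# Illustrative behaviour of unique() above (hypothetical input): falsy items are dropped,
# duplicates are collapsed case-insensitively keeping the last spelling, and the result is sorted:
#   unique(['Docs', 'docs', '', 'Build']) -> ['Build', 'docs']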


class Changelog:
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        first = True
        for item in CommitGroup:
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            if group := groups[item]:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',), ', '.join(
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info: CommitInfo):
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, commit_hash):
        assert message or commit_hash, 'Improperly defined commit message or override'
        message = message if message else commit_hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{commit_hash})' if commit_hash else message

    def _format_issues(self, issues):
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        return f'{BASE_URL}/{self._repo}'


class CommitRange:
    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[\w.-]+`?): )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
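    # How MESSAGE_RE decomposes a short commit subject (hypothetical example, not a real commit):
    #   '[ie/youtube] tab: Fix playlist extraction (#1234, #5678)'
    #   -> prefix='ie/youtube', sub_details='tab',
    #      message='Fix playlist extraction', issues='#1234, #5678'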
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits, reverts = {}, {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            fix_commitish = None
            if match := self.FIXES_RE.search(short):
                fix_commitish = match.group(1)

            authors = [default_author] if default_author else []
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                if match := self.AUTHOR_INDICATOR_RE.match(line):
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
                if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
                    fix_commitish = match.group(1)

            commit = Commit(commit_hash, short, authors)
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            if match := self.REVERT_RE.fullmatch(commit.short):
                reverts[match.group(1)] = commit
                continue

            if fix_commitish:
                fixes[fix_commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, revert_commit in reverts.items():
            if reverted := commits.pop(commitish, None):
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    del commits[fix_commit.hash]
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes
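
    # Note: the '--format=%H%n%s%n%b%n{COMMIT_SEPARATOR}' string passed to 'git log' above emits,
    # per commit, the full hash, the subject line, the body lines and the separator line, which is
    # the sequence the line iterator in this method consumes.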

    def apply_overrides(self, overrides):
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = dict(reversed(self._commits.items()))

    def groups(self):
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                    logger.error(f'Assuming [ie] group for {commit.short!r}')
                else:
                    group = CommitGroup.CORE

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, *sub_details = prefix.split(':')

        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            details = details.partition('/')[2].strip()

        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details
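
    # Illustrative call of details_from_prefix() above (hypothetical prefix):
    #   details_from_prefix('ie/youtube:tab') -> (CommitGroup.EXTRACTOR, 'youtube', ['tab'])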


def get_new_contributors(contributors_path, commits):
    contributors = set()
    if contributors_path.exists():
        for line in read_file(contributors_path).splitlines():
            author, _, _ = line.strip().partition(' (')
            authors = author.split('/')
            contributors.update(map(str.casefold, authors))

    new_contributors = set()
    for commit in commits:
        for author in commit.authors:
            author_folded = author.casefold()
            if author_folded not in contributors:
                contributors.add(author_folded)
                new_contributors.add(author)

    return sorted(new_contributors, key=str.casefold)


def create_changelog(args):
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)

    commits = CommitRange(None, args.commitish, args.default_author)

    if not args.no_override:
        if args.override_path.exists():
            overrides = json.loads(read_file(args.override_path))
            commits.apply_overrides(overrides)
        else:
            logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    if new_contributors := get_new_contributors(args.contributors_path, commits):
        if args.contributors:
            write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    return Changelog(commits.groups(), args.repo, args.collapsible)


def create_parser():
    import argparse

    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without an author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')

    return parser


if __name__ == '__main__':
    print(create_changelog(create_parser().parse_args()))
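
# Typical invocation (illustrative; the flags are those defined in create_parser() above).
# Run from a checkout of the repository so 'git log' sees the yt-dlp history; the rendered
# markdown changelog is printed to stdout:
#   python devscripts/make_changelog.py -vv --collapsible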