libcxx/utils/synchronize_csv_status_files.py

   1 #!/usr/bin/env python3
   2 # ===----------------------------------------------------------------------===##
   3 #
   4 # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   5 # See https://llvm.org/LICENSE.txt for license information.
   6 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   7 #
   8 # ===----------------------------------------------------------------------===##
   9
  10 from typing import List, Dict, Tuple, Optional
  11 import copy
  12 import csv
  13 import itertools
  14 import json
  15 import os
  16 import pathlib
  17 import re
  18 import subprocess
  19
# Number of the 'Libc++ Standards Conformance' project on Github. It is kept as a string
# because main() passes it directly as an argument on the `gh` command line.
LIBCXX_CONFORMANCE_PROJECT = '31'
  22
  23 def extract_between_markers(text: str, begin_marker: str, end_marker: str) -> Optional[str]:
  24     """
  25     Given a string containing special markers, extract everything located beetwen these markers.
  26
  27     If the beginning marker is not found, None is returned. If the beginning marker is found but
  28     there is no end marker, it is an error (this is done to avoid silently accepting inputs that
  29     are erroneous by mistake).
  30     """
  31     start = text.find(begin_marker)
  32     if start == -1:
  33         return None
  34
  35     start += len(begin_marker) # skip the marker itself
  36     end = text.find(end_marker, start)
  37     if end == -1:
  38         raise ArgumentError(f"Could not find end marker {end_marker} in: {text[start:]}")
  39
  40     return text[start:end]
  41
  42 class PaperStatus:
  43     TODO = 1
  44     IN_PROGRESS = 2
  45     PARTIAL = 3
  46     DONE = 4
  47     NOTHING_TO_DO = 5
  48
  49     _status: int
  50
  51     _original: Optional[str]
  52     """
  53     Optional string from which the paper status was created. This is used to carry additional
  54     information from CSV rows, like any notes associated to the status.
  55     """
  56
  57     def __init__(self, status: int, original: Optional[str] = None):
  58         self._status = status
  59         self._original = original
  60
  61     def __eq__(self, other) -> bool:
  62         return self._status == other._status
  63
  64     def __lt__(self, other) -> bool:
  65         relative_order = {
  66             PaperStatus.TODO: 0,
  67             PaperStatus.IN_PROGRESS: 1,
  68             PaperStatus.PARTIAL: 2,
  69             PaperStatus.DONE: 3,
  70             PaperStatus.NOTHING_TO_DO: 3,
  71         }
  72         return relative_order[self._status] < relative_order[other._status]
  73
  74     @staticmethod
  75     def from_csv_entry(entry: str):
  76         """
  77         Parse a paper status out of a CSV row entry. Entries can look like:
  78         - '' (an empty string, which means the paper is not done yet)
  79         - '|In Progress|'
  80         - '|Partial|'
  81         - '|Complete|'
  82         - '|Nothing To Do|'
  83         """
  84         if entry == '':
  85             return PaperStatus(PaperStatus.TODO, entry)
  86         elif entry == '|In Progress|':
  87             return PaperStatus(PaperStatus.IN_PROGRESS, entry)
  88         elif entry == '|Partial|':
  89             return PaperStatus(PaperStatus.PARTIAL, entry)
  90         elif entry == '|Complete|':
  91             return PaperStatus(PaperStatus.DONE, entry)
  92         elif entry == '|Nothing To Do|':
  93             return PaperStatus(PaperStatus.NOTHING_TO_DO, entry)
  94         else:
  95             raise RuntimeError(f'Unexpected CSV entry for status: {entry}')
  96
  97     @staticmethod
  98     def from_github_issue(issue: Dict):
  99         """
 100         Parse a paper status out of a Github issue obtained from querying a Github project.
 101         """
 102         if 'status' not in issue:
 103             return PaperStatus(PaperStatus.TODO)
 104         elif issue['status'] == 'Todo':
 105             return PaperStatus(PaperStatus.TODO)
 106         elif issue['status'] == 'In Progress':
 107             return PaperStatus(PaperStatus.IN_PROGRESS)
 108         elif issue['status'] == 'Partial':
 109             return PaperStatus(PaperStatus.PARTIAL)
 110         elif issue['status'] == 'Done':
 111             return PaperStatus(PaperStatus.DONE)
 112         elif issue['status'] == 'Nothing To Do':
 113             return PaperStatus(PaperStatus.NOTHING_TO_DO)
 114         else:
 115             raise RuntimeError(f"Received unrecognizable Github issue status: {issue['status']}")
 116
 117     def to_csv_entry(self) -> str:
 118         """
 119         Return the issue state formatted for a CSV entry. The status is formatted as '|Complete|',
 120         '|In Progress|', etc.
 121         """
 122         mapping = {
 123             PaperStatus.TODO: '',
 124             PaperStatus.IN_PROGRESS: '|In Progress|',
 125             PaperStatus.PARTIAL: '|Partial|',
 126             PaperStatus.DONE: '|Complete|',
 127             PaperStatus.NOTHING_TO_DO: '|Nothing To Do|',
 128         }
 129         return self._original if self._original is not None else mapping[self._status]
 130
 131 class PaperInfo:
 132     paper_number: str
 133     """
 134     Identifier for the paper or the LWG issue. This must be something like 'PnnnnRx', 'Nxxxxx' or 'LWGxxxxx'.
 135     """
 136
 137     paper_name: str
 138     """
 139     Plain text string representing the name of the paper.
 140     """
 141
 142     status: PaperStatus
 143     """
 144     Status of the paper/issue. This can be complete, in progress, partial, or done.
 145     """
 146
 147     meeting: Optional[str]
 148     """
 149     Plain text string representing the meeting at which the paper/issue was voted.
 150     """
 151
 152     first_released_version: Optional[str]
 153     """
 154     First version of LLVM in which this paper/issue was resolved.
 155     """
 156
 157     notes: Optional[str]
 158     """
 159     Optional plain text string representing notes to associate to the paper.
 160     This is used to populate the "Notes" column in the CSV status pages.
 161     """
 162
 163     original: Optional[object]
 164     """
 165     Object from which this PaperInfo originated. This is used to track the CSV row or Github issue that
 166     was used to generate this PaperInfo and is useful for error reporting purposes.
 167     """
 168
 169     def __init__(self, paper_number: str, paper_name: str,
 170                        status: PaperStatus,
 171                        meeting: Optional[str] = None,
 172                        first_released_version: Optional[str] = None,
 173                        notes: Optional[str] = None,
 174                        original: Optional[object] = None):
 175         self.paper_number = paper_number
 176         self.paper_name = paper_name
 177         self.status = status
 178         self.meeting = meeting
 179         self.first_released_version = first_released_version
 180         self.notes = notes
 181         self.original = original
 182
 183     def for_printing(self) -> Tuple[str, str, str, str, str, str]:
 184         return (
 185             f'`{self.paper_number} <https://wg21.link/{self.paper_number}>`__',
 186             self.paper_name,
 187             self.meeting if self.meeting is not None else '',
 188             self.status.to_csv_entry(),
 189             self.first_released_version if self.first_released_version is not None else '',
 190             self.notes if self.notes is not None else '',
 191         )
 192
 193     def __repr__(self) -> str:
 194         return repr(self.original) if self.original is not None else repr(self.for_printing())
 195
 196     @staticmethod
 197     def from_csv_row(row: Tuple[str, str, str, str, str, str]):# -> PaperInfo:
 198         """
 199         Given a row from one of our status-tracking CSV files, create a PaperInfo object representing that row.
 200         """
 201         # Extract the paper number from the first column
 202         match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+))\s+", row[0])
 203         if match is None:
 204             raise RuntimeError(f"Can't parse paper/issue number out of row: {row}")
 205
 206         return PaperInfo(
 207             paper_number=match.group(1),
 208             paper_name=row[1],
 209             status=PaperStatus.from_csv_entry(row[3]),
 210             meeting=row[2] or None,
 211             first_released_version=row[4] or None,
 212             notes=row[5] or None,
 213             original=row,
 214         )
 215
 216     @staticmethod
 217     def from_github_issue(issue: Dict):# -> PaperInfo:
 218         """
 219         Create a PaperInfo object from the Github issue information obtained from querying a Github Project.
 220         """
 221         # Extract the paper number from the issue title
 222         match = re.search(r"((P[0-9R]+)|(LWG[0-9]+)|(N[0-9]+)):", issue['title'])
 223         if match is None:
 224             raise RuntimeError(f"Issue doesn't have a title that we know how to parse: {issue}")
 225         paper = match.group(1)
 226
 227         # Extract any notes from the Github issue and populate the RST notes with them
 228         issue_description = issue['content']['body']
 229         notes = extract_between_markers(issue_description, 'BEGIN-RST-NOTES', 'END-RST-NOTES')
 230         notes = notes.strip() if notes is not None else notes
 231
 232         return PaperInfo(
 233             paper_number=paper,
 234             paper_name=issue['title'],
 235             status=PaperStatus.from_github_issue(issue),
 236             meeting=issue.get('meeting Voted', None),
 237             first_released_version=None, # TODO
 238             notes=notes,
 239             original=issue,
 240         )
 241
 242 def merge(paper: PaperInfo, gh: PaperInfo) -> PaperInfo:
 243     """
 244     Merge a paper coming from a CSV row with a corresponding Github-tracked paper.
 245
 246     If the CSV row has a status that is "less advanced" than the Github issue, simply update the CSV
 247     row with the newer status. Otherwise, report an error if they have a different status because
 248     something must be wrong.
 249
 250     We don't update issues from 'To Do' to 'In Progress', since that only creates churn and the
 251     status files aim to document user-facing functionality in releases, for which 'In Progress'
 252     is not useful.
 253
 254     In case we don't update the CSV row's status, we still take any updated notes coming
 255     from the Github issue.
 256     """
 257     if paper.status == PaperStatus(PaperStatus.TODO) and gh.status == PaperStatus(PaperStatus.IN_PROGRESS):
 258         result = copy.deepcopy(paper)
 259         result.notes = gh.notes
 260     elif paper.status < gh.status:
 261         result = copy.deepcopy(gh)
 262     elif paper.status == gh.status:
 263         result = copy.deepcopy(paper)
 264         result.notes = gh.notes
 265     else:
 266         print(f"We found a CSV row and a Github issue with different statuses:\nrow: {paper}\nGithub issue: {gh}")
 267         result = copy.deepcopy(paper)
 268     return result
 269
 270 def load_csv(file: pathlib.Path) -> List[Tuple]:
 271     rows = []
 272     with open(file, newline='') as f:
 273         reader = csv.reader(f, delimiter=',')
 274         for row in reader:
 275             rows.append(row)
 276     return rows
 277
 278 def write_csv(output: pathlib.Path, rows: List[Tuple]):
 279     with open(output, 'w', newline='') as f:
 280         writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n')
 281         for row in rows:
 282             writer.writerow(row)
 283
 284 def sync_csv(rows: List[Tuple], from_github: List[PaperInfo]) -> List[Tuple]:
 285     """
 286     Given a list of CSV rows representing an existing status file and a list of PaperInfos representing
 287     up-to-date (but potentially incomplete) tracking information from Github, this function returns the
 288     new CSV rows synchronized with the up-to-date information.
 289
 290     Note that this only tracks changes from 'not implemented' issues to 'implemented'. If an up-to-date
 291     PaperInfo reports that a paper is not implemented but the existing CSV rows report it as implemented,
 292     it is an error (i.e. the result is not a CSV row where the paper is *not* implemented).
 293     """
 294     results = [rows[0]] # Start with the header
 295     for row in rows[1:]: # Skip the header
 296         # If the row contains empty entries, this is a "separator row" between meetings.
 297         # Preserve it as-is.
 298         if row[0] == "":
 299             results.append(row)
 300             continue
 301
 302         paper = PaperInfo.from_csv_row(row)
 303
 304         # Find any Github issues tracking this paper. Each row must have one and exactly one Github
 305         # issue tracking it, which we validate below.
 306         tracking = [gh for gh in from_github if paper.paper_number == gh.paper_number]
 307
 308         # If there is no tracking issue for that row in the CSV, this is an error since we're
 309         # missing a Github issue.
 310         if len(tracking) == 0:
 311             print(f"Can't find any Github issue for CSV row: {row}")
 312             results.append(row)
 313             continue
 314
 315         # If there's more than one tracking issue, something is weird too.
 316         if len(tracking) > 1:
 317             print(f"Found a row with more than one tracking issue: {row}\ntracked by: {tracking}")
 318             results.append(row)
 319             continue
 320
 321         results.append(merge(paper, tracking[0]).for_printing())
 322
 323     return results
 324
# Names of the status-tracking CSV files to synchronize with Github. main() looks each of
# them up in the docs/Status directory under the libc++ root and rewrites it in place.
CSV_FILES_TO_SYNC = [
    'Cxx17Issues.csv',
    'Cxx17Papers.csv',
    'Cxx20Issues.csv',
    'Cxx20Papers.csv',
    'Cxx23Issues.csv',
    'Cxx23Papers.csv',
    'Cxx2cIssues.csv',
    'Cxx2cPapers.csv',
]
 335
 336 def main():
 337     libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 338
 339     # Extract the list of PaperInfos from issues we're tracking on Github.
 340     print("Loading all issues from Github")
 341     gh_command_line = ['gh', 'project', 'item-list', LIBCXX_CONFORMANCE_PROJECT, '--owner', 'llvm', '--format', 'json', '--limit', '9999999']
 342     project_info = json.loads(subprocess.check_output(gh_command_line))
 343     from_github = [PaperInfo.from_github_issue(i) for i in project_info['items']]
 344
 345     for filename in CSV_FILES_TO_SYNC:
 346         print(f"Synchronizing {filename} with Github issues")
 347         file = libcxx_root / 'docs' / 'Status' / filename
 348         csv = load_csv(file)
 349         synced = sync_csv(csv, from_github)
 350         write_csv(file, synced)
 351
# Only run the synchronization when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()