scripts/maint/format_changelog.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2014-2019, The Tor Project, Inc.
   3 # See LICENSE for licensing information
   4 #
   5 # This script reformats a section of the changelog to wrap everything to
   6 # the right width and put blank lines in the right places.  Eventually,
   7 # it might include a linter.
   8 #
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script.
  11
  12 # Future imports for Python 2.7, mandatory in 3.0
  13 from __future__ import division
  14 from __future__ import print_function
  15 from __future__ import unicode_literals
  16
  17 import os
  18 import re
  19 import sys
  20 import optparse
  21
  22 # ==============================
  23 # Oh, look!  It's a cruddy approximation to Knuth's elegant text wrapping
  24 # algorithm, with totally ad hoc parameters!
  25 #
  26 # We're trying to minimize:
  27 #    The total of the cubes of ragged space on underflowed intermediate lines,
  28 #  PLUS
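#    OVERFLOW_PENALTY * the fourth power of overflowed characters
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY * the ragged space on the last line
#    (raised to LASTLINE_UNDERFLOW_EXPONENT, which is currently 1)
#  PLUS
#    ORPHAN_PENALTY if the last line is a single word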
  32 #  PLUS
  33 #    OPENPAREN_PENALTY for each line that starts with (
  34 #
  35 # We use an obvious dynamic programming algorithm to sorta approximate this.
  36 # It's not coded right or optimally, but it's fast enough for changelogs
  37 #
  38 # (Code found in an old directory of mine, lightly cleaned. -NM)
  39
  40 NO_HYPHENATE=set("""
  41 pf-divert
  42 tor-resolve
  43 tor-gencert
  44 """.split())
  45
  46 LASTLINE_UNDERFLOW_EXPONENT = 1
  47 LASTLINE_UNDERFLOW_PENALTY = 1
  48
  49 UNDERFLOW_EXPONENT = 3
  50 UNDERFLOW_PENALTY = 1
  51
  52 OVERFLOW_EXPONENT = 4
  53 OVERFLOW_PENALTY = 2000
  54
  55 ORPHAN_PENALTY = 10000
  56
  57 OPENPAREN_PENALTY = 200
  58
  59 def generate_wrapping(words, divisions):
  60     lines = []
  61     last = 0
  62     for i in divisions:
  63         w = words[last:i]
  64         last = i
  65         line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  66         lines.append(line.strip())
  67     return lines
  68
  69 def wrapping_quality(words, divisions, width1, width2):
  70     total = 0.0
  71
  72     lines = generate_wrapping(words, divisions)
  73     for line in lines:
  74         length = len(line)
  75         if line is lines[0]:
  76             width = width1
  77         else:
  78             width = width2
  79
  80         if line[0:1] == '(':
  81             total += OPENPAREN_PENALTY
  82
  83         if length > width:
  84             total += OVERFLOW_PENALTY * (
  85                 (length - width) ** OVERFLOW_EXPONENT )
  86         else:
  87             if line is lines[-1]:
  88                 e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  89                 if " " not in line:
  90                     total += ORPHAN_PENALTY
  91             else:
  92                 e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  93
  94             total += p * ((width - length) ** e)
  95
  96     return total
  97
  98 def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  99     wrapping_after = [ (0,), ]
 100
 101     w1 = width - prefix_len1
 102     w2 = width - prefix_len2
 103
 104     for i in range(1, len(words)+1):
 105         best_so_far = None
 106         best_score = 1e300
 107         for j in range(i):
 108             t = wrapping_after[j]
 109             t1 = t[:-1] + (i,)
 110             t2 = t + (i,)
 111             wq1 = wrapping_quality(words, t1, w1, w2)
 112             wq2 = wrapping_quality(words, t2, w1, w2)
 113
 114             if wq1 < best_score:
 115                 best_so_far = t1
 116                 best_score = wq1
 117             if wq2 < best_score:
 118                 best_so_far = t2
 119                 best_score = wq2
 120         wrapping_after.append( best_so_far )
 121
 122     lines = generate_wrapping(words, wrapping_after[-1])
 123
 124     return lines
 125
 126 def hyphenatable(word):
 127     if "--" in word:
 128         return False
 129
 130     if re.match(r'^[^\d\-]\D*-', word):
 131         stripped = re.sub(r'^\W+','',word)
 132         stripped = re.sub(r'\W+$','',word)
 133         return stripped not in NO_HYPHENATE
 134     else:
 135         return False
 136
 137 def split_paragraph(s):
 138     "Split paragraph into words; tuned for Tor."
 139
 140     r = []
 141     for word in s.split():
 142         if hyphenatable(word):
 143             while "-" in word:
 144                 a,word = word.split("-",1)
 145                 r.append(a+"\xff")
 146         r.append(word)
 147     return r
 148
 149 def fill(text, width, initial_indent, subsequent_indent):
 150     words = split_paragraph(text)
 151     lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
 152                       width)
 153     res = [ initial_indent, lines[0], "\n" ]
 154     for line in lines[1:]:
 155         res.append(subsequent_indent)
 156         res.append(line)
 157         res.append("\n")
 158     return "".join(res)
 159
 160 # ==============================
 161
 162
 163 TP_MAINHEAD = 0
 164 TP_HEADTEXT = 1
 165 TP_BLANK = 2
 166 TP_SECHEAD = 3
 167 TP_ITEMFIRST = 4
 168 TP_ITEMBODY = 5
 169 TP_END = 6
 170 TP_PREHEAD = 7
 171
 172 def head_parser(line):
 173     if re.match(r'^Changes in', line):
 174         return TP_MAINHEAD
 175     elif re.match(r'^[A-Za-z]', line):
 176         return TP_PREHEAD
 177     elif re.match(r'^  o ', line):
 178         return TP_SECHEAD
 179     elif re.match(r'^\s*$', line):
 180         return TP_BLANK
 181     else:
 182         return TP_HEADTEXT
 183
 184 def body_parser(line):
 185     if re.match(r'^  o ', line):
 186         return TP_SECHEAD
 187     elif re.match(r'^    -',line):
 188         return TP_ITEMFIRST
 189     elif re.match(r'^      \S', line):
 190         return TP_ITEMBODY
 191     elif re.match(r'^\s*$', line):
 192         return TP_BLANK
 193     elif re.match(r'^Changes in', line):
 194         return TP_END
 195     elif re.match(r'^\s+\S', line):
 196         return TP_HEADTEXT
 197     else:
 198         print("Weird line %r"%line, file=sys.stderr)
 199
 200 def clean_head(head):
 201     return head
 202
 203 def head_score(s):
 204     m = re.match(r'^ +o (.*)', s)
 205     if not m:
 206         print("Can't score %r"%s, file=sys.stderr)
 207         return 99999
 208     lw = m.group(1).lower()
 209     if lw.startswith("security") and "feature" not in lw:
 210         score = -300
 211     elif lw.startswith("deprecated version"):
 212         score = -200
 213     elif lw.startswith("directory auth"):
 214         score = -150
 215     elif (('new' in lw and 'requirement' in lw) or
 216           ('new' in lw and 'dependenc' in lw) or
 217           ('build' in lw and 'requirement' in lw) or
 218           ('removed' in lw and 'platform' in lw)):
 219         score = -100
 220     elif lw.startswith("major feature"):
 221         score = 00
 222     elif lw.startswith("major bug"):
 223         score = 50
 224     elif lw.startswith("major"):
 225         score = 70
 226     elif lw.startswith("minor feature"):
 227         score = 200
 228     elif lw.startswith("minor bug"):
 229         score = 250
 230     elif lw.startswith("minor"):
 231         score = 270
 232     else:
 233         score = 1000
 234
 235     if 'secur' in lw:
 236         score -= 2
 237
 238     if "(other)" in lw:
 239         score += 2
 240
 241     if '(' not in lw:
 242         score -= 1
 243
 244     return score
 245
 246 class ChangeLog(object):
 247     def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
 248         self.prehead = []
 249         self.mainhead = None
 250         self.headtext = []
 251         self.curgraf = None
 252         self.sections = []
 253         self.cursection = None
 254         self.lineno = 0
 255         self.wrapText = wrapText
 256         self.blogOrder = blogOrder
 257         self.drupalBreak = drupalBreak
 258
 259     def addLine(self, tp, line):
 260         self.lineno += 1
 261
 262         if tp == TP_MAINHEAD:
 263             assert not self.mainhead
 264             self.mainhead = line
 265
 266         elif tp == TP_PREHEAD:
 267             self.prehead.append(line)
 268
 269         elif tp == TP_HEADTEXT:
 270             if self.curgraf is None:
 271                 self.curgraf = []
 272                 self.headtext.append(self.curgraf)
 273             self.curgraf.append(line)
 274
 275         elif tp == TP_BLANK:
 276             self.curgraf = None
 277
 278         elif tp == TP_SECHEAD:
 279             self.cursection = [ self.lineno, line, [] ]
 280             self.sections.append(self.cursection)
 281
 282         elif tp == TP_ITEMFIRST:
 283             item = ( self.lineno, [ [line] ])
 284             self.curgraf = item[1][0]
 285             self.cursection[2].append(item)
 286
 287         elif tp == TP_ITEMBODY:
 288             if self.curgraf is None:
 289                 self.curgraf = []
 290                 self.cursection[2][-1][1].append(self.curgraf)
 291             self.curgraf.append(line)
 292
 293         else:
 294             assert False  # This should be unreachable.
 295
 296     def lint_head(self, line, head):
 297         m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
 298         if not m:
 299             print("Weird header format on line %s"%line, file=sys.stderr)
 300
 301     def lint_item(self, line, grafs, head_type):
 302         pass
 303
 304     def lint(self):
 305         self.head_lines = {}
 306         for sec_line, sec_head, items in self.sections:
 307             head_type = self.lint_head(sec_line, sec_head)
 308             for item_line, grafs in items:
 309                 self.lint_item(item_line, grafs, head_type)
 310
 311     def dumpGraf(self,par,indent1,indent2=-1):
 312         if not self.wrapText:
 313             for line in par:
 314                 print(line)
 315             return
 316
 317         if indent2 == -1:
 318             indent2 = indent1
 319         text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
 320
 321         sys.stdout.write(fill(text,
 322                               width=72,
 323                               initial_indent=" "*indent1,
 324                               subsequent_indent=" "*indent2))
 325
 326     def dumpPreheader(self, graf):
 327         self.dumpGraf(graf, 0)
 328         print()
 329
 330     def dumpMainhead(self, head):
 331         print(head)
 332
 333     def dumpHeadGraf(self, graf):
 334         self.dumpGraf(graf, 2)
 335         print()
 336
 337     def dumpSectionHeader(self, header):
 338         print(header)
 339
 340     def dumpStartOfSections(self):
 341         pass
 342
 343     def dumpEndOfSections(self):
 344         pass
 345
 346     def dumpEndOfSection(self):
 347         print()
 348
 349     def dumpEndOfChangelog(self):
 350         print()
 351
 352     def dumpDrupalBreak(self):
 353         pass
 354
 355     def dumpItem(self, grafs):
 356         self.dumpGraf(grafs[0],4,6)
 357         for par in grafs[1:]:
 358             print()
 359             self.dumpGraf(par,6,6)
 360
 361     def collateAndSortSections(self):
 362         heads = []
 363         sectionsByHead = { }
 364         for _, head, items in self.sections:
 365             head = clean_head(head)
 366             try:
 367                 s = sectionsByHead[head]
 368             except KeyError:
 369                 s = sectionsByHead[head] = []
 370                 heads.append( (head_score(head), head.lower(), head, s) )
 371
 372             s.extend(items)
 373
 374         heads.sort()
 375         self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
 376
 377     def dump(self):
 378         if self.prehead:
 379             self.dumpPreheader(self.prehead)
 380
 381         if not self.blogOrder:
 382             self.dumpMainhead(self.mainhead)
 383
 384         for par in self.headtext:
 385             self.dumpHeadGraf(par)
 386
 387         if self.blogOrder:
 388             self.dumpMainhead(self.mainhead)
 389
 390         drupalBreakAfter = None
 391         if self.drupalBreak and len(self.sections) > 4:
 392             drupalBreakAfter = self.sections[1][2]
 393
 394         self.dumpStartOfSections()
 395         for _,head,items in self.sections:
 396             if not head.endswith(':'):
 397                 print("adding : to %r"%head, file=sys.stderr)
 398                 head = head + ":"
 399             self.dumpSectionHeader(head)
 400             for _,grafs in items:
 401                 self.dumpItem(grafs)
 402             self.dumpEndOfSection()
 403             if items is drupalBreakAfter:
 404                 self.dumpDrupalBreak()
 405         self.dumpEndOfSections()
 406         self.dumpEndOfChangelog()
 407
 408 # Map from issue prefix to pair of (visible prefix, url prefix)
 409 ISSUE_PREFIX_MAP = {
 410     "" : ( "", "tpo/core/tor" ),
 411     "tor#" : ( "", "tpo/core/tor" ),
 412     "chutney#" : ( "chutney#", "tpo/core/chutney" ),
 413     "torspec#" : ( "torspec#", "tpo/core/torspec" ),
 414     "trunnel#" : ( "trunnel#", "tpo/core/trunnel" ),
 415     "torsocks#" : ( "torsocks#", "tpo/core/torsocks"),
 416 }
 417
 418 # Let's turn bugs to html.
 419 BUG_PAT = re.compile('(bug|ticket|issue|feature)\s+([\w/]+#)?(\d{4,6})', re.I)
 420 def bug_html(m):
 421     kind = m.group(1)
 422     prefix = m.group(2) or ""
 423     bugno = m.group(3)
 424     try:
 425         disp_prefix, url_prefix = ISSUE_PREFIX_MAP[prefix]
 426     except KeyError:
 427         print("Can't figure out URL for {}{}".format(prefix,bugno),
 428               file=sys.stderr)
 429         return "{} {}{}".format(kind, prefix, bugno)
 430
 431     return "{} <a href='https://bugs.torproject.org/{}/{}'>{}{}</a>".format(
 432         kind, url_prefix, bugno, disp_prefix, bugno)
 433
 434 class HTMLChangeLog(ChangeLog):
 435     def __init__(self, *args, **kwargs):
 436         ChangeLog.__init__(self, *args, **kwargs)
 437
 438     def htmlText(self, graf):
 439         output = []
 440         for line in graf:
 441             line = line.rstrip().replace("&","&amp;")
 442             line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
 443             output.append(line.strip())
 444         output = " ".join(output)
 445         output = BUG_PAT.sub(bug_html, output)
 446         sys.stdout.write(output)
 447
 448     def htmlPar(self, graf):
 449         sys.stdout.write("<p>")
 450         self.htmlText(graf)
 451         sys.stdout.write("</p>\n")
 452
 453     def dumpPreheader(self, graf):
 454         self.htmlPar(graf)
 455
 456     def dumpMainhead(self, head):
 457         sys.stdout.write("<h2>%s</h2>"%head)
 458
 459     def dumpHeadGraf(self, graf):
 460         self.htmlPar(graf)
 461
 462     def dumpSectionHeader(self, header):
 463         header = header.replace(" o ", "", 1).lstrip()
 464         sys.stdout.write("  <li>%s\n"%header)
 465         sys.stdout.write("  <ul>\n")
 466
 467     def dumpEndOfSection(self):
 468         sys.stdout.write("  </ul>\n\n")
 469
 470     def dumpEndOfChangelog(self):
 471         pass
 472
 473     def dumpStartOfSections(self):
 474         print("<ul>\n")
 475
 476     def dumpEndOfSections(self):
 477         print("</ul>\n")
 478
 479     def dumpDrupalBreak(self):
 480         print("\n</ul>\n")
 481         print("<p>&nbsp;</p>")
 482         print("\n<!--break-->\n\n")
 483         print("<ul>")
 484
 485     def dumpItem(self, grafs):
 486         grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
 487         sys.stdout.write("  <li>")
 488         if len(grafs) > 1:
 489             for par in grafs:
 490                 self.htmlPar(par)
 491         else:
 492             self.htmlText(grafs[0])
 493         print()
 494
 495 op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
 496 op.add_option('-W', '--no-wrap', action='store_false',
 497               dest='wrapText', default=True,
 498               help='Do not re-wrap paragraphs')
 499 op.add_option('-S', '--no-sort', action='store_false',
 500               dest='sort', default=True,
 501               help='Do not sort or collate sections')
 502 op.add_option('-o', '--output', dest='output',
 503               default='-', metavar='FILE', help="write output to FILE")
 504 op.add_option('-H', '--html', action='store_true',
 505               dest='html', default=False,
 506               help="generate an HTML fragment")
 507 op.add_option('-1', '--first', action='store_true',
 508               dest='firstOnly', default=False,
 509               help="write only the first section")
 510 op.add_option('-b', '--blog-header', action='store_true',
 511               dest='blogOrder', default=False,
 512               help="Write the header in blog order")
 513 op.add_option('-B', '--blog', action='store_true',
 514               dest='blogFormat', default=False,
 515               help="Set all other options as appropriate for a blog post")
 516 op.add_option('--inplace', action='store_true',
 517               dest='inplace', default=False,
 518               help="Alter the ChangeLog in place")
 519 op.add_option('--drupal-break', action='store_true',
 520               dest='drupalBreak', default=False,
 521               help='Insert a drupal-friendly <!--break--> as needed')
 522
 523 options,args = op.parse_args()
 524
 525 if options.blogFormat:
 526     options.blogOrder = True
 527     options.html = True
 528     options.sort = False
 529     options.wrapText = False
 530     options.firstOnly = True
 531     options.drupalBreak = True
 532
 533 if len(args) > 1:
 534     op.error("Too many arguments")
 535 elif len(args) == 0:
 536     fname = 'ChangeLog'
 537 else:
 538     fname = args[0]
 539
 540 if options.inplace:
 541     assert options.output == '-'
 542     options.output = fname
 543
 544 if fname != '-':
 545     sys.stdin = open(fname, 'r')
 546
 547 nextline = None
 548
 549 if options.html:
 550     ChangeLogClass = HTMLChangeLog
 551 else:
 552     ChangeLogClass = ChangeLog
 553
 554 CL = ChangeLogClass(wrapText=options.wrapText,
 555                     blogOrder=options.blogOrder,
 556                     drupalBreak=options.drupalBreak)
 557 parser = head_parser
 558
 559 for line in sys.stdin:
 560     line = line.rstrip()
 561     tp = parser(line)
 562
 563     if tp == TP_SECHEAD:
 564         parser = body_parser
 565     elif tp == TP_END:
 566         nextline = line
 567         break
 568
 569     CL.addLine(tp,line)
 570
 571 CL.lint()
 572
 573 if options.output != '-':
 574     fname_new = options.output+".new"
 575     fname_out = options.output
 576     sys.stdout = open(fname_new, 'w')
 577 else:
 578     fname_new = fname_out = None
 579
 580 if options.sort:
 581     CL.collateAndSortSections()
 582
 583 CL.dump()
 584
 585 if options.firstOnly:
 586     sys.exit(0)
 587
 588 if nextline is not None:
 589     print(nextline)
 590
 591 for line in sys.stdin:
 592     sys.stdout.write(line)
 593
 594 if fname_new is not None:
 595     os.rename(fname_new, fname_out)