Merge branch 'maint-0.4.8'
[tor.git] / scripts / maint / format_changelog.py
blob32b47ffcbb136146c4c8e98fa4bd9d3a50d85902
1 #!/usr/bin/env python
2 # Copyright (c) 2014-2019, The Tor Project, Inc.
3 # See LICENSE for licensing information
5 # This script reformats a section of the changelog to wrap everything to
6 # the right width and put blank lines in the right places. Eventually,
7 # it might include a linter.
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script.
12 # Future imports for Python 2.7, mandatory in 3.0
13 from __future__ import division
14 from __future__ import print_function
15 from __future__ import unicode_literals
17 import os
18 import re
19 import sys
20 import optparse
22 # ==============================
23 # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
24 # algorithm, with totally ad hoc parameters!
# We're trying to minimize:
#    UNDERFLOW_PENALTY * (ragged space ** UNDERFLOW_EXPONENT) for each
#      underflowed intermediate line,
#  PLUS
#    OVERFLOW_PENALTY * (overflowed characters ** OVERFLOW_EXPONENT) for
#      each overflowed line,
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY *
#      (ragged space ** LASTLINE_UNDERFLOW_EXPONENT) on the last line,
#  PLUS
#    ORPHAN_PENALTY if the last line fits but contains no space (that is,
#      it is a single word),
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with (
#
# We use an obvious dynamic programming algorithm to sorta approximate this.
# It's not coded right or optimally, but it's fast enough for changelogs.
38 # (Code found in an old directory of mine, lightly cleaned. -NM)
# Words that contain a hyphen but must never be split at it.
NO_HYPHENATE = {
    "pf-divert",
    "tor-resolve",
    "tor-gencert",
}

# Cost of unused columns on the final line of a paragraph:
#   LASTLINE_UNDERFLOW_PENALTY * (unused columns ** LASTLINE_UNDERFLOW_EXPONENT)
LASTLINE_UNDERFLOW_EXPONENT = 1
LASTLINE_UNDERFLOW_PENALTY = 1

# Cost of unused columns on every other line:
#   UNDERFLOW_PENALTY * (unused columns ** UNDERFLOW_EXPONENT)
UNDERFLOW_EXPONENT = 3
UNDERFLOW_PENALTY = 1

# Cost of running past the target width:
#   OVERFLOW_PENALTY * (excess columns ** OVERFLOW_EXPONENT)
OVERFLOW_EXPONENT = 4
OVERFLOW_PENALTY = 2000

# Flat cost charged when the final line fits but contains no space.
ORPHAN_PENALTY = 10000

# Flat cost charged for each line that begins with "(".
OPENPAREN_PENALTY = 200
def generate_wrapping(words, divisions):
    """Render `words` as a list of lines, breaking at each index in `divisions`.

    Each entry of `divisions` is the exclusive end index of one line; the
    line starts where the previous one ended (the first starts at 0).

    A "\xff" inside a word marks a hyphenation point.  When the next piece
    lands on the same line the marker and the joining space collapse into
    "-"; a marker left at the end of a line also becomes "-".
    """
    ends = list(divisions)
    starts = [0] + ends[:-1]

    rendered = []
    for begin, end in zip(starts, ends):
        joined = " ".join(words[begin:end])
        joined = joined.replace("\xff ", "-").replace("\xff", "-")
        rendered.append(joined.strip())
    return rendered
def wrapping_quality(words, divisions, width1, width2):
    """Return the badness score of wrapping `words` at `divisions`.

    Lower is better.  `width1` is the target width of the first line and
    `width2` the target width of every later line.

    The score is the sum, over all lines, of:
      * OPENPAREN_PENALTY if the line starts with "(";
      * an overflow cost if the line is longer than its width; otherwise
      * an underflow cost for the unused columns, using the last-line
        constants for the final line and the regular ones elsewhere, plus
        ORPHAN_PENALTY when the final line contains no space.
    """
    total = 0.0

    lines = generate_wrapping(words, divisions)
    last_index = len(lines) - 1

    # First/last detection is done by position.  Comparing the strings with
    # `is` can be true for two different lines that happen to share a string
    # object (for example equal one-character lines), which would apply the
    # wrong width or the last-line penalties to an interior line.
    for index, line in enumerate(lines):
        length = len(line)
        width = width1 if index == 0 else width2

        if line[0:1] == '(':
            total += OPENPAREN_PENALTY

        if length > width:
            total += OVERFLOW_PENALTY * (
                (length - width) ** OVERFLOW_EXPONENT)
            continue

        if index == last_index:
            e, p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
            if " " not in line:
                total += ORPHAN_PENALTY
        else:
            e, p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)

        total += p * ((width - length) ** e)

    return total
def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
    """Choose line breaks for `words` and return the resulting lines.

    `prefix_len1` and `prefix_len2` are the indent widths reserved on the
    first and on subsequent lines; they are subtracted from `width` to get
    the usable text width for each.

    wrapping_after[k] holds the lowest-scoring division tuple found for the
    first k words.  For each k, every earlier entry is extended in two ways:
    stretching its final line to end at k, or starting a new line that ends
    at k.  Ties keep the earliest candidate examined.
    """
    first_width = width - prefix_len1
    later_width = width - prefix_len2

    wrapping_after = [(0,)]

    for end in range(1, len(words) + 1):
        winner = None
        winner_cost = 1e300
        # wrapping_after has exactly `end` entries at this point.
        for earlier in wrapping_after:
            stretched = earlier[:-1] + (end,)
            appended = earlier + (end,)
            for candidate in (stretched, appended):
                cost = wrapping_quality(words, candidate,
                                        first_width, later_width)
                if cost < winner_cost:
                    winner = candidate
                    winner_cost = cost
        wrapping_after.append(winner)

    return generate_wrapping(words, wrapping_after[-1])
def hyphenatable(word):
    """Return True if `word` may be broken across lines at its hyphens.

    A word qualifies when it contains no "--", starts with a character that
    is neither a digit nor a hyphen, has no digit before its first hyphen,
    and, once surrounding punctuation is removed, is not listed in
    NO_HYPHENATE.
    """
    if "--" in word:
        return False

    if not re.match(r'^[^\d\-]\D*-', word):
        return False

    # Strip punctuation from BOTH ends before the lookup.  The second
    # substitution must operate on the result of the first; otherwise a
    # word such as "(tor-resolve" keeps its "(" and slips past NO_HYPHENATE.
    stripped = re.sub(r'^\W+', '', word)
    stripped = re.sub(r'\W+$', '', stripped)
    return stripped not in NO_HYPHENATE
def split_paragraph(s):
    """Break a paragraph into wrap units, with Tor-specific hyphen handling.

    The text is split on whitespace.  A word accepted by hyphenatable() is
    further split at each "-"; every piece except the last gets a trailing
    "\xff" marker in place of the hyphen so that generate_wrapping() can
    restore it.
    """
    pieces = []
    for token in s.split():
        if hyphenatable(token):
            fragments = token.split("-")
            pieces.extend(fragment + "\xff" for fragment in fragments[:-1])
            token = fragments[-1]
        pieces.append(token)
    return pieces
def fill(text, width, initial_indent, subsequent_indent):
    """Wrap `text` to `width` columns and return it as one string.

    The first line is prefixed with `initial_indent`, every later line with
    `subsequent_indent`, and each line (including the last) ends with a
    newline.
    """
    pieces = split_paragraph(text)
    wrapped = wrap_graf(pieces, len(initial_indent), len(subsequent_indent),
                        width)

    out = [initial_indent + wrapped[0] + "\n"]
    out.extend(subsequent_indent + later + "\n" for later in wrapped[1:])
    return "".join(out)
160 # ==============================
# Line classifications produced by head_parser() and body_parser() and
# consumed by ChangeLog.addLine().  TP_END is only used by the main loop to
# stop reading; it is never passed to addLine().
(
    TP_MAINHEAD,    # 0: the "Changes in ..." title line
    TP_HEADTEXT,    # 1: a line of introductory text
    TP_BLANK,       # 2: an empty or whitespace-only line
    TP_SECHEAD,     # 3: an "o ..." section heading
    TP_ITEMFIRST,   # 4: the first line of a "-" item
    TP_ITEMBODY,    # 5: a continuation line of an item
    TP_END,         # 6: the "Changes in ..." line of the next release
    TP_PREHEAD,     # 7: text appearing before the title line
) = range(8)
def head_parser(line):
    """Classify a line read before the first section heading.

    Returns one of the TP_* constants.  Patterns are tried in order and the
    first match wins; anything unmatched is treated as header text.
    """
    # NOTE(review): the section-heading pattern depends on exact leading
    # whitespace; confirm it against the ChangeLog layout in use.
    ordered_rules = (
        (r'^Changes in', TP_MAINHEAD),
        (r'^[A-Za-z]', TP_PREHEAD),
        (r'^ o ', TP_SECHEAD),
        (r'^\s*$', TP_BLANK),
    )
    for pattern, line_type in ordered_rules:
        if re.match(pattern, line):
            return line_type
    return TP_HEADTEXT
def body_parser(line):
    """Classify a line read after the first section heading.

    Returns one of the TP_* constants.  Patterns are tried in order and the
    first match wins.  A line matching none of them is reported on stderr
    and None is returned.
    """
    # NOTE(review): the item patterns share the same one-space prefix as
    # written here, so only their order tells them apart; confirm the
    # intended indentation widths against the ChangeLog layout in use.
    ordered_rules = (
        (r'^ o ', TP_SECHEAD),
        (r'^ -', TP_ITEMFIRST),
        (r'^ \S', TP_ITEMBODY),
        (r'^\s*$', TP_BLANK),
        (r'^Changes in', TP_END),
        (r'^\s+\S', TP_HEADTEXT),
    )
    for pattern, line_type in ordered_rules:
        if re.match(pattern, line):
            return line_type

    print("Weird line %r"%line, file=sys.stderr)
    return None
def clean_head(head):
    """Return the section heading used as the collation key.

    Currently the heading is returned unchanged.
    """
    return head
def head_score(s):
    """Return the sort key for a section heading; lower sorts earlier.

    `s` must look like "<spaces>o <title>".  If it does not, a message is
    written to stderr and 99999 is returned.

    The title is lower-cased and given a base score by the first rule that
    applies, then nudged: -2 if it contains "secur", +2 if it contains
    "(other)", and -1 if it has no "(" at all.
    """
    found = re.match(r'^ +o (.*)', s)
    if found is None:
        print("Can't score %r"%s, file=sys.stderr)
        return 99999

    title = found.group(1).lower()

    def mentions(first, second):
        # True when both substrings occur somewhere in the title.
        return first in title and second in title

    if title.startswith("security") and "feature" not in title:
        score = -300
    elif title.startswith("deprecated version"):
        score = -200
    elif title.startswith("directory auth"):
        score = -150
    elif (mentions('new', 'requirement') or
          mentions('new', 'dependenc') or
          mentions('build', 'requirement') or
          mentions('removed', 'platform')):
        score = -100
    else:
        # Major/minor buckets; more specific prefixes come first.
        score = 1000
        for prefix, rank in (("major feature", 0),
                             ("major bug", 50),
                             ("major", 70),
                             ("minor feature", 200),
                             ("minor bug", 250),
                             ("minor", 270)):
            if title.startswith(prefix):
                score = rank
                break

    if 'secur' in title:
        score -= 2
    if "(other)" in title:
        score += 2
    if '(' not in title:
        score -= 1

    return score
class ChangeLog(object):
    """Parsed form of one release's changelog entry, with a plain-text writer.

    Lines are fed in one at a time through addLine(), already classified as
    TP_* values.  The result is stored as:

      prehead   -- list of lines seen before the title
      mainhead  -- the title line
      headtext  -- list of paragraphs (each a list of lines)
      sections  -- list of [lineno, heading, items]; each item is
                   (lineno, paragraphs)

    The dump*() methods are hooks that subclasses override to emit other
    output formats.
    """

    def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
        # Output switches.
        self.wrapText = wrapText
        self.blogOrder = blogOrder
        self.drupalBreak = drupalBreak

        # Parsed content.
        self.prehead = []
        self.mainhead = None
        self.headtext = []
        self.sections = []

        # Parser cursor: the paragraph and section currently being filled,
        # and the number of lines consumed so far.
        self.curgraf = None
        self.cursection = None
        self.lineno = 0

    def addLine(self, tp, line):
        """Record `line`, whose classification is the TP_* value `tp`."""
        self.lineno += 1

        if tp == TP_BLANK:
            # A blank line ends the paragraph in progress.
            self.curgraf = None
            return

        if tp == TP_MAINHEAD:
            assert not self.mainhead
            self.mainhead = line
            return

        if tp == TP_PREHEAD:
            self.prehead.append(line)
            return

        if tp == TP_SECHEAD:
            self.cursection = [ self.lineno, line, [] ]
            self.sections.append(self.cursection)
            return

        if tp == TP_ITEMFIRST:
            opening = [line]
            self.curgraf = opening
            self.cursection[2].append( (self.lineno, [opening]) )
            return

        if tp == TP_HEADTEXT:
            if self.curgraf is None:
                self.curgraf = []
                self.headtext.append(self.curgraf)
            self.curgraf.append(line)
            return

        if tp == TP_ITEMBODY:
            if self.curgraf is None:
                # Start a further paragraph inside the most recent item.
                self.curgraf = []
                self.cursection[2][-1][1].append(self.curgraf)
            self.curgraf.append(line)
            return

        assert False # This should be unreachable.

    def lint_head(self, line, head):
        """Warn on stderr if section heading `head` is not 'o Title[ (x)]:'."""
        if re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head) is None:
            print("Weird header format on line %s"%line, file=sys.stderr)

    def lint_item(self, line, grafs, head_type):
        """Hook for per-item checks; does nothing here."""
        pass

    def lint(self):
        """Run lint_head() on every section and lint_item() on every item."""
        self.head_lines = {}
        for sec_line, sec_head, items in self.sections:
            head_type = self.lint_head(sec_line, sec_head)
            for entry in items:
                item_line, grafs = entry
                self.lint_item(item_line, grafs, head_type)

    def dumpGraf(self,par,indent1,indent2=-1):
        """Write paragraph `par` (a list of lines) to stdout.

        With wrapping disabled the lines are printed untouched.  Otherwise
        they are joined, whitespace runs are collapsed, and the text is
        re-wrapped to 72 columns with `indent1` spaces before the first line
        and `indent2` before the rest (`indent2` defaults to `indent1`).
        """
        if self.wrapText:
            hanging = indent1 if indent2 == -1 else indent2
            squeezed = [re.sub(r'\s+', ' ', raw.strip()) for raw in par]
            sys.stdout.write(fill(" ".join(squeezed),
                                  width=72,
                                  initial_indent=" "*indent1,
                                  subsequent_indent=" "*hanging))
        else:
            for raw in par:
                print(raw)

    def dumpPreheader(self, graf):
        """Write the text that precedes the title, then a blank line."""
        self.dumpGraf(graf, 0)
        print()

    def dumpMainhead(self, head):
        """Write the title line."""
        print(head)

    def dumpHeadGraf(self, graf):
        """Write one introductory paragraph, then a blank line."""
        self.dumpGraf(graf, 2)
        print()

    def dumpSectionHeader(self, header):
        """Write a section heading."""
        print(header)

    def dumpStartOfSections(self):
        """Hook called before the first section; does nothing here."""
        pass

    def dumpEndOfSections(self):
        """Hook called after the last section; does nothing here."""
        pass

    def dumpEndOfSection(self):
        """Write the blank line that follows each section."""
        print()

    def dumpEndOfChangelog(self):
        """Write the blank line that ends the whole entry."""
        print()

    def dumpDrupalBreak(self):
        """Hook for a teaser break marker; does nothing here."""
        pass

    def dumpItem(self, grafs):
        """Write one item: its first paragraph, then any further ones."""
        first, rest = grafs[0], grafs[1:]
        self.dumpGraf(first,4,6)
        for par in rest:
            print()
            self.dumpGraf(par,6,6)

    def collateAndSortSections(self):
        """Merge sections with identical headings and order them by score.

        Items of same-named sections are concatenated in their original
        order.  Sections are then sorted by head_score(), with the
        lower-cased heading as tie-breaker.  Stored line numbers become 0.
        """
        merged = { }
        ranked = []
        for _, head, items in self.sections:
            head = clean_head(head)
            if head not in merged:
                bucket = merged[head] = []
                ranked.append( (head_score(head), head.lower(), head, bucket) )
            merged[head].extend(items)

        self.sections = [ (0, entry[2], entry[3]) for entry in sorted(ranked) ]

    def dump(self):
        """Write the whole entry by calling the dump*() hooks in order."""
        if self.prehead:
            self.dumpPreheader(self.prehead)

        # In blog order the title follows the introduction; otherwise it
        # comes first.
        if not self.blogOrder:
            self.dumpMainhead(self.mainhead)

        for par in self.headtext:
            self.dumpHeadGraf(par)

        if self.blogOrder:
            self.dumpMainhead(self.mainhead)

        # When requested and there are more than four sections, the break is
        # emitted right after the second one, identified by its item list.
        drupalBreakAfter = None
        if self.drupalBreak and len(self.sections) > 4:
            drupalBreakAfter = self.sections[1][2]

        self.dumpStartOfSections()
        for _,head,items in self.sections:
            if not head.endswith(':'):
                print("adding : to %r"%head, file=sys.stderr)
                head = head + ":"
            self.dumpSectionHeader(head)
            for _,grafs in items:
                self.dumpItem(grafs)
            self.dumpEndOfSection()
            if items is drupalBreakAfter:
                self.dumpDrupalBreak()
        self.dumpEndOfSections()
        self.dumpEndOfChangelog()
# Map from issue prefix to pair of (visible prefix, url prefix).
# The empty prefix and "tor#" both refer to the core tor tracker and are
# displayed without any prefix.
ISSUE_PREFIX_MAP = {
    "" : ( "", "tpo/core/tor" ),
    "tor#" : ( "", "tpo/core/tor" ),
    "chutney#" : ( "chutney#", "tpo/core/chutney" ),
    "torspec#" : ( "torspec#", "tpo/core/torspec" ),
    "trunnel#" : ( "trunnel#", "tpo/core/trunnel" ),
    "torsocks#" : ( "torsocks#", "tpo/core/torsocks"),
}

# Let's turn bugs to html.
#
# Matches e.g. "bug 12345" or "ticket chutney#40001": a keyword, whitespace,
# an optional "project#" prefix, and a 4-6 digit number.  The pattern is a
# raw string so that \s, \w and \d reach the regex engine unchanged instead
# of being treated as (invalid) string escapes.
BUG_PAT = re.compile(r'(bug|ticket|issue|feature)\s+([\w/]+#)?(\d{4,6})', re.I)

def bug_html(m):
    """Return the HTML replacement for one BUG_PAT match `m`.

    Known prefixes become a link to the matching tracker on
    bugs.torproject.org.  For an unknown prefix a message is written to
    stderr and the reference is returned as plain text.
    """
    kind = m.group(1)
    prefix = m.group(2) or ""
    bugno = m.group(3)
    try:
        disp_prefix, url_prefix = ISSUE_PREFIX_MAP[prefix]
    except KeyError:
        print("Can't figure out URL for {}{}".format(prefix,bugno),
              file=sys.stderr)
        return "{} {}{}".format(kind, prefix, bugno)

    return "{} <a href='https://bugs.torproject.org/{}/{}'>{}{}</a>".format(
        kind, url_prefix, bugno, disp_prefix, bugno)
class HTMLChangeLog(ChangeLog):
    """ChangeLog writer that emits an HTML fragment instead of plain text.

    Sections become items of an outer <ul>, each holding a nested <ul> of
    changelog items.  Bug references are turned into links via BUG_PAT and
    bug_html().
    """

    def __init__(self, *args, **kwargs):
        super(HTMLChangeLog, self).__init__(*args, **kwargs)

    def htmlText(self, graf):
        """Write paragraph `graf` as one line of escaped, linkified HTML."""
        escaped_lines = []
        for raw in graf:
            # "&" must be escaped first so the entities added for "<" and
            # ">" are not escaped again.
            escaped = raw.rstrip().replace("&","&amp;")
            escaped = escaped.replace("<","&lt;").replace(">","&gt;")
            escaped_lines.append(escaped.strip())
        joined = " ".join(escaped_lines)
        sys.stdout.write(BUG_PAT.sub(bug_html, joined))

    def htmlPar(self, graf):
        """Write paragraph `graf` wrapped in <p>...</p>."""
        sys.stdout.write("<p>")
        self.htmlText(graf)
        sys.stdout.write("</p>\n")

    def dumpPreheader(self, graf):
        """Write the pre-title text as an HTML paragraph."""
        self.htmlPar(graf)

    def dumpMainhead(self, head):
        """Write the title as an <h2> element."""
        sys.stdout.write("<h2>%s</h2>"%head)

    def dumpHeadGraf(self, graf):
        """Write one introductory paragraph as an HTML paragraph."""
        self.htmlPar(graf)

    def dumpSectionHeader(self, header):
        """Open a section: an <li> with the heading, then a nested <ul>."""
        title = header.replace(" o ", "", 1).lstrip()
        sys.stdout.write(" <li>%s\n"%title)
        sys.stdout.write(" <ul>\n")

    def dumpEndOfSection(self):
        """Close the section's nested <ul>."""
        sys.stdout.write(" </ul>\n\n")

    def dumpEndOfChangelog(self):
        """Nothing is written after the last section list."""
        pass

    def dumpStartOfSections(self):
        """Open the outer list of sections."""
        print("<ul>\n")

    def dumpEndOfSections(self):
        """Close the outer list of sections."""
        print("</ul>\n")

    def dumpDrupalBreak(self):
        """Close the list, write the <!--break--> marker, and reopen it."""
        for chunk in ("\n</ul>\n",
                      "<p>&nbsp;</p>",
                      "\n<!--break-->\n\n",
                      "<ul>"):
            print(chunk)

    def dumpItem(self, grafs):
        """Write one item as an <li>.

        The leading " - " marker is stripped from the item's first line in
        place.  A single-paragraph item is written inline; a
        multi-paragraph item gets one <p> per paragraph.
        """
        opening = grafs[0]
        opening[0] = opening[0].replace(" - ", "", 1).lstrip()

        sys.stdout.write(" <li>")
        if len(grafs) > 1:
            for par in grafs:
                self.htmlPar(par)
        else:
            self.htmlText(opening)
        print()
495 op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
496 op.add_option('-W', '--no-wrap', action='store_false',
497 dest='wrapText', default=True,
498 help='Do not re-wrap paragraphs')
499 op.add_option('-S', '--no-sort', action='store_false',
500 dest='sort', default=True,
501 help='Do not sort or collate sections')
502 op.add_option('-o', '--output', dest='output',
503 default='-', metavar='FILE', help="write output to FILE")
504 op.add_option('-H', '--html', action='store_true',
505 dest='html', default=False,
506 help="generate an HTML fragment")
507 op.add_option('-1', '--first', action='store_true',
508 dest='firstOnly', default=False,
509 help="write only the first section")
510 op.add_option('-b', '--blog-header', action='store_true',
511 dest='blogOrder', default=False,
512 help="Write the header in blog order")
513 op.add_option('-B', '--blog', action='store_true',
514 dest='blogFormat', default=False,
515 help="Set all other options as appropriate for a blog post")
516 op.add_option('--inplace', action='store_true',
517 dest='inplace', default=False,
518 help="Alter the ChangeLog in place")
519 op.add_option('--drupal-break', action='store_true',
520 dest='drupalBreak', default=False,
521 help='Insert a drupal-friendly <!--break--> as needed')
options,args = op.parse_args()

# --blog is shorthand for the bundle of settings used for blog posts.
if options.blogFormat:
    options.blogOrder = True
    options.html = True
    options.sort = False
    options.wrapText = False
    options.firstOnly = True
    options.drupalBreak = True

if len(args) > 1:
    op.error("Too many arguments")
elif len(args) == 0:
    fname = 'ChangeLog'
else:
    fname = args[0]

if options.inplace:
    # Report a usage error instead of asserting: assert statements are
    # removed when Python runs with -O, which would let -o be silently
    # overridden.
    if options.output != '-':
        op.error("--inplace cannot be combined with --output")
    options.output = fname

if fname != '-':
    sys.stdin = open(fname, 'r')

nextline = None

ChangeLogClass = HTMLChangeLog if options.html else ChangeLog

CL = ChangeLogClass(wrapText=options.wrapText,
                    blogOrder=options.blogOrder,
                    drupalBreak=options.drupalBreak)

# Lines before the first section heading are classified by head_parser;
# from that heading on, body_parser takes over.
parser = head_parser

for line in sys.stdin:
    line = line.rstrip()
    tp = parser(line)

    if tp == TP_SECHEAD:
        parser = body_parser
    elif tp == TP_END:
        # Start of the next release: keep the line so it can be echoed
        # after the reformatted entry.
        nextline = line
        break

    CL.addLine(tp,line)

CL.lint()

# File output goes to "<name>.new" first and is moved into place at the
# end, so the input can still be read when it is the same file.
if options.output != '-':
    fname_new = options.output+".new"
    fname_out = options.output
    sys.stdout = open(fname_new, 'w')
else:
    fname_new = fname_out = None

if options.sort:
    CL.collateAndSortSections()

CL.dump()

# Unless only the first entry was requested, pass the remainder of the
# input through unchanged.
if not options.firstOnly:
    if nextline is not None:
        print(nextline)

    for line in sys.stdin:
        sys.stdout.write(line)

if fname != '-':
    sys.stdin.close()

if fname_new is not None:
    # Flush and close the temporary file before renaming it, and do so even
    # with --first; otherwise the result is left behind in "<name>.new".
    sys.stdout.close()
    sys.stdout = sys.__stdout__
    os.rename(fname_new, fname_out)

if options.firstOnly:
    sys.exit(0)