format_changelog.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569
  1. #!/usr/bin/python
  2. # Copyright (c) 2014-2019, The Tor Project, Inc.
  3. # See LICENSE for licensing information
  4. #
  5. # This script reformats a section of the changelog to wrap everything to
  6. # the right width and put blank lines in the right places. Eventually,
  7. # it might include a linter.
  8. #
  9. # To run it, pipe a section of the changelog (starting with "Changes
  10. # in Tor 0.x.y.z-alpha") through the script.
  11. import os
  12. import re
  13. import sys
  14. import optparse
  15. # ==============================
  16. # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
  17. # algorithm, with totally ad hoc parameters!
  18. #
  19. # We're trying to minimize:
  20. # The total of the cubes of ragged space on underflowed intermediate lines,
  21. # PLUS
  22. # OVERFLOW_PENALTY * the fourth power of overflowed characters
  23. # PLUS
  24. # the ragged space on the last line (plus ORPHAN_PENALTY if it is one word)
  25. # PLUS
  26. # OPENPAREN_PENALTY for each line that starts with (
  27. #
  28. # We use an obvious dynamic programming algorithm to sorta approximate this.
  29. # It's not coded right or optimally, but it's fast enough for changelogs
  30. #
  31. # (Code found in an old directory of mine, lightly cleaned. -NM)
  32. NO_HYPHENATE=set("""
  33. pf-divert
  34. tor-resolve
  35. tor-gencert
  36. """.split())
  37. LASTLINE_UNDERFLOW_EXPONENT = 1
  38. LASTLINE_UNDERFLOW_PENALTY = 1
  39. UNDERFLOW_EXPONENT = 3
  40. UNDERFLOW_PENALTY = 1
  41. OVERFLOW_EXPONENT = 4
  42. OVERFLOW_PENALTY = 2000
  43. ORPHAN_PENALTY = 10000
  44. OPENPAREN_PENALTY = 200
  45. def generate_wrapping(words, divisions):
  46. lines = []
  47. last = 0
  48. for i in divisions:
  49. w = words[last:i]
  50. last = i
  51. line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  52. lines.append(line.strip())
  53. return lines
  54. def wrapping_quality(words, divisions, width1, width2):
  55. total = 0.0
  56. lines = generate_wrapping(words, divisions)
  57. for line in lines:
  58. length = len(line)
  59. if line is lines[0]:
  60. width = width1
  61. else:
  62. width = width2
  63. if line[0:1] == '(':
  64. total += OPENPAREN_PENALTY
  65. if length > width:
  66. total += OVERFLOW_PENALTY * (
  67. (length - width) ** OVERFLOW_EXPONENT )
  68. else:
  69. if line is lines[-1]:
  70. e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  71. if " " not in line:
  72. total += ORPHAN_PENALTY
  73. else:
  74. e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  75. total += p * ((width - length) ** e)
  76. return total
  77. def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  78. wrapping_after = [ (0,), ]
  79. w1 = width - prefix_len1
  80. w2 = width - prefix_len2
  81. for i in range(1, len(words)+1):
  82. best_so_far = None
  83. best_score = 1e300
  84. for j in range(i):
  85. t = wrapping_after[j]
  86. t1 = t[:-1] + (i,)
  87. t2 = t + (i,)
  88. wq1 = wrapping_quality(words, t1, w1, w2)
  89. wq2 = wrapping_quality(words, t2, w1, w2)
  90. if wq1 < best_score:
  91. best_so_far = t1
  92. best_score = wq1
  93. if wq2 < best_score:
  94. best_so_far = t2
  95. best_score = wq2
  96. wrapping_after.append( best_so_far )
  97. lines = generate_wrapping(words, wrapping_after[-1])
  98. return lines
  99. def hyphenatable(word):
  100. if "--" in word:
  101. return False
  102. if re.match(r'^[^\d\-]\D*-', word):
  103. stripped = re.sub(r'^\W+','',word)
  104. stripped = re.sub(r'\W+$','',word)
  105. return stripped not in NO_HYPHENATE
  106. else:
  107. return False
  108. def split_paragraph(s):
  109. "Split paragraph into words; tuned for Tor."
  110. r = []
  111. for word in s.split():
  112. if hyphenatable(word):
  113. while "-" in word:
  114. a,word = word.split("-",1)
  115. r.append(a+"\xff")
  116. r.append(word)
  117. return r
  118. def fill(text, width, initial_indent, subsequent_indent):
  119. words = split_paragraph(text)
  120. lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
  121. width)
  122. res = [ initial_indent, lines[0], "\n" ]
  123. for line in lines[1:]:
  124. res.append(subsequent_indent)
  125. res.append(line)
  126. res.append("\n")
  127. return "".join(res)
  128. # ==============================
  129. TP_MAINHEAD = 0
  130. TP_HEADTEXT = 1
  131. TP_BLANK = 2
  132. TP_SECHEAD = 3
  133. TP_ITEMFIRST = 4
  134. TP_ITEMBODY = 5
  135. TP_END = 6
  136. TP_PREHEAD = 7
  137. def head_parser(line):
  138. if re.match(r'^Changes in', line):
  139. return TP_MAINHEAD
  140. elif re.match(r'^[A-Za-z]', line):
  141. return TP_PREHEAD
  142. elif re.match(r'^ o ', line):
  143. return TP_SECHEAD
  144. elif re.match(r'^\s*$', line):
  145. return TP_BLANK
  146. else:
  147. return TP_HEADTEXT
  148. def body_parser(line):
  149. if re.match(r'^ o ', line):
  150. return TP_SECHEAD
  151. elif re.match(r'^ -',line):
  152. return TP_ITEMFIRST
  153. elif re.match(r'^ \S', line):
  154. return TP_ITEMBODY
  155. elif re.match(r'^\s*$', line):
  156. return TP_BLANK
  157. elif re.match(r'^Changes in', line):
  158. return TP_END
  159. elif re.match(r'^\s+\S', line):
  160. return TP_HEADTEXT
  161. else:
  162. print "Weird line %r"%line
  163. def clean_head(head):
  164. return head
  165. def head_score(s):
  166. m = re.match(r'^ +o (.*)', s)
  167. if not m:
  168. print >>sys.stderr, "Can't score %r"%s
  169. return 99999
  170. lw = m.group(1).lower()
  171. if lw.startswith("security") and "feature" not in lw:
  172. score = -300
  173. elif lw.startswith("deprecated version"):
  174. score = -200
  175. elif lw.startswith("directory auth"):
  176. score = -150
  177. elif (('new' in lw and 'requirement' in lw) or
  178. ('new' in lw and 'dependenc' in lw) or
  179. ('build' in lw and 'requirement' in lw) or
  180. ('removed' in lw and 'platform' in lw)):
  181. score = -100
  182. elif lw.startswith("major feature"):
  183. score = 00
  184. elif lw.startswith("major bug"):
  185. score = 50
  186. elif lw.startswith("major"):
  187. score = 70
  188. elif lw.startswith("minor feature"):
  189. score = 200
  190. elif lw.startswith("minor bug"):
  191. score = 250
  192. elif lw.startswith("minor"):
  193. score = 270
  194. else:
  195. score = 1000
  196. if 'secur' in lw:
  197. score -= 2
  198. if "(other)" in lw:
  199. score += 2
  200. if '(' not in lw:
  201. score -= 1
  202. return score
  203. class ChangeLog(object):
  204. def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
  205. self.prehead = []
  206. self.mainhead = None
  207. self.headtext = []
  208. self.curgraf = None
  209. self.sections = []
  210. self.cursection = None
  211. self.lineno = 0
  212. self.wrapText = wrapText
  213. self.blogOrder = blogOrder
  214. self.drupalBreak = drupalBreak
  215. def addLine(self, tp, line):
  216. self.lineno += 1
  217. if tp == TP_MAINHEAD:
  218. assert not self.mainhead
  219. self.mainhead = line
  220. elif tp == TP_PREHEAD:
  221. self.prehead.append(line)
  222. elif tp == TP_HEADTEXT:
  223. if self.curgraf is None:
  224. self.curgraf = []
  225. self.headtext.append(self.curgraf)
  226. self.curgraf.append(line)
  227. elif tp == TP_BLANK:
  228. self.curgraf = None
  229. elif tp == TP_SECHEAD:
  230. self.cursection = [ self.lineno, line, [] ]
  231. self.sections.append(self.cursection)
  232. elif tp == TP_ITEMFIRST:
  233. item = ( self.lineno, [ [line] ])
  234. self.curgraf = item[1][0]
  235. self.cursection[2].append(item)
  236. elif tp == TP_ITEMBODY:
  237. if self.curgraf is None:
  238. self.curgraf = []
  239. self.cursection[2][-1][1].append(self.curgraf)
  240. self.curgraf.append(line)
  241. else:
  242. assert "This" is "unreachable"
  243. def lint_head(self, line, head):
  244. m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
  245. if not m:
  246. print >>sys.stderr, "Weird header format on line %s"%line
  247. def lint_item(self, line, grafs, head_type):
  248. pass
  249. def lint(self):
  250. self.head_lines = {}
  251. for sec_line, sec_head, items in self.sections:
  252. head_type = self.lint_head(sec_line, sec_head)
  253. for item_line, grafs in items:
  254. self.lint_item(item_line, grafs, head_type)
  255. def dumpGraf(self,par,indent1,indent2=-1):
  256. if not self.wrapText:
  257. for line in par:
  258. print line
  259. return
  260. if indent2 == -1:
  261. indent2 = indent1
  262. text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
  263. sys.stdout.write(fill(text,
  264. width=72,
  265. initial_indent=" "*indent1,
  266. subsequent_indent=" "*indent2))
  267. def dumpPreheader(self, graf):
  268. self.dumpGraf(graf, 0)
  269. print
  270. def dumpMainhead(self, head):
  271. print head
  272. def dumpHeadGraf(self, graf):
  273. self.dumpGraf(graf, 2)
  274. print
  275. def dumpSectionHeader(self, header):
  276. print header
  277. def dumpStartOfSections(self):
  278. pass
  279. def dumpEndOfSections(self):
  280. pass
  281. def dumpEndOfSection(self):
  282. print
  283. def dumpEndOfChangelog(self):
  284. print
  285. def dumpDrupalBreak(self):
  286. pass
  287. def dumpItem(self, grafs):
  288. self.dumpGraf(grafs[0],4,6)
  289. for par in grafs[1:]:
  290. print
  291. self.dumpGraf(par,6,6)
  292. def collateAndSortSections(self):
  293. heads = []
  294. sectionsByHead = { }
  295. for _, head, items in self.sections:
  296. head = clean_head(head)
  297. try:
  298. s = sectionsByHead[head]
  299. except KeyError:
  300. s = sectionsByHead[head] = []
  301. heads.append( (head_score(head), head.lower(), head, s) )
  302. s.extend(items)
  303. heads.sort()
  304. self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
  305. def dump(self):
  306. if self.prehead:
  307. self.dumpPreheader(self.prehead)
  308. if not self.blogOrder:
  309. self.dumpMainhead(self.mainhead)
  310. for par in self.headtext:
  311. self.dumpHeadGraf(par)
  312. if self.blogOrder:
  313. self.dumpMainhead(self.mainhead)
  314. drupalBreakAfter = None
  315. if self.drupalBreak and len(self.sections) > 4:
  316. drupalBreakAfter = self.sections[1][2]
  317. self.dumpStartOfSections()
  318. for _,head,items in self.sections:
  319. if not head.endswith(':'):
  320. print >>sys.stderr, "adding : to %r"%head
  321. head = head + ":"
  322. self.dumpSectionHeader(head)
  323. for _,grafs in items:
  324. self.dumpItem(grafs)
  325. self.dumpEndOfSection()
  326. if items is drupalBreakAfter:
  327. self.dumpDrupalBreak()
  328. self.dumpEndOfSections()
  329. self.dumpEndOfChangelog()
  330. # Let's turn bugs to html.
  331. BUG_PAT = re.compile('(bug|ticket|feature)\s+(\d{4,5})', re.I)
  332. def bug_html(m):
  333. return "%s <a href='https://bugs.torproject.org/%s'>%s</a>" % (m.group(1), m.group(2), m.group(2))
  334. class HTMLChangeLog(ChangeLog):
  335. def __init__(self, *args, **kwargs):
  336. ChangeLog.__init__(self, *args, **kwargs)
  337. def htmlText(self, graf):
  338. output = []
  339. for line in graf:
  340. line = line.rstrip().replace("&","&amp;")
  341. line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
  342. output.append(line.strip())
  343. output = " ".join(output)
  344. output = BUG_PAT.sub(bug_html, output)
  345. sys.stdout.write(output)
  346. def htmlPar(self, graf):
  347. sys.stdout.write("<p>")
  348. self.htmlText(graf)
  349. sys.stdout.write("</p>\n")
  350. def dumpPreheader(self, graf):
  351. self.htmlPar(graf)
  352. def dumpMainhead(self, head):
  353. sys.stdout.write("<h2>%s</h2>"%head)
  354. def dumpHeadGraf(self, graf):
  355. self.htmlPar(graf)
  356. def dumpSectionHeader(self, header):
  357. header = header.replace(" o ", "", 1).lstrip()
  358. sys.stdout.write(" <li>%s\n"%header)
  359. sys.stdout.write(" <ul>\n")
  360. def dumpEndOfSection(self):
  361. sys.stdout.write(" </ul>\n\n")
  362. def dumpEndOfChangelog(self):
  363. pass
  364. def dumpStartOfSections(self):
  365. print "<ul>\n"
  366. def dumpEndOfSections(self):
  367. print "</ul>\n"
  368. def dumpDrupalBreak(self):
  369. print "\n</ul>\n"
  370. print "<p>&nbsp;</p>"
  371. print "\n<!--break-->\n\n"
  372. print "<ul>"
  373. def dumpItem(self, grafs):
  374. grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
  375. sys.stdout.write(" <li>")
  376. if len(grafs) > 1:
  377. for par in grafs:
  378. self.htmlPar(par)
  379. else:
  380. self.htmlText(grafs[0])
  381. print
  382. op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
  383. op.add_option('-W', '--no-wrap', action='store_false',
  384. dest='wrapText', default=True,
  385. help='Do not re-wrap paragraphs')
  386. op.add_option('-S', '--no-sort', action='store_false',
  387. dest='sort', default=True,
  388. help='Do not sort or collate sections')
  389. op.add_option('-o', '--output', dest='output',
  390. default='-', metavar='FILE', help="write output to FILE")
  391. op.add_option('-H', '--html', action='store_true',
  392. dest='html', default=False,
  393. help="generate an HTML fragment")
  394. op.add_option('-1', '--first', action='store_true',
  395. dest='firstOnly', default=False,
  396. help="write only the first section")
  397. op.add_option('-b', '--blog-header', action='store_true',
  398. dest='blogOrder', default=False,
  399. help="Write the header in blog order")
  400. op.add_option('-B', '--blog', action='store_true',
  401. dest='blogFormat', default=False,
  402. help="Set all other options as appropriate for a blog post")
  403. op.add_option('--inplace', action='store_true',
  404. dest='inplace', default=False,
  405. help="Alter the ChangeLog in place")
  406. op.add_option('--drupal-break', action='store_true',
  407. dest='drupalBreak', default=False,
  408. help='Insert a drupal-friendly <!--break--> as needed')
  409. options,args = op.parse_args()
  410. if options.blogFormat:
  411. options.blogOrder = True
  412. options.html = True
  413. options.sort = False
  414. options.wrapText = False
  415. options.firstOnly = True
  416. options.drupalBreak = True
  417. if len(args) > 1:
  418. op.error("Too many arguments")
  419. elif len(args) == 0:
  420. fname = 'ChangeLog'
  421. else:
  422. fname = args[0]
  423. if options.inplace:
  424. assert options.output == '-'
  425. options.output = fname
  426. if fname != '-':
  427. sys.stdin = open(fname, 'r')
  428. nextline = None
  429. if options.html:
  430. ChangeLogClass = HTMLChangeLog
  431. else:
  432. ChangeLogClass = ChangeLog
  433. CL = ChangeLogClass(wrapText=options.wrapText,
  434. blogOrder=options.blogOrder,
  435. drupalBreak=options.drupalBreak)
  436. parser = head_parser
  437. for line in sys.stdin:
  438. line = line.rstrip()
  439. tp = parser(line)
  440. if tp == TP_SECHEAD:
  441. parser = body_parser
  442. elif tp == TP_END:
  443. nextline = line
  444. break
  445. CL.addLine(tp,line)
  446. CL.lint()
  447. if options.output != '-':
  448. fname_new = options.output+".new"
  449. fname_out = options.output
  450. sys.stdout = open(fname_new, 'w')
  451. else:
  452. fname_new = fname_out = None
  453. if options.sort:
  454. CL.collateAndSortSections()
  455. CL.dump()
  456. if options.firstOnly:
  457. sys.exit(0)
  458. if nextline is not None:
  459. print nextline
  460. for line in sys.stdin:
  461. sys.stdout.write(line)
  462. if fname_new is not None:
  463. os.rename(fname_new, fname_out)