format_changelog.py 15 KB


#!/usr/bin/python
# Copyright (c) 2014-2015, The Tor Project, Inc.
# See LICENSE for licensing information
#
# This script reformats a section of the changelog to wrap everything to
# the right width and put blank lines in the right places. Eventually,
# it might include a linter.
#
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script, passing "-" as the filename
# so that it reads standard input. With no filename argument it reads the
# file named "ChangeLog" instead.
  11. import os
  12. import re
  13. import sys
  14. import optparse
  15. # ==============================
  16. # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
  17. # algorithm, with totally ad hoc parameters!
  18. #
  19. # We're trying to minimize:
  20. # The total of the cubes of ragged space on underflowed intermediate lines,
  21. # PLUS
  22. # 100 * the fourth power of overflowed characters
  23. # PLUS
  24. # .1 * a bit more than the cube of ragged space on the last line.
  25. # PLUS
  26. # OPENPAREN_PENALTY for each line that starts with (
  27. #
  28. # We use an obvious dynamic programming algorithm to sorta approximate this.
  29. # It's not coded right or optimally, but it's fast enough for changelogs
  30. #
  31. # (Code found in an old directory of mine, lightly cleaned. -NM)
  32. NO_HYPHENATE=set("""
  33. pf-divert
  34. tor-resolve
  35. tor-gencert
  36. tor-fw-helper
  37. """.split())
  38. LASTLINE_UNDERFLOW_EXPONENT = 1
  39. LASTLINE_UNDERFLOW_PENALTY = 1
  40. UNDERFLOW_EXPONENT = 3
  41. UNDERFLOW_PENALTY = 1
  42. OVERFLOW_EXPONENT = 4
  43. OVERFLOW_PENALTY = 2000
  44. ORPHAN_PENALTY = 10000
  45. OPENPAREN_PENALTY = 200
  46. def generate_wrapping(words, divisions):
  47. lines = []
  48. last = 0
  49. for i in divisions:
  50. w = words[last:i]
  51. last = i
  52. line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  53. lines.append(line)
  54. return lines
  55. def wrapping_quality(words, divisions, width1, width2):
  56. total = 0.0
  57. lines = generate_wrapping(words, divisions)
  58. for line in lines:
  59. length = len(line)
  60. if line is lines[0]:
  61. width = width1
  62. else:
  63. width = width2
  64. if line[0:1] == '(':
  65. total += OPENPAREN_PENALTY
  66. if length > width:
  67. total += OVERFLOW_PENALTY * (
  68. (length - width) ** OVERFLOW_EXPONENT )
  69. else:
  70. if line is lines[-1]:
  71. e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  72. if " " not in line:
  73. total += ORPHAN_PENALTY
  74. else:
  75. e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  76. total += p * ((width - length) ** e)
  77. return total
  78. def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  79. wrapping_after = [ (0,), ]
  80. w1 = width - prefix_len1
  81. w2 = width - prefix_len2
  82. for i in range(1, len(words)+1):
  83. best_so_far = None
  84. best_score = 1e300
  85. for j in range(i):
  86. t = wrapping_after[j]
  87. t1 = t[:-1] + (i,)
  88. t2 = t + (i,)
  89. wq1 = wrapping_quality(words, t1, w1, w2)
  90. wq2 = wrapping_quality(words, t2, w1, w2)
  91. if wq1 < best_score:
  92. best_so_far = t1
  93. best_score = wq1
  94. if wq2 < best_score:
  95. best_so_far = t2
  96. best_score = wq2
  97. wrapping_after.append( best_so_far )
  98. lines = generate_wrapping(words, wrapping_after[-1])
  99. return lines
  100. def hyphenatable(word):
  101. if "--" in word:
  102. return False
  103. if re.match(r'^[^\d\-]\D*-', word):
  104. stripped = re.sub(r'^\W+','',word)
  105. stripped = re.sub(r'\W+$','',word)
  106. return stripped not in NO_HYPHENATE
  107. else:
  108. return False
  109. def split_paragraph(s):
  110. "Split paragraph into words; tuned for Tor."
  111. r = []
  112. for word in s.split():
  113. if hyphenatable(word):
  114. while "-" in word:
  115. a,word = word.split("-",1)
  116. r.append(a+"\xff")
  117. r.append(word)
  118. return r
  119. def fill(text, width, initial_indent, subsequent_indent):
  120. words = split_paragraph(text)
  121. lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
  122. width)
  123. res = [ initial_indent, lines[0], "\n" ]
  124. for line in lines[1:]:
  125. res.append(subsequent_indent)
  126. res.append(line)
  127. res.append("\n")
  128. return "".join(res)
  129. # ==============================
  130. TP_MAINHEAD = 0
  131. TP_HEADTEXT = 1
  132. TP_BLANK = 2
  133. TP_SECHEAD = 3
  134. TP_ITEMFIRST = 4
  135. TP_ITEMBODY = 5
  136. TP_END = 6
  137. TP_PREHEAD = 7
  138. def head_parser(line):
  139. if re.match(r'^Changes in', line):
  140. return TP_MAINHEAD
  141. elif re.match(r'^[A-Za-z]', line):
  142. return TP_PREHEAD
  143. elif re.match(r'^ o ', line):
  144. return TP_SECHEAD
  145. elif re.match(r'^\s*$', line):
  146. return TP_BLANK
  147. else:
  148. return TP_HEADTEXT
  149. def body_parser(line):
  150. if re.match(r'^ o ', line):
  151. return TP_SECHEAD
  152. elif re.match(r'^ -',line):
  153. return TP_ITEMFIRST
  154. elif re.match(r'^ \S', line):
  155. return TP_ITEMBODY
  156. elif re.match(r'^\s*$', line):
  157. return TP_BLANK
  158. elif re.match(r'^Changes in', line):
  159. return TP_END
  160. elif re.match(r'^\s+\S', line):
  161. return TP_HEADTEXT
  162. else:
  163. print "Weird line %r"%line
def clean_head(head):
    """Return the section heading *head* unchanged.

    ChangeLog.collateAndSortSections() passes every heading through this
    function before grouping sections, so it is the place to normalise
    headings; at present it performs no normalisation.
    """
    return head
  166. def head_score(s):
  167. m = re.match(r'^ +o (.*)', s)
  168. if not m:
  169. print >>sys.stderr, "Can't score %r"%s
  170. return 99999
  171. lw = m.group(1).lower()
  172. if lw.startswith("security") and "feature" not in lw:
  173. score = -300
  174. elif lw.startswith("deprecated version"):
  175. score = -200
  176. elif (('new' in lw and 'requirement' in lw) or
  177. ('new' in lw and 'dependenc' in lw) or
  178. ('build' in lw and 'requirement' in lw) or
  179. ('removed' in lw and 'platform' in lw)):
  180. score = -100
  181. elif lw.startswith("major feature"):
  182. score = 00
  183. elif lw.startswith("major bug"):
  184. score = 50
  185. elif lw.startswith("major"):
  186. score = 70
  187. elif lw.startswith("minor feature"):
  188. score = 200
  189. elif lw.startswith("minor bug"):
  190. score = 250
  191. elif lw.startswith("minor"):
  192. score = 270
  193. else:
  194. score = 1000
  195. if 'secur' in lw:
  196. score -= 2
  197. if "(other)" in lw:
  198. score += 2
  199. if '(' not in lw:
  200. score -= 1
  201. return score
  202. class ChangeLog(object):
  203. def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
  204. self.prehead = []
  205. self.mainhead = None
  206. self.headtext = []
  207. self.curgraf = None
  208. self.sections = []
  209. self.cursection = None
  210. self.lineno = 0
  211. self.wrapText = wrapText
  212. self.blogOrder = blogOrder
  213. self.drupalBreak = drupalBreak
  214. def addLine(self, tp, line):
  215. self.lineno += 1
  216. if tp == TP_MAINHEAD:
  217. assert not self.mainhead
  218. self.mainhead = line
  219. elif tp == TP_PREHEAD:
  220. self.prehead.append(line)
  221. elif tp == TP_HEADTEXT:
  222. if self.curgraf is None:
  223. self.curgraf = []
  224. self.headtext.append(self.curgraf)
  225. self.curgraf.append(line)
  226. elif tp == TP_BLANK:
  227. self.curgraf = None
  228. elif tp == TP_SECHEAD:
  229. self.cursection = [ self.lineno, line, [] ]
  230. self.sections.append(self.cursection)
  231. elif tp == TP_ITEMFIRST:
  232. item = ( self.lineno, [ [line] ])
  233. self.curgraf = item[1][0]
  234. self.cursection[2].append(item)
  235. elif tp == TP_ITEMBODY:
  236. if self.curgraf is None:
  237. self.curgraf = []
  238. self.cursection[2][-1][1].append(self.curgraf)
  239. self.curgraf.append(line)
  240. else:
  241. assert "This" is "unreachable"
  242. def lint_head(self, line, head):
  243. m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
  244. if not m:
  245. print >>sys.stderr, "Weird header format on line %s"%line
  246. def lint_item(self, line, grafs, head_type):
  247. pass
  248. def lint(self):
  249. self.head_lines = {}
  250. for sec_line, sec_head, items in self.sections:
  251. head_type = self.lint_head(sec_line, sec_head)
  252. for item_line, grafs in items:
  253. self.lint_item(item_line, grafs, head_type)
  254. def dumpGraf(self,par,indent1,indent2=-1):
  255. if not self.wrapText:
  256. for line in par:
  257. print line
  258. return
  259. if indent2 == -1:
  260. indent2 = indent1
  261. text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
  262. sys.stdout.write(fill(text,
  263. width=72,
  264. initial_indent=" "*indent1,
  265. subsequent_indent=" "*indent2))
  266. def dumpPreheader(self, graf):
  267. self.dumpGraf(graf, 0)
  268. print
  269. def dumpMainhead(self, head):
  270. print head
  271. def dumpHeadGraf(self, graf):
  272. self.dumpGraf(graf, 2)
  273. print
  274. def dumpSectionHeader(self, header):
  275. print header
  276. def dumpStartOfSections(self):
  277. pass
  278. def dumpEndOfSections(self):
  279. pass
  280. def dumpEndOfSection(self):
  281. print
  282. def dumpEndOfChangelog(self):
  283. print
  284. def dumpDrupalBreak(self):
  285. pass
  286. def dumpItem(self, grafs):
  287. self.dumpGraf(grafs[0],4,6)
  288. for par in grafs[1:]:
  289. print
  290. self.dumpGraf(par,6,6)
  291. def collateAndSortSections(self):
  292. heads = []
  293. sectionsByHead = { }
  294. for _, head, items in self.sections:
  295. head = clean_head(head)
  296. try:
  297. s = sectionsByHead[head]
  298. except KeyError:
  299. s = sectionsByHead[head] = []
  300. heads.append( (head_score(head), head.lower(), head, s) )
  301. s.extend(items)
  302. heads.sort()
  303. self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
  304. def dump(self):
  305. if self.prehead:
  306. self.dumpPreheader(self.prehead)
  307. if not self.blogOrder:
  308. self.dumpMainhead(self.mainhead)
  309. for par in self.headtext:
  310. self.dumpHeadGraf(par)
  311. if self.blogOrder:
  312. self.dumpMainhead(self.mainhead)
  313. drupalBreakAfter = None
  314. if self.drupalBreak and len(self.sections) > 4:
  315. drupalBreakAfter = self.sections[1][2]
  316. self.dumpStartOfSections()
  317. for _,head,items in self.sections:
  318. if not head.endswith(':'):
  319. print >>sys.stderr, "adding : to %r"%head
  320. head = head + ":"
  321. self.dumpSectionHeader(head)
  322. for _,grafs in items:
  323. self.dumpItem(grafs)
  324. self.dumpEndOfSection()
  325. if items is drupalBreakAfter:
  326. self.dumpDrupalBreak()
  327. self.dumpEndOfSections()
  328. self.dumpEndOfChangelog()
  329. class HTMLChangeLog(ChangeLog):
  330. def __init__(self, *args, **kwargs):
  331. ChangeLog.__init__(self, *args, **kwargs)
  332. def htmlText(self, graf):
  333. for line in graf:
  334. line = line.rstrip().replace("&","&amp;")
  335. line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
  336. sys.stdout.write(line.strip())
  337. sys.stdout.write(" ")
  338. def htmlPar(self, graf):
  339. sys.stdout.write("<p>")
  340. self.htmlText(graf)
  341. sys.stdout.write("</p>\n")
  342. def dumpPreheader(self, graf):
  343. self.htmlPar(graf)
  344. def dumpMainhead(self, head):
  345. sys.stdout.write("<h2>%s</h2>"%head)
  346. def dumpHeadGraf(self, graf):
  347. self.htmlPar(graf)
  348. def dumpSectionHeader(self, header):
  349. header = header.replace(" o ", "", 1).lstrip()
  350. sys.stdout.write(" <li>%s\n"%header)
  351. sys.stdout.write(" <ul>\n")
  352. def dumpEndOfSection(self):
  353. sys.stdout.write(" </ul>\n\n")
  354. def dumpEndOfChangelog(self):
  355. pass
  356. def dumpStartOfSections(self):
  357. print "<ul>\n"
  358. def dumpEndOfSections(self):
  359. print "</ul>\n"
  360. def dumpDrupalBreak(self):
  361. print "\n</ul>\n"
  362. print "<p>&nbsp;</p>"
  363. print "\n<!--break-->\n\n"
  364. print "<ul>"
  365. def dumpItem(self, grafs):
  366. grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
  367. sys.stdout.write(" <li>")
  368. if len(grafs) > 1:
  369. for par in grafs:
  370. self.htmlPar(par)
  371. else:
  372. self.htmlText(grafs[0])
  373. print
  374. op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
  375. op.add_option('-W', '--no-wrap', action='store_false',
  376. dest='wrapText', default=True,
  377. help='Do not re-wrap paragraphs')
  378. op.add_option('-S', '--no-sort', action='store_false',
  379. dest='sort', default=True,
  380. help='Do not sort or collate sections')
  381. op.add_option('-o', '--output', dest='output',
  382. default='-', metavar='FILE', help="write output to FILE")
  383. op.add_option('-H', '--html', action='store_true',
  384. dest='html', default=False,
  385. help="generate an HTML fragment")
  386. op.add_option('-1', '--first', action='store_true',
  387. dest='firstOnly', default=False,
  388. help="write only the first section")
  389. op.add_option('-b', '--blog-header', action='store_true',
  390. dest='blogOrder', default=False,
  391. help="Write the header in blog order")
  392. op.add_option('-B', '--blog', action='store_true',
  393. dest='blogFormat', default=False,
  394. help="Set all other options as appropriate for a blog post")
  395. op.add_option('--inplace', action='store_true',
  396. dest='inplace', default=False,
  397. help="Alter the ChangeLog in place")
  398. op.add_option('--drupal-break', action='store_true',
  399. dest='drupalBreak', default=False,
  400. help='Insert a drupal-friendly <!--break--> as needed')
  401. options,args = op.parse_args()
  402. if options.blogFormat:
  403. options.blogOrder = True
  404. options.html = True
  405. options.sort = False
  406. options.wrapText = False
  407. options.firstOnly = True
  408. options.drupalBreak = True
  409. if len(args) > 1:
  410. op.error("Too many arguments")
  411. elif len(args) == 0:
  412. fname = 'ChangeLog'
  413. else:
  414. fname = args[0]
  415. if options.inplace:
  416. assert options.output == '-'
  417. options.output = fname
  418. if fname != '-':
  419. sys.stdin = open(fname, 'r')
  420. nextline = None
  421. if options.html:
  422. ChangeLogClass = HTMLChangeLog
  423. else:
  424. ChangeLogClass = ChangeLog
  425. CL = ChangeLogClass(wrapText=options.wrapText,
  426. blogOrder=options.blogOrder,
  427. drupalBreak=options.drupalBreak)
  428. parser = head_parser
  429. for line in sys.stdin:
  430. line = line.rstrip()
  431. tp = parser(line)
  432. if tp == TP_SECHEAD:
  433. parser = body_parser
  434. elif tp == TP_END:
  435. nextline = line
  436. break
  437. CL.addLine(tp,line)
  438. CL.lint()
  439. if options.output != '-':
  440. fname_new = options.output+".new"
  441. fname_out = options.output
  442. sys.stdout = open(fname_new, 'w')
  443. else:
  444. fname_new = fname_out = None
  445. if options.sort:
  446. CL.collateAndSortSections()
  447. CL.dump()
  448. if options.firstOnly:
  449. sys.exit(0)
  450. if nextline is not None:
  451. print nextline
  452. for line in sys.stdin:
  453. sys.stdout.write(line)
  454. if fname_new is not None:
  455. os.rename(fname_new, fname_out)