format_changelog.py 15 KB


  1. #!/usr/bin/python
  2. # Copyright (c) 2014-2015, The Tor Project, Inc.
  3. # See LICENSE for licensing information
  4. #
  5. # This script reformats a section of the changelog to wrap everything to
  6. # the right width and put blank lines in the right places. Eventually,
  7. # it might include a linter.
  8. #
# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script.
  11. import os
  12. import re
  13. import sys
  14. import optparse
  15. # ==============================
  16. # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
  17. # algorithm, with totally ad hoc parameters!
  18. #
# We're trying to minimize:
#    The total of the cubes of ragged space on underflowed intermediate lines,
#  PLUS
#    OVERFLOW_PENALTY * the fourth power of overflowed characters
#  PLUS
#    the ragged space on the last line (see the LASTLINE_UNDERFLOW_*
#    parameters), plus ORPHAN_PENALTY if the last line is a single word.
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with (
  27. #
  28. # We use an obvious dynamic programming algorithm to sorta approximate this.
  29. # It's not coded right or optimally, but it's fast enough for changelogs
  30. #
  31. # (Code found in an old directory of mine, lightly cleaned. -NM)
  32. NO_HYPHENATE=set("""
  33. pf-divert
  34. tor-resolve
  35. tor-gencert
  36. """.split())
  37. LASTLINE_UNDERFLOW_EXPONENT = 1
  38. LASTLINE_UNDERFLOW_PENALTY = 1
  39. UNDERFLOW_EXPONENT = 3
  40. UNDERFLOW_PENALTY = 1
  41. OVERFLOW_EXPONENT = 4
  42. OVERFLOW_PENALTY = 2000
  43. ORPHAN_PENALTY = 10000
  44. OPENPAREN_PENALTY = 200
  45. def generate_wrapping(words, divisions):
  46. lines = []
  47. last = 0
  48. for i in divisions:
  49. w = words[last:i]
  50. last = i
  51. line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  52. lines.append(line.strip())
  53. return lines
  54. def wrapping_quality(words, divisions, width1, width2):
  55. total = 0.0
  56. lines = generate_wrapping(words, divisions)
  57. for line in lines:
  58. length = len(line)
  59. if line is lines[0]:
  60. width = width1
  61. else:
  62. width = width2
  63. if line[0:1] == '(':
  64. total += OPENPAREN_PENALTY
  65. if length > width:
  66. total += OVERFLOW_PENALTY * (
  67. (length - width) ** OVERFLOW_EXPONENT )
  68. else:
  69. if line is lines[-1]:
  70. e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  71. if " " not in line:
  72. total += ORPHAN_PENALTY
  73. else:
  74. e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  75. total += p * ((width - length) ** e)
  76. return total
  77. def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  78. wrapping_after = [ (0,), ]
  79. w1 = width - prefix_len1
  80. w2 = width - prefix_len2
  81. for i in range(1, len(words)+1):
  82. best_so_far = None
  83. best_score = 1e300
  84. for j in range(i):
  85. t = wrapping_after[j]
  86. t1 = t[:-1] + (i,)
  87. t2 = t + (i,)
  88. wq1 = wrapping_quality(words, t1, w1, w2)
  89. wq2 = wrapping_quality(words, t2, w1, w2)
  90. if wq1 < best_score:
  91. best_so_far = t1
  92. best_score = wq1
  93. if wq2 < best_score:
  94. best_so_far = t2
  95. best_score = wq2
  96. wrapping_after.append( best_so_far )
  97. lines = generate_wrapping(words, wrapping_after[-1])
  98. return lines
  99. def hyphenatable(word):
  100. if "--" in word:
  101. return False
  102. if re.match(r'^[^\d\-]\D*-', word):
  103. stripped = re.sub(r'^\W+','',word)
  104. stripped = re.sub(r'\W+$','',word)
  105. return stripped not in NO_HYPHENATE
  106. else:
  107. return False
  108. def split_paragraph(s):
  109. "Split paragraph into words; tuned for Tor."
  110. r = []
  111. for word in s.split():
  112. if hyphenatable(word):
  113. while "-" in word:
  114. a,word = word.split("-",1)
  115. r.append(a+"\xff")
  116. r.append(word)
  117. return r
  118. def fill(text, width, initial_indent, subsequent_indent):
  119. words = split_paragraph(text)
  120. lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
  121. width)
  122. res = [ initial_indent, lines[0], "\n" ]
  123. for line in lines[1:]:
  124. res.append(subsequent_indent)
  125. res.append(line)
  126. res.append("\n")
  127. return "".join(res)
  128. # ==============================
  129. TP_MAINHEAD = 0
  130. TP_HEADTEXT = 1
  131. TP_BLANK = 2
  132. TP_SECHEAD = 3
  133. TP_ITEMFIRST = 4
  134. TP_ITEMBODY = 5
  135. TP_END = 6
  136. TP_PREHEAD = 7
  137. def head_parser(line):
  138. if re.match(r'^Changes in', line):
  139. return TP_MAINHEAD
  140. elif re.match(r'^[A-Za-z]', line):
  141. return TP_PREHEAD
  142. elif re.match(r'^ o ', line):
  143. return TP_SECHEAD
  144. elif re.match(r'^\s*$', line):
  145. return TP_BLANK
  146. else:
  147. return TP_HEADTEXT
  148. def body_parser(line):
  149. if re.match(r'^ o ', line):
  150. return TP_SECHEAD
  151. elif re.match(r'^ -',line):
  152. return TP_ITEMFIRST
  153. elif re.match(r'^ \S', line):
  154. return TP_ITEMBODY
  155. elif re.match(r'^\s*$', line):
  156. return TP_BLANK
  157. elif re.match(r'^Changes in', line):
  158. return TP_END
  159. elif re.match(r'^\s+\S', line):
  160. return TP_HEADTEXT
  161. else:
  162. print "Weird line %r"%line
  163. def clean_head(head):
  164. return head
  165. def head_score(s):
  166. m = re.match(r'^ +o (.*)', s)
  167. if not m:
  168. print >>sys.stderr, "Can't score %r"%s
  169. return 99999
  170. lw = m.group(1).lower()
  171. if lw.startswith("security") and "feature" not in lw:
  172. score = -300
  173. elif lw.startswith("deprecated version"):
  174. score = -200
  175. elif (('new' in lw and 'requirement' in lw) or
  176. ('new' in lw and 'dependenc' in lw) or
  177. ('build' in lw and 'requirement' in lw) or
  178. ('removed' in lw and 'platform' in lw)):
  179. score = -100
  180. elif lw.startswith("major feature"):
  181. score = 00
  182. elif lw.startswith("major bug"):
  183. score = 50
  184. elif lw.startswith("major"):
  185. score = 70
  186. elif lw.startswith("minor feature"):
  187. score = 200
  188. elif lw.startswith("minor bug"):
  189. score = 250
  190. elif lw.startswith("minor"):
  191. score = 270
  192. else:
  193. score = 1000
  194. if 'secur' in lw:
  195. score -= 2
  196. if "(other)" in lw:
  197. score += 2
  198. if '(' not in lw:
  199. score -= 1
  200. return score
  201. class ChangeLog(object):
  202. def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
  203. self.prehead = []
  204. self.mainhead = None
  205. self.headtext = []
  206. self.curgraf = None
  207. self.sections = []
  208. self.cursection = None
  209. self.lineno = 0
  210. self.wrapText = wrapText
  211. self.blogOrder = blogOrder
  212. self.drupalBreak = drupalBreak
  213. def addLine(self, tp, line):
  214. self.lineno += 1
  215. if tp == TP_MAINHEAD:
  216. assert not self.mainhead
  217. self.mainhead = line
  218. elif tp == TP_PREHEAD:
  219. self.prehead.append(line)
  220. elif tp == TP_HEADTEXT:
  221. if self.curgraf is None:
  222. self.curgraf = []
  223. self.headtext.append(self.curgraf)
  224. self.curgraf.append(line)
  225. elif tp == TP_BLANK:
  226. self.curgraf = None
  227. elif tp == TP_SECHEAD:
  228. self.cursection = [ self.lineno, line, [] ]
  229. self.sections.append(self.cursection)
  230. elif tp == TP_ITEMFIRST:
  231. item = ( self.lineno, [ [line] ])
  232. self.curgraf = item[1][0]
  233. self.cursection[2].append(item)
  234. elif tp == TP_ITEMBODY:
  235. if self.curgraf is None:
  236. self.curgraf = []
  237. self.cursection[2][-1][1].append(self.curgraf)
  238. self.curgraf.append(line)
  239. else:
  240. assert "This" is "unreachable"
  241. def lint_head(self, line, head):
  242. m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
  243. if not m:
  244. print >>sys.stderr, "Weird header format on line %s"%line
  245. def lint_item(self, line, grafs, head_type):
  246. pass
  247. def lint(self):
  248. self.head_lines = {}
  249. for sec_line, sec_head, items in self.sections:
  250. head_type = self.lint_head(sec_line, sec_head)
  251. for item_line, grafs in items:
  252. self.lint_item(item_line, grafs, head_type)
  253. def dumpGraf(self,par,indent1,indent2=-1):
  254. if not self.wrapText:
  255. for line in par:
  256. print line
  257. return
  258. if indent2 == -1:
  259. indent2 = indent1
  260. text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
  261. sys.stdout.write(fill(text,
  262. width=72,
  263. initial_indent=" "*indent1,
  264. subsequent_indent=" "*indent2))
  265. def dumpPreheader(self, graf):
  266. self.dumpGraf(graf, 0)
  267. print
  268. def dumpMainhead(self, head):
  269. print head
  270. def dumpHeadGraf(self, graf):
  271. self.dumpGraf(graf, 2)
  272. print
  273. def dumpSectionHeader(self, header):
  274. print header
  275. def dumpStartOfSections(self):
  276. pass
  277. def dumpEndOfSections(self):
  278. pass
  279. def dumpEndOfSection(self):
  280. print
  281. def dumpEndOfChangelog(self):
  282. print
  283. def dumpDrupalBreak(self):
  284. pass
  285. def dumpItem(self, grafs):
  286. self.dumpGraf(grafs[0],4,6)
  287. for par in grafs[1:]:
  288. print
  289. self.dumpGraf(par,6,6)
  290. def collateAndSortSections(self):
  291. heads = []
  292. sectionsByHead = { }
  293. for _, head, items in self.sections:
  294. head = clean_head(head)
  295. try:
  296. s = sectionsByHead[head]
  297. except KeyError:
  298. s = sectionsByHead[head] = []
  299. heads.append( (head_score(head), head.lower(), head, s) )
  300. s.extend(items)
  301. heads.sort()
  302. self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
  303. def dump(self):
  304. if self.prehead:
  305. self.dumpPreheader(self.prehead)
  306. if not self.blogOrder:
  307. self.dumpMainhead(self.mainhead)
  308. for par in self.headtext:
  309. self.dumpHeadGraf(par)
  310. if self.blogOrder:
  311. self.dumpMainhead(self.mainhead)
  312. drupalBreakAfter = None
  313. if self.drupalBreak and len(self.sections) > 4:
  314. drupalBreakAfter = self.sections[1][2]
  315. self.dumpStartOfSections()
  316. for _,head,items in self.sections:
  317. if not head.endswith(':'):
  318. print >>sys.stderr, "adding : to %r"%head
  319. head = head + ":"
  320. self.dumpSectionHeader(head)
  321. for _,grafs in items:
  322. self.dumpItem(grafs)
  323. self.dumpEndOfSection()
  324. if items is drupalBreakAfter:
  325. self.dumpDrupalBreak()
  326. self.dumpEndOfSections()
  327. self.dumpEndOfChangelog()
  328. class HTMLChangeLog(ChangeLog):
  329. def __init__(self, *args, **kwargs):
  330. ChangeLog.__init__(self, *args, **kwargs)
  331. def htmlText(self, graf):
  332. for line in graf:
  333. line = line.rstrip().replace("&","&amp;")
  334. line = line.rstrip().replace("<","&lt;").replace(">","&gt;")
  335. sys.stdout.write(line.strip())
  336. sys.stdout.write(" ")
  337. def htmlPar(self, graf):
  338. sys.stdout.write("<p>")
  339. self.htmlText(graf)
  340. sys.stdout.write("</p>\n")
  341. def dumpPreheader(self, graf):
  342. self.htmlPar(graf)
  343. def dumpMainhead(self, head):
  344. sys.stdout.write("<h2>%s</h2>"%head)
  345. def dumpHeadGraf(self, graf):
  346. self.htmlPar(graf)
  347. def dumpSectionHeader(self, header):
  348. header = header.replace(" o ", "", 1).lstrip()
  349. sys.stdout.write(" <li>%s\n"%header)
  350. sys.stdout.write(" <ul>\n")
  351. def dumpEndOfSection(self):
  352. sys.stdout.write(" </ul>\n\n")
  353. def dumpEndOfChangelog(self):
  354. pass
  355. def dumpStartOfSections(self):
  356. print "<ul>\n"
  357. def dumpEndOfSections(self):
  358. print "</ul>\n"
  359. def dumpDrupalBreak(self):
  360. print "\n</ul>\n"
  361. print "<p>&nbsp;</p>"
  362. print "\n<!--break-->\n\n"
  363. print "<ul>"
  364. def dumpItem(self, grafs):
  365. grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
  366. sys.stdout.write(" <li>")
  367. if len(grafs) > 1:
  368. for par in grafs:
  369. self.htmlPar(par)
  370. else:
  371. self.htmlText(grafs[0])
  372. print
  373. op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
  374. op.add_option('-W', '--no-wrap', action='store_false',
  375. dest='wrapText', default=True,
  376. help='Do not re-wrap paragraphs')
  377. op.add_option('-S', '--no-sort', action='store_false',
  378. dest='sort', default=True,
  379. help='Do not sort or collate sections')
  380. op.add_option('-o', '--output', dest='output',
  381. default='-', metavar='FILE', help="write output to FILE")
  382. op.add_option('-H', '--html', action='store_true',
  383. dest='html', default=False,
  384. help="generate an HTML fragment")
  385. op.add_option('-1', '--first', action='store_true',
  386. dest='firstOnly', default=False,
  387. help="write only the first section")
  388. op.add_option('-b', '--blog-header', action='store_true',
  389. dest='blogOrder', default=False,
  390. help="Write the header in blog order")
  391. op.add_option('-B', '--blog', action='store_true',
  392. dest='blogFormat', default=False,
  393. help="Set all other options as appropriate for a blog post")
  394. op.add_option('--inplace', action='store_true',
  395. dest='inplace', default=False,
  396. help="Alter the ChangeLog in place")
  397. op.add_option('--drupal-break', action='store_true',
  398. dest='drupalBreak', default=False,
  399. help='Insert a drupal-friendly <!--break--> as needed')
  400. options,args = op.parse_args()
  401. if options.blogFormat:
  402. options.blogOrder = True
  403. options.html = True
  404. options.sort = False
  405. options.wrapText = False
  406. options.firstOnly = True
  407. options.drupalBreak = True
  408. if len(args) > 1:
  409. op.error("Too many arguments")
  410. elif len(args) == 0:
  411. fname = 'ChangeLog'
  412. else:
  413. fname = args[0]
  414. if options.inplace:
  415. assert options.output == '-'
  416. options.output = fname
  417. if fname != '-':
  418. sys.stdin = open(fname, 'r')
  419. nextline = None
  420. if options.html:
  421. ChangeLogClass = HTMLChangeLog
  422. else:
  423. ChangeLogClass = ChangeLog
  424. CL = ChangeLogClass(wrapText=options.wrapText,
  425. blogOrder=options.blogOrder,
  426. drupalBreak=options.drupalBreak)
  427. parser = head_parser
  428. for line in sys.stdin:
  429. line = line.rstrip()
  430. tp = parser(line)
  431. if tp == TP_SECHEAD:
  432. parser = body_parser
  433. elif tp == TP_END:
  434. nextline = line
  435. break
  436. CL.addLine(tp,line)
  437. CL.lint()
  438. if options.output != '-':
  439. fname_new = options.output+".new"
  440. fname_out = options.output
  441. sys.stdout = open(fname_new, 'w')
  442. else:
  443. fname_new = fname_out = None
  444. if options.sort:
  445. CL.collateAndSortSections()
  446. CL.dump()
  447. if options.firstOnly:
  448. sys.exit(0)
  449. if nextline is not None:
  450. print nextline
  451. for line in sys.stdin:
  452. sys.stdout.write(line)
  453. if fname_new is not None:
  454. os.rename(fname_new, fname_out)