format_changelog.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. #!/usr/bin/python
  2. # Copyright (c) 2014, The Tor Project, Inc.
  3. # See LICENSE for licensing information
  4. #
  5. # This script reformats a section of the changelog to wrap everything to
  6. # the right width and put blank lines in the right places. Eventually,
  7. # it might include a linter.
  8. #
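# To run it, pipe a section of the changelog (starting with "Changes
# in Tor 0.x.y.z-alpha") through the script, passing "-" as the filename.
# With no filename argument it reads the file "ChangeLog" and, unless -o
# is given, replaces that file with the reformatted text.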
  11. import os
  12. import re
  13. import sys
  14. import optparse
  15. # ==============================
  16. # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
  17. # algorithm, with totally ad hoc parameters!
  18. #
# We're trying to minimize:
#    UNDERFLOW_PENALTY * (ragged space ** UNDERFLOW_EXPONENT) on each
#      underflowed intermediate line,
#  PLUS
#    OVERFLOW_PENALTY * (overflowed characters ** OVERFLOW_EXPONENT) on
#      each overflowed line,
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY * (ragged space **
#      LASTLINE_UNDERFLOW_EXPONENT) on the last line,
#  PLUS
#    ORPHAN_PENALTY if the (non-overflowing) last line is a single word,
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with (
  27. #
  28. # We use an obvious dynamic programming algorithm to sorta approximate this.
  29. # It's not coded right or optimally, but it's fast enough for changelogs
  30. #
  31. # (Code found in an old directory of mine, lightly cleaned. -NM)
  32. NO_HYPHENATE=set("""
  33. pf-divert
  34. tor-resolve
  35. tor-gencert
  36. tor-fw-helper
  37. """.split())
  38. LASTLINE_UNDERFLOW_EXPONENT = 1
  39. LASTLINE_UNDERFLOW_PENALTY = 1
  40. UNDERFLOW_EXPONENT = 3
  41. UNDERFLOW_PENALTY = 1
  42. OVERFLOW_EXPONENT = 4
  43. OVERFLOW_PENALTY = 2000
  44. ORPHAN_PENALTY = 10000
  45. OPENPAREN_PENALTY = 200
  46. def generate_wrapping(words, divisions):
  47. lines = []
  48. last = 0
  49. for i in divisions:
  50. w = words[last:i]
  51. last = i
  52. line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  53. lines.append(line)
  54. return lines
  55. def wrapping_quality(words, divisions, width1, width2):
  56. total = 0.0
  57. lines = generate_wrapping(words, divisions)
  58. for line in lines:
  59. length = len(line)
  60. if line is lines[0]:
  61. width = width1
  62. else:
  63. width = width2
  64. if line[0:1] == '(':
  65. total += OPENPAREN_PENALTY
  66. if length > width:
  67. total += OVERFLOW_PENALTY * (
  68. (length - width) ** OVERFLOW_EXPONENT )
  69. else:
  70. if line is lines[-1]:
  71. e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  72. if " " not in line:
  73. total += ORPHAN_PENALTY
  74. else:
  75. e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  76. total += p * ((width - length) ** e)
  77. return total
  78. def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  79. wrapping_after = [ (0,), ]
  80. w1 = width - prefix_len1
  81. w2 = width - prefix_len2
  82. for i in range(1, len(words)+1):
  83. best_so_far = None
  84. best_score = 1e300
  85. for j in range(i):
  86. t = wrapping_after[j]
  87. t1 = t[:-1] + (i,)
  88. t2 = t + (i,)
  89. wq1 = wrapping_quality(words, t1, w1, w2)
  90. wq2 = wrapping_quality(words, t2, w1, w2)
  91. if wq1 < best_score:
  92. best_so_far = t1
  93. best_score = wq1
  94. if wq2 < best_score:
  95. best_so_far = t2
  96. best_score = wq2
  97. wrapping_after.append( best_so_far )
  98. lines = generate_wrapping(words, wrapping_after[-1])
  99. return lines
  100. def hyphenatable(word):
  101. if "--" in word:
  102. return False
  103. if re.match(r'^[^\d\-]\D*-', word):
  104. stripped = re.sub(r'^\W+','',word)
  105. stripped = re.sub(r'\W+$','',word)
  106. return stripped not in NO_HYPHENATE
  107. else:
  108. return False
  109. def split_paragraph(s):
  110. "Split paragraph into words; tuned for Tor."
  111. r = []
  112. for word in s.split():
  113. if hyphenatable(word):
  114. while "-" in word:
  115. a,word = word.split("-",1)
  116. r.append(a+"\xff")
  117. r.append(word)
  118. return r
  119. def fill(text, width, initial_indent, subsequent_indent):
  120. words = split_paragraph(text)
  121. lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
  122. width)
  123. res = [ initial_indent, lines[0], "\n" ]
  124. for line in lines[1:]:
  125. res.append(subsequent_indent)
  126. res.append(line)
  127. res.append("\n")
  128. return "".join(res)
  129. # ==============================
  130. TP_MAINHEAD = 0
  131. TP_HEADTEXT = 1
  132. TP_BLANK = 2
  133. TP_SECHEAD = 3
  134. TP_ITEMFIRST = 4
  135. TP_ITEMBODY = 5
  136. TP_END = 6
  137. TP_PREHEAD = 7
  138. def head_parser(line):
  139. if re.match(r'^Changes in', line):
  140. return TP_MAINHEAD
  141. elif re.match(r'^[A-Za-z]', line):
  142. return TP_PREHEAD
  143. elif re.match(r'^ o ', line):
  144. return TP_SECHEAD
  145. elif re.match(r'^\s*$', line):
  146. return TP_BLANK
  147. else:
  148. return TP_HEADTEXT
  149. def body_parser(line):
  150. if re.match(r'^ o ', line):
  151. return TP_SECHEAD
  152. elif re.match(r'^ -',line):
  153. return TP_ITEMFIRST
  154. elif re.match(r'^ \S', line):
  155. return TP_ITEMBODY
  156. elif re.match(r'^\s*$', line):
  157. return TP_BLANK
  158. elif re.match(r'^Changes in', line):
  159. return TP_END
  160. elif re.match(r'^\s+\S', line):
  161. return TP_HEADTEXT
  162. else:
  163. print "Weird line %r"%line
  164. def clean_head(head):
  165. return head
  166. def head_score(s):
  167. m = re.match(r'^ +o (.*)', s)
  168. if not m:
  169. print >>sys.stderr, "Can't score %r"%s
  170. return 99999
  171. lw = m.group(1).lower()
  172. if lw.startswith("security") and "feature" not in lw:
  173. score = -300
  174. elif lw.startswith("deprecated versions"):
  175. score = -200
  176. elif "build require" in lw:
  177. score = -100
  178. elif lw.startswith("major feature"):
  179. score = 00
  180. elif lw.startswith("major bug"):
  181. score = 50
  182. elif lw.startswith("major"):
  183. score = 70
  184. elif lw.startswith("minor feature"):
  185. score = 200
  186. elif lw.startswith("minor bug"):
  187. score = 250
  188. elif lw.startswith("minor"):
  189. score = 270
  190. else:
  191. score = 1000
  192. if 'secur' in lw:
  193. score -= 2
  194. if "(other)" in lw:
  195. score += 2
  196. if '(' not in lw:
  197. score -= 1
  198. return score
  199. class ChangeLog(object):
  200. def __init__(self, wrapText=True):
  201. self.prehead = []
  202. self.mainhead = None
  203. self.headtext = []
  204. self.curgraf = None
  205. self.sections = []
  206. self.cursection = None
  207. self.lineno = 0
  208. self.wrapText = wrapText
  209. def addLine(self, tp, line):
  210. self.lineno += 1
  211. if tp == TP_MAINHEAD:
  212. assert not self.mainhead
  213. self.mainhead = line
  214. elif tp == TP_PREHEAD:
  215. self.prehead.append(line)
  216. elif tp == TP_HEADTEXT:
  217. if self.curgraf is None:
  218. self.curgraf = []
  219. self.headtext.append(self.curgraf)
  220. self.curgraf.append(line)
  221. elif tp == TP_BLANK:
  222. self.curgraf = None
  223. elif tp == TP_SECHEAD:
  224. self.cursection = [ self.lineno, line, [] ]
  225. self.sections.append(self.cursection)
  226. elif tp == TP_ITEMFIRST:
  227. item = ( self.lineno, [ [line] ])
  228. self.curgraf = item[1][0]
  229. self.cursection[2].append(item)
  230. elif tp == TP_ITEMBODY:
  231. if self.curgraf is None:
  232. self.curgraf = []
  233. self.cursection[2][-1][1].append(self.curgraf)
  234. self.curgraf.append(line)
  235. else:
  236. assert "This" is "unreachable"
  237. def lint_head(self, line, head):
  238. m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
  239. if not m:
  240. print >>sys.stderr, "Weird header format on line %s"%line
  241. def lint_item(self, line, grafs, head_type):
  242. pass
  243. def lint(self):
  244. self.head_lines = {}
  245. for sec_line, sec_head, items in self.sections:
  246. head_type = self.lint_head(sec_line, sec_head)
  247. for item_line, grafs in items:
  248. self.lint_item(item_line, grafs, head_type)
  249. def dumpGraf(self,par,indent1,indent2=-1):
  250. if not self.wrapText:
  251. for line in par:
  252. print line
  253. return
  254. if indent2 == -1:
  255. indent2 = indent1
  256. text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
  257. sys.stdout.write(fill(text,
  258. width=72,
  259. initial_indent=" "*indent1,
  260. subsequent_indent=" "*indent2))
  261. def collateAndSortSections(self):
  262. heads = []
  263. sectionsByHead = { }
  264. for _, head, items in self.sections:
  265. head = clean_head(head)
  266. try:
  267. s = sectionsByHead[head]
  268. except KeyError:
  269. s = sectionsByHead[head] = []
  270. heads.append( (head_score(head), head.lower(), head, s) )
  271. s.extend(items)
  272. heads.sort()
  273. self.sections = [ (0, head, items) for _1,_2,head,items in heads ]
  274. def dump(self):
  275. if self.prehead:
  276. self.dumpGraf(self.prehead, 0)
  277. print
  278. print self.mainhead
  279. for par in self.headtext:
  280. self.dumpGraf(par, 2)
  281. print
  282. for _,head,items in self.sections:
  283. if not head.endswith(':'):
  284. print >>sys.stderr, "adding : to %r"%head
  285. head = head + ":"
  286. print head
  287. for _,grafs in items:
  288. self.dumpGraf(grafs[0],4,6)
  289. for par in grafs[1:]:
  290. print
  291. self.dumpGraf(par,6,6)
  292. print
  293. print
  294. op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
  295. op.add_option('-W', '--no-wrap', action='store_false',
  296. dest='wrapText', default=True,
  297. help='Do not re-wrap paragraphs')
  298. op.add_option('-S', '--no-sort', action='store_false',
  299. dest='sort', default=True,
  300. help='Do not sort or collate sections')
  301. op.add_option('-o', '--output', dest='output',
  302. default=None, metavar='FILE', help="write output to FILE")
  303. options,args = op.parse_args()
  304. if len(args) > 1:
  305. op.error("Too many arguments")
  306. elif len(args) == 0:
  307. fname = 'ChangeLog'
  308. else:
  309. fname = args[0]
  310. if options.output == None:
  311. options.output = fname
  312. if fname != '-':
  313. sys.stdin = open(fname, 'r')
  314. nextline = None
  315. CL = ChangeLog(wrapText=options.wrapText)
  316. parser = head_parser
  317. for line in sys.stdin:
  318. line = line.rstrip()
  319. tp = parser(line)
  320. if tp == TP_SECHEAD:
  321. parser = body_parser
  322. elif tp == TP_END:
  323. nextline = line
  324. break
  325. CL.addLine(tp,line)
  326. CL.lint()
  327. if options.output != '-':
  328. fname_new = options.output+".new"
  329. fname_out = options.output
  330. sys.stdout = open(fname_new, 'w')
  331. else:
  332. fname_new = fname_out = None
  333. if options.sort:
  334. CL.collateAndSortSections()
  335. CL.dump()
  336. if nextline is not None:
  337. print nextline
  338. for line in sys.stdin:
  339. sys.stdout.write(line)
  340. if fname_new is not None:
  341. os.rename(fname_new, fname_out)