format_changelog.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. #!/usr/bin/python
  2. # Copyright (c) 2014, The Tor Project, Inc.
  3. # See LICENSE for licensing information
  4. #
  5. # This script reformats a section of the changelog to wrap everything to
  6. # the right width and put blank lines in the right places. Eventually,
  7. # it might include a linter.
  8. #
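# To run it, give it the name of the changelog file as its only argument
# (default: "ChangeLog"). The file must begin with the section to reformat
# (starting with "Changes in Tor 0.x.y.z-alpha"). That section is rewritten
# in place; everything from the next "Changes in" line onward is copied
# through unchanged.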
  11. import os
  12. import re
  13. import sys
  14. # ==============================
  15. # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
  16. # algorithm, with totally ad hoc parameters!
  17. #
# We're trying to minimize:
#    UNDERFLOW_PENALTY (1) * the cube of the ragged space on each
#    underflowed intermediate line,
#  PLUS
#    OVERFLOW_PENALTY (2000) * the fourth power of overflowed characters
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY (1) * the ragged space on the last line
#    (linear, not cubed),
#  PLUS
#    ORPHAN_PENALTY if the last line fits and holds a single word,
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with (
  26. #
  27. # We use an obvious dynamic programming algorithm to sorta approximate this.
  28. # It's not coded right or optimally, but it's fast enough for changelogs
  29. #
  30. # (Code found in an old directory of mine, lightly cleaned. -NM)
  31. NO_HYPHENATE=set("""
  32. pf-divert
  33. """.split())
  34. LASTLINE_UNDERFLOW_EXPONENT = 1
  35. LASTLINE_UNDERFLOW_PENALTY = 1
  36. UNDERFLOW_EXPONENT = 3
  37. UNDERFLOW_PENALTY = 1
  38. OVERFLOW_EXPONENT = 4
  39. OVERFLOW_PENALTY = 2000
  40. ORPHAN_PENALTY = 10000
  41. OPENPAREN_PENALTY = 200
  42. def generate_wrapping(words, divisions):
  43. lines = []
  44. last = 0
  45. for i in divisions:
  46. w = words[last:i]
  47. last = i
  48. line = " ".join(w).replace("\xff ","-").replace("\xff","-")
  49. lines.append(line)
  50. return lines
  51. def wrapping_quality(words, divisions, width1, width2):
  52. total = 0.0
  53. lines = generate_wrapping(words, divisions)
  54. for line in lines:
  55. length = len(line)
  56. if line is lines[0]:
  57. width = width1
  58. else:
  59. width = width2
  60. if line[0:1] == '(':
  61. total += OPENPAREN_PENALTY
  62. if length > width:
  63. total += OVERFLOW_PENALTY * (
  64. (length - width) ** OVERFLOW_EXPONENT )
  65. else:
  66. if line is lines[-1]:
  67. e,p = (LASTLINE_UNDERFLOW_EXPONENT, LASTLINE_UNDERFLOW_PENALTY)
  68. if " " not in line:
  69. total += ORPHAN_PENALTY
  70. else:
  71. e,p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
  72. total += p * ((width - length) ** e)
  73. return total
  74. def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
  75. wrapping_after = [ (0,), ]
  76. w1 = width - prefix_len1
  77. w2 = width - prefix_len2
  78. for i in range(1, len(words)+1):
  79. best_so_far = None
  80. best_score = 1e300
  81. for j in range(i):
  82. t = wrapping_after[j]
  83. t1 = t[:-1] + (i,)
  84. t2 = t + (i,)
  85. wq1 = wrapping_quality(words, t1, w1, w2)
  86. wq2 = wrapping_quality(words, t2, w1, w2)
  87. if wq1 < best_score:
  88. best_so_far = t1
  89. best_score = wq1
  90. if wq2 < best_score:
  91. best_so_far = t2
  92. best_score = wq2
  93. wrapping_after.append( best_so_far )
  94. lines = generate_wrapping(words, wrapping_after[-1])
  95. return lines
  96. def hyphenateable(word):
  97. if re.match(r'^[^\d\-]\D*-', word):
  98. stripped = re.sub(r'^\W+','',word)
  99. stripped = re.sub(r'\W+$','',word)
  100. return stripped not in NO_HYPHENATE
  101. else:
  102. return False
  103. def split_paragraph(s):
  104. "Split paragraph into words; tuned for Tor."
  105. r = []
  106. for word in s.split():
  107. if hyphenateable(word):
  108. while "-" in word:
  109. a,word = word.split("-",1)
  110. r.append(a+"\xff")
  111. r.append(word)
  112. return r
  113. def fill(text, width, initial_indent, subsequent_indent):
  114. words = split_paragraph(text)
  115. lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
  116. width)
  117. res = [ initial_indent, lines[0], "\n" ]
  118. for line in lines[1:]:
  119. res.append(subsequent_indent)
  120. res.append(line)
  121. res.append("\n")
  122. return "".join(res)
  123. # ==============================
  124. TP_MAINHEAD = 0
  125. TP_HEADTEXT = 1
  126. TP_BLANK = 2
  127. TP_SECHEAD = 3
  128. TP_ITEMFIRST = 4
  129. TP_ITEMBODY = 5
  130. TP_END = 6
  131. def head_parser(line):
  132. if re.match(r'^[A-Z]', line):
  133. return TP_MAINHEAD
  134. elif re.match(r'^ o ', line):
  135. return TP_SECHEAD
  136. elif re.match(r'^\s*$', line):
  137. return TP_BLANK
  138. else:
  139. return TP_HEADTEXT
  140. def body_parser(line):
  141. if re.match(r'^ o ', line):
  142. return TP_SECHEAD
  143. elif re.match(r'^ -',line):
  144. return TP_ITEMFIRST
  145. elif re.match(r'^ \S', line):
  146. return TP_ITEMBODY
  147. elif re.match(r'^\s*$', line):
  148. return TP_BLANK
  149. elif re.match(r'^Changes in', line):
  150. return TP_END
  151. else:
  152. print "Weird line %r"%line
  153. class ChangeLog(object):
  154. def __init__(self):
  155. self.mainhead = None
  156. self.headtext = []
  157. self.curgraf = None
  158. self.sections = []
  159. self.cursection = None
  160. self.lineno = 0
  161. def addLine(self, tp, line):
  162. self.lineno += 1
  163. if tp == TP_MAINHEAD:
  164. assert not self.mainhead
  165. self.mainhead = line
  166. elif tp == TP_HEADTEXT:
  167. if self.curgraf is None:
  168. self.curgraf = []
  169. self.headtext.append(self.curgraf)
  170. self.curgraf.append(line)
  171. elif tp == TP_BLANK:
  172. self.curgraf = None
  173. elif tp == TP_SECHEAD:
  174. self.cursection = [ self.lineno, line, [] ]
  175. self.sections.append(self.cursection)
  176. elif tp == TP_ITEMFIRST:
  177. item = ( self.lineno, [ [line] ])
  178. self.curgraf = item[1][0]
  179. self.cursection[2].append(item)
  180. elif tp == TP_ITEMBODY:
  181. if self.curgraf is None:
  182. self.curgraf = []
  183. self.cursection[2][-1][1].append(self.curgraf)
  184. self.curgraf.append(line)
  185. else:
  186. assert "This" is "unreachable"
  187. def lint_head(self, line, head):
  188. m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
  189. if not m:
  190. print >>sys.stderr, "Weird header format on line %s"%line
  191. def lint_item(self, line, grafs, head_type):
  192. pass
  193. def lint(self):
  194. self.head_lines = {}
  195. for sec_line, sec_head, items in self.sections:
  196. head_type = self.lint_head(sec_line, sec_head)
  197. for item_line, grafs in items:
  198. self.lint_item(item_line, grafs, head_type)
  199. def dumpGraf(self,par,indent1,indent2=-1):
  200. if indent2 == -1:
  201. indent2 = indent1
  202. text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
  203. sys.stdout.write(fill(text,
  204. width=72,
  205. initial_indent=" "*indent1,
  206. subsequent_indent=" "*indent2))
  207. def dump(self):
  208. print self.mainhead
  209. for par in self.headtext:
  210. self.dumpGraf(par, 2)
  211. print
  212. for _,head,items in self.sections:
  213. if not head.endswith(':'):
  214. print >>sys.stderr, "adding : to %r"%head
  215. head = head + ":"
  216. print head
  217. for _,grafs in items:
  218. self.dumpGraf(grafs[0],4,6)
  219. for par in grafs[1:]:
  220. print
  221. self.dumpGraf(par,6,6)
  222. print
  223. print
  224. CL = ChangeLog()
  225. parser = head_parser
  226. if len(sys.argv) == 1:
  227. fname = 'ChangeLog'
  228. else:
  229. fname = sys.argv[1]
  230. fname_new = fname+".new"
  231. sys.stdin = open(fname, 'r')
  232. nextline = None
  233. for line in sys.stdin:
  234. line = line.rstrip()
  235. tp = parser(line)
  236. if tp == TP_SECHEAD:
  237. parser = body_parser
  238. elif tp == TP_END:
  239. nextline = line
  240. break
  241. CL.addLine(tp,line)
  242. CL.lint()
  243. sys.stdout = open(fname_new, 'w')
  244. CL.dump()
  245. if nextline is not None:
  246. print nextline
  247. for line in sys.stdin:
  248. sys.stdout.write(line)
  249. os.rename(fname_new, fname)