123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291 |
- #!/usr/bin/python
- # Copyright (c) 2014, The Tor Project, Inc.
- # See LICENSE for licensing information
- #
- # This script reformats a section of the changelog to wrap everything to
- # the right width and put blank lines in the right places. Eventually,
- # it might include a linter.
- #
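- # To run it, start the script in the directory that holds the "ChangeLog"
- # file. The first section (starting with "Changes in Tor 0.x.y.z-alpha")
- # is reformatted, the rest of the file is copied through unchanged, and
- # the result replaces "ChangeLog".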
- import os
- import re
- import sys
- # ==============================
- # Oh, look! It's a cruddy approximation to Knuth's elegant text wrapping
- # algorithm, with totally ad hoc parameters!
- #
- # We're trying to minimize:
- # The total of the cubes of ragged space on underflowed intermediate lines,
- # PLUS
- # 2000 * the fourth power of overflowed characters
- # PLUS
- # the ragged space on the last line, plus a flat 10000 if that last line
- # contains no space (a lone word left over at the end).
- #
- # We use an obvious dynamic programming algorithm to sorta approximate this.
- # It's not coded right or optimally, but it's fast enough for changelogs
- #
- # (Code found in an old directory of mine, lightly cleaned. -NM)
# Hyphenated words that must never be split at their hyphen; hyphenateable()
# answers False for them.  Add further entries one per line.
NO_HYPHENATE = {
    "pf-divert",
}

# Cost of an intermediate line that is shorter than its target width:
# UNDERFLOW_PENALTY * (unused columns ** UNDERFLOW_EXPONENT).
UNDERFLOW_EXPONENT = 3
UNDERFLOW_PENALTY = 1

# Same formula, applied to the final line of a paragraph.
LASTLINE_UNDERFLOW_EXPONENT = 1
LASTLINE_UNDERFLOW_PENALTY = 1

# Cost of a line that is longer than its target width:
# OVERFLOW_PENALTY * (excess columns ** OVERFLOW_EXPONENT).
OVERFLOW_EXPONENT = 4
OVERFLOW_PENALTY = 2000

# Flat extra cost charged when a final line that fits contains no space.
ORPHAN_PENALTY = 10000
def generate_wrapping(words, divisions):
    """Return the text lines obtained by breaking *words* at *divisions*.

    Each entry of *divisions* is the index into *words* at which one line
    stops and the next one starts; the first line starts at index 0.  Words
    beyond the final division are not included in the result.
    """
    stops = list(divisions)
    starts = [0] + stops[:-1]
    # A "\xff" in a word marks a hyphenation point.  When the next word
    # follows on the same line the marker and the joining space collapse to
    # "-"; when it ends up at the end of a line it becomes a trailing "-".
    return [
        " ".join(words[begin:end]).replace("\xff ", "-").replace("\xff", "-")
        for begin, end in zip(starts, stops)
    ]
def wrapping_quality(words, divisions, width1, width2):
    """Return the badness score of wrapping *words* at *divisions*.

    Lower scores are better.  *width1* is the room available on the first
    line and *width2* the room available on every later line.

    A line longer than its width costs
    OVERFLOW_PENALTY * excess ** OVERFLOW_EXPONENT.  A line that fits costs
    penalty * slack ** exponent, using the LASTLINE_* parameters for the
    final line and the plain UNDERFLOW_* parameters otherwise.  A final line
    that fits and contains no space is charged ORPHAN_PENALTY on top.
    """
    total = 0.0
    lines = generate_wrapping(words, divisions)
    last_index = len(lines) - 1
    # Track position by index.  Comparing the string objects themselves with
    # "is" mistakes one line for another whenever the interpreter hands back
    # the same object for equal text (e.g. single-character lines).
    for index, line in enumerate(lines):
        length = len(line)
        width = width1 if index == 0 else width2
        if length > width:
            total += OVERFLOW_PENALTY * (
                (length - width) ** OVERFLOW_EXPONENT)
        else:
            if index == last_index:
                e, p = (LASTLINE_UNDERFLOW_EXPONENT,
                        LASTLINE_UNDERFLOW_PENALTY)
                if " " not in line:
                    total += ORPHAN_PENALTY
            else:
                e, p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)
            total += p * ((width - length) ** e)
    return total
def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
    """Wrap *words* into lines and return them as a list of strings.

    *width* is the full line width.  *prefix_len1* columns are reserved on
    the first line and *prefix_len2* columns on every later line, so the
    text itself gets width minus the relevant prefix length.
    """
    first_width = width - prefix_len1
    later_width = width - prefix_len2

    # best_for_prefix[n] holds the chosen tuple of division points for the
    # first n words; entry 0 is the seed for the empty prefix.
    best_for_prefix = [(0,)]
    for end in range(1, len(words) + 1):
        winner = None
        winner_score = 1e300
        # At this point the list has exactly `end` entries, one per shorter
        # prefix.  Each of them yields two candidates: stretch its final
        # line out to `end`, or keep it and add one more line up to `end`.
        for earlier in best_for_prefix:
            for candidate in (earlier[:-1] + (end,), earlier + (end,)):
                score = wrapping_quality(words, candidate,
                                         first_width, later_width)
                # Strict comparison: on a tie the earliest candidate wins.
                if score < winner_score:
                    winner = candidate
                    winner_score = score
        best_for_prefix.append(winner)

    return generate_wrapping(words, best_for_prefix[-1])
def hyphenateable(word):
    """Return True if *word* may be broken across lines at its hyphens.

    A word qualifies when it does not start with a digit or a hyphen, has a
    hyphen somewhere after its first character, and - once leading and
    trailing non-word characters are removed - is not listed in
    NO_HYPHENATE.
    """
    if not re.match(r'^[^\d\-].*-', word):
        return False
    # Strip punctuation from BOTH ends before the lookup.  The second
    # substitution must work on the result of the first one; applying it to
    # `word` again throws the leading strip away, so "(pf-divert" would not
    # be recognised as the protected word "pf-divert".
    stripped = re.sub(r'^\W+', '', word)
    stripped = re.sub(r'\W+$', '', stripped)
    return stripped not in NO_HYPHENATE
def split_paragraph(s):
    """Split paragraph into words; tuned for Tor.

    The text is split on whitespace.  A word accepted by hyphenateable() is
    split further at every "-"; each fragment except the last gets a "\\xff"
    marker appended in place of the hyphen so the wrapper can break there.
    """
    pieces = []
    for token in s.split():
        if not hyphenateable(token):
            pieces.append(token)
            continue
        fragments = token.split("-")
        pieces.extend(fragment + "\xff" for fragment in fragments[:-1])
        pieces.append(fragments[-1])
    return pieces
def fill(text, width, initial_indent, subsequent_indent):
    """Return *text* re-wrapped to *width* columns as one string.

    The first output line is prefixed with *initial_indent* and every later
    line with *subsequent_indent*; each line, including the last, ends with
    a newline.
    """
    wrapped = wrap_graf(split_paragraph(text),
                        len(initial_indent), len(subsequent_indent),
                        width)
    pieces = [initial_indent + wrapped[0] + "\n"]
    pieces.extend(subsequent_indent + line + "\n" for line in wrapped[1:])
    return "".join(pieces)
- # ==============================
# Line types handed out by head_parser() and body_parser().
(
    TP_MAINHEAD,    # head line that starts with a capital letter
    TP_HEADTEXT,    # any other non-blank line of the head
    TP_BLANK,       # empty or whitespace-only line
    TP_SECHEAD,     # "o ..." section heading
    TP_ITEMFIRST,   # first line of an item (the "-" bullet line)
    TP_ITEMBODY,    # further line of an item
    TP_END,         # "Changes in" line that follows the section being read
) = range(7)
def head_parser(line):
    """Classify one line from the head of the changelog section.

    Returns TP_MAINHEAD, TP_SECHEAD or TP_BLANK for the first rule that
    matches, and TP_HEADTEXT for every other line.
    """
    # Rules are tried in order; the first matching pattern decides.
    rules = (
        (r'^[A-Z]', TP_MAINHEAD),
        (r'^ o ', TP_SECHEAD),
        (r'^\s*$', TP_BLANK),
    )
    for pattern, line_type in rules:
        if re.match(pattern, line):
            return line_type
    return TP_HEADTEXT
def body_parser(line):
    """Classify one line from the body (sections and items) of a changelog.

    Returns TP_SECHEAD, TP_ITEMFIRST, TP_ITEMBODY, TP_BLANK or TP_END for
    the first rule that matches.  A line that matches no rule is reported
    on stderr and None is returned.
    """
    # NOTE(review): ChangeLog.dump() writes items with 4- and 6-space
    # indents, yet these patterns accept only a single leading space.  The
    # whitespace inside the patterns may have been collapsed in transit --
    # confirm against the real ChangeLog layout before relying on them.
    if re.match(r'^ o ', line):
        return TP_SECHEAD
    elif re.match(r'^ -', line):
        return TP_ITEMFIRST
    elif re.match(r'^ \S', line):
        return TP_ITEMBODY
    elif re.match(r'^\s*$', line):
        return TP_BLANK
    elif re.match(r'^Changes in', line):
        return TP_END
    else:
        # Written with write() rather than a print statement so the module
        # also compiles on Python 3, and sent to stderr like the other
        # diagnostics in this file.
        sys.stderr.write("Weird line %r\n" % (line,))
        return None
class ChangeLog(object):
    """In-memory model of one changelog section.

    Classified lines are fed in one at a time through addLine(); dump()
    writes the section back to sys.stdout with every paragraph re-wrapped.
    """

    def __init__(self):
        # The line tagged TP_MAINHEAD, or None until one has been added.
        self.mainhead = None
        # Paragraphs of head text; each paragraph is a list of lines.
        self.headtext = []
        # The paragraph currently being extended; None after a blank line.
        self.curgraf = None
        # One [lineno, heading, items] list per section heading.  Every
        # entry of items is a (lineno, paragraphs) tuple.
        self.sections = []
        # The entry of self.sections that new items are attached to.
        self.cursection = None
        # Number of lines passed to addLine() so far.
        self.lineno = 0

    def addLine(self, tp, line):
        """Record *line*, which a parser classified as line type *tp*.

        Raises AssertionError when a second main heading arrives or when
        *tp* is not one of the line types handled here.
        """
        self.lineno += 1

        if tp == TP_MAINHEAD:
            assert not self.mainhead
            self.mainhead = line

        elif tp == TP_HEADTEXT:
            if self.curgraf is None:
                self.curgraf = []
                self.headtext.append(self.curgraf)
            self.curgraf.append(line)

        elif tp == TP_BLANK:
            self.curgraf = None

        elif tp == TP_SECHEAD:
            self.cursection = [self.lineno, line, []]
            self.sections.append(self.cursection)

        elif tp == TP_ITEMFIRST:
            item = (self.lineno, [[line]])
            self.curgraf = item[1][0]
            self.cursection[2].append(item)

        elif tp == TP_ITEMBODY:
            if self.curgraf is None:
                # A blank line closed the previous paragraph, so open a new
                # one in the MOST RECENT item: [-1] picks that item and [1]
                # its paragraph list.  (Indexing [1][-1] instead addresses
                # the section's second item, and fails when there is only
                # one.)
                self.curgraf = []
                self.cursection[2][-1][1].append(self.curgraf)
            self.curgraf.append(line)

        else:
            # Raised explicitly so the check survives "python -O" and does
            # not depend on the identity of two string literals.
            raise AssertionError("unexpected line type %r" % (tp,))

    def lint_head(self, line, head):
        """Warn on stderr if section heading *head* has an odd format.

        The expected shape is "o Title:" or "o Title (detail):", optionally
        preceded by spaces.  *line* is the line number used in the warning.
        Nothing is returned.
        """
        m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
        if not m:
            sys.stderr.write("Weird header format on line %s\n" % (line,))

    def lint_item(self, line, grafs, head_type):
        """Placeholder for per-item checks; currently does nothing."""
        pass

    def lint(self):
        """Run lint_head() on every section and lint_item() on every item."""
        self.head_lines = {}
        for sec_line, sec_head, items in self.sections:
            head_type = self.lint_head(sec_line, sec_head)
            for item_line, grafs in items:
                self.lint_item(item_line, grafs, head_type)

    def dumpGraf(self, par, indent1, indent2=-1):
        """Write paragraph *par* to sys.stdout, re-wrapped to 72 columns.

        *par* is a list of lines.  *indent1* is the number of spaces put
        before the first output line and *indent2* the number put before
        the others; the default of -1 means "same as indent1".
        """
        if indent2 == -1:
            indent2 = indent1
        # Collapse every run of whitespace so the wrapper sees single spaces.
        text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)
        sys.stdout.write(fill(text,
                              width=72,
                              initial_indent=" " * indent1,
                              subsequent_indent=" " * indent2))

    def dump(self):
        """Write the whole section to sys.stdout in canonical layout.

        Output uses write() calls rather than print statements so the class
        compiles on Python 2 and Python 3 alike.
        """
        out = sys.stdout.write
        out("%s\n" % (self.mainhead,))
        for par in self.headtext:
            self.dumpGraf(par, 2)
            out("\n")
        for _, head, items in self.sections:
            if not head.endswith(':'):
                sys.stderr.write("adding : to %r\n" % (head,))
                head = head + ":"
            out("%s\n" % (head,))
            for _, grafs in items:
                # First paragraph carries the bullet: 4 spaces, then 6.
                self.dumpGraf(grafs[0], 4, 6)
                for par in grafs[1:]:
                    out("\n")
                    self.dumpGraf(par, 6, 6)
            out("\n")
        out("\n")
# Read the first section of ./ChangeLog, reformat it, copy the remainder of
# the file through unchanged and replace ChangeLog with the result.
CL = ChangeLog()
parser = head_parser
# Header line of the section that follows the one being reformatted; stays
# None when the file holds only a single section.
nextline = None

with open('ChangeLog', 'r') as changelog:
    for line in changelog:
        line = line.rstrip()
        tp = parser(line)
        if tp == TP_SECHEAD:
            # The first section heading ends the head; switch parsers.
            parser = body_parser
        elif tp == TP_END:
            nextline = line
            break
        CL.addLine(tp, line)

    CL.lint()

    # ChangeLog.dump() writes to sys.stdout, so point that at the new file
    # for the duration and put the real stream back afterwards.
    saved_stdout = sys.stdout
    sys.stdout = open('ChangeLog.new', 'w')
    try:
        CL.dump()
        if nextline is not None:
            sys.stdout.write(nextline + "\n")
        # Everything after the reformatted section is copied verbatim.
        for line in changelog:
            sys.stdout.write(line)
    finally:
        # Close (and thereby flush) the new file before it is renamed.
        sys.stdout.close()
        sys.stdout = saved_stdout

os.rename('ChangeLog.new', 'ChangeLog')
|