#!/usr/bin/python
# Copyright (c) 2014-2015, The Tor Project, Inc.
# See LICENSE for licensing information
#
# This script reformats a section of the changelog to wrap everything to
# the right width and put blank lines in the right places.  Eventually,
# it might include a linter.
#
# To run it, give it the name of a changelog file (the default name is
# "ChangeLog") whose first section starts with a "Changes in ..." line,
# or pass "-" as the file name and pipe that section of the changelog
# through the script.

import os
import re
import sys
import optparse

# ==============================
# Oh, look!  It's a cruddy approximation to Knuth's elegant text wrapping
# algorithm, with totally ad hoc parameters!
#
# We're trying to minimize:
#    UNDERFLOW_PENALTY * (ragged space ** UNDERFLOW_EXPONENT) on each
#    underflowed intermediate line,
#  PLUS
#    OVERFLOW_PENALTY * (overflowed characters ** OVERFLOW_EXPONENT),
#  PLUS
#    LASTLINE_UNDERFLOW_PENALTY * (ragged space **
#    LASTLINE_UNDERFLOW_EXPONENT) on the last line,
#  PLUS
#    ORPHAN_PENALTY if the last line fits and contains no space,
#  PLUS
#    OPENPAREN_PENALTY for each line that starts with (
#
# We use an obvious dynamic programming algorithm to sorta approximate this.
# It's not coded right or optimally, but it's fast enough for changelogs
#
# (Code found in an old directory of mine, lightly cleaned. -NM)

# Hyphenated names that must never be broken across lines.
NO_HYPHENATE = set("""
pf-divert
tor-resolve
tor-gencert
tor-fw-helper
""".split())

LASTLINE_UNDERFLOW_EXPONENT = 1
LASTLINE_UNDERFLOW_PENALTY = 1

UNDERFLOW_EXPONENT = 3
UNDERFLOW_PENALTY = 1

OVERFLOW_EXPONENT = 4
OVERFLOW_PENALTY = 2000

ORPHAN_PENALTY = 10000

OPENPAREN_PENALTY = 200


def _println(text=""):
    """Write `text` plus a newline to whatever sys.stdout currently is.

    Stands in for the Python 2 `print` statement so that the script runs
    unchanged on Python 2 and Python 3.  sys.stdout is looked up at call
    time because main() may redirect it to the output file.
    """
    sys.stdout.write("%s\n" % (text,))


def _warn(text):
    """Write a diagnostic line to sys.stderr."""
    sys.stderr.write("%s\n" % (text,))


def generate_wrapping(words, divisions):
    """Join `words` into lines, ending a line at each index in `divisions`.

    Each entry i of `divisions` closes a line holding words[last:i], where
    `last` is the previous entry (0 initially).  A word ending in the
    "\\xff" marker is the left half of a hyphenated word: the marker
    becomes "-" and, if the next half is on the same line, the joining
    space is dropped.
    """
    lines = []
    last = 0
    for i in divisions:
        w = words[last:i]
        last = i
        line = " ".join(w).replace("\xff ", "-").replace("\xff", "-")
        lines.append(line)
    return lines


def wrapping_quality(words, divisions, width1, width2):
    """Return the badness score (lower is better) of one candidate wrapping.

    `width1` is the available width of the first line and `width2` that of
    every later line.  See the comment at the top of this section for the
    terms that make up the score.
    """
    total = 0.0

    lines = generate_wrapping(words, divisions)
    last_index = len(lines) - 1
    # Identify the first and last line by position.  Comparing line
    # objects by identity is unreliable: equal strings may or may not be
    # the same object.
    for index, line in enumerate(lines):
        length = len(line)
        if index == 0:
            width = width1
        else:
            width = width2

        if line[0:1] == '(':
            total += OPENPAREN_PENALTY

        if length > width:
            total += OVERFLOW_PENALTY * (
                (length - width) ** OVERFLOW_EXPONENT)
        else:
            if index == last_index:
                e, p = (LASTLINE_UNDERFLOW_EXPONENT,
                        LASTLINE_UNDERFLOW_PENALTY)
                if " " not in line:
                    total += ORPHAN_PENALTY
            else:
                e, p = (UNDERFLOW_EXPONENT, UNDERFLOW_PENALTY)

            total += p * ((width - length) ** e)

    return total


def wrap_graf(words, prefix_len1=0, prefix_len2=0, width=72):
    """Choose line breaks for `words` and return the resulting lines.

    `prefix_len1` and `prefix_len2` are the indent widths of the first
    and of the subsequent lines; they are subtracted from `width` to get
    the space available for text.  The returned lines do not include the
    indentation.  An empty word list yields a single empty line.
    """
    # wrapping_after[i] is the best division tuple found for words[:i].
    wrapping_after = [(0,), ]

    w1 = width - prefix_len1
    w2 = width - prefix_len2

    for i in range(1, len(words)+1):
        best_so_far = None
        best_score = 1e300
        for j in range(i):
            t = wrapping_after[j]
            # t1 stretches the last line of t up to word i; t2 keeps t's
            # lines and adds a new line ending at word i.
            t1 = t[:-1] + (i,)
            t2 = t + (i,)
            wq1 = wrapping_quality(words, t1, w1, w2)
            wq2 = wrapping_quality(words, t2, w1, w2)

            if wq1 < best_score:
                best_so_far = t1
                best_score = wq1
            if wq2 < best_score:
                best_so_far = t2
                best_score = wq2
        wrapping_after.append(best_so_far)

    lines = generate_wrapping(words, wrapping_after[-1])

    return lines


def hyphenatable(word):
    """Return True if `word` may be split across lines at its hyphens.

    Words containing "--", words that start with a digit or "-", and
    words with a digit before their first hyphen are never split.  Nor
    are the names in NO_HYPHENATE, even when wrapped in punctuation.
    """
    if "--" in word:
        return False

    if not re.match(r'^[^\d\-]\D*-', word):
        return False

    # Strip punctuation from BOTH ends before the lookup.  (The second
    # substitution used to start over from `word`, discarding the first.)
    stripped = re.sub(r'^\W+', '', word)
    stripped = re.sub(r'\W+$', '', stripped)
    return stripped not in NO_HYPHENATE


def split_paragraph(s):
    """Split paragraph into words; tuned for Tor.

    Hyphenatable words are further split at each "-"; every piece except
    the last gets a trailing "\\xff" marker in place of the hyphen, which
    generate_wrapping() later turns back into "-".
    """
    r = []
    for word in s.split():
        if hyphenatable(word):
            while "-" in word:
                a, word = word.split("-", 1)
                r.append(a+"\xff")
        r.append(word)
    return r


def fill(text, width, initial_indent, subsequent_indent):
    """Wrap `text` to `width` columns and return it as one string.

    The first line is prefixed with `initial_indent`, all later lines with
    `subsequent_indent`; every line, including the last, ends in "\\n".
    """
    words = split_paragraph(text)
    lines = wrap_graf(words, len(initial_indent), len(subsequent_indent),
                      width)
    res = [initial_indent, lines[0], "\n"]
    for line in lines[1:]:
        res.append(subsequent_indent)
        res.append(line)
        res.append("\n")
    return "".join(res)

# ==============================

# Line types produced by head_parser() and body_parser().
TP_MAINHEAD = 0
TP_HEADTEXT = 1
TP_BLANK = 2
TP_SECHEAD = 3
TP_ITEMFIRST = 4
TP_ITEMBODY = 5
TP_END = 6
TP_PREHEAD = 7


def head_parser(line):
    """Classify a line seen before the first section header."""
    if re.match(r'^Changes in', line):
        return TP_MAINHEAD
    elif re.match(r'^[A-Za-z]', line):
        return TP_PREHEAD
    elif re.match(r'^  o ', line):
        return TP_SECHEAD
    elif re.match(r'^\s*$', line):
        return TP_BLANK
    else:
        return TP_HEADTEXT


def body_parser(line):
    """Classify a line seen after the first section header.

    Returns one of the TP_* constants, or None (after printing a
    "Weird line" message) when the line matches no known shape.
    """
    if re.match(r'^  o ', line):
        return TP_SECHEAD
    elif re.match(r'^    -', line):
        return TP_ITEMFIRST
    elif re.match(r'^      \S', line):
        return TP_ITEMBODY
    elif re.match(r'^\s*$', line):
        return TP_BLANK
    elif re.match(r'^Changes in', line):
        return TP_END
    elif re.match(r'^\s+\S', line):
        return TP_HEADTEXT
    else:
        _println("Weird line %r" % line)
        return None


def clean_head(head):
    """Return the section header used as the collation key (a no-op)."""
    return head


def head_score(s):
    """Return the sort key for section header `s`; lower sorts first.

    Headers that do not look like "  o <title>" are reported on stderr
    and given the score 99999.
    """
    m = re.match(r'^ +o (.*)', s)
    if not m:
        _warn("Can't score %r" % s)
        return 99999
    lw = m.group(1).lower()
    if lw.startswith("security") and "feature" not in lw:
        score = -300
    elif lw.startswith("deprecated version"):
        score = -200
    elif (('new' in lw and 'requirement' in lw) or
          ('new' in lw and 'dependenc' in lw) or
          ('build' in lw and 'requirement' in lw) or
          ('removed' in lw and 'platform' in lw)):
        score = -100
    elif lw.startswith("major feature"):
        score = 0
    elif lw.startswith("major bug"):
        score = 50
    elif lw.startswith("major"):
        score = 70
    elif lw.startswith("minor feature"):
        score = 200
    elif lw.startswith("minor bug"):
        score = 250
    elif lw.startswith("minor"):
        score = 270
    else:
        score = 1000

    # Small adjustments that order the headers within one category.
    if 'secur' in lw:
        score -= 2

    if "(other)" in lw:
        score += 2

    if '(' not in lw:
        score -= 1

    return score


class ChangeLog(object):
    """One changelog section, parsed line by line and dumped as text.

    Lines are fed in through addLine(); dump() writes the result to
    sys.stdout.  The dump* methods are the hooks a subclass overrides to
    produce another output format.
    """

    def __init__(self, wrapText=True, blogOrder=True, drupalBreak=False):
        self.prehead = []       # lines seen before the main heading
        self.mainhead = None    # the "Changes in ..." line
        self.headtext = []      # intro paragraphs, each a list of lines
        self.curgraf = None     # paragraph currently being collected
        self.sections = []      # [lineno, header, items] per section
        self.cursection = None  # section currently being collected
        self.lineno = 0
        self.wrapText = wrapText
        self.blogOrder = blogOrder
        self.drupalBreak = drupalBreak

    def addLine(self, tp, line):
        """Record `line`, which a parser classified as type `tp`.

        Raises AssertionError on a second main heading or on a type this
        method does not handle (including the None that body_parser()
        returns for unrecognized lines).
        """
        self.lineno += 1

        if tp == TP_MAINHEAD:
            if self.mainhead:
                raise AssertionError(
                    "second main heading on input line %d" % self.lineno)
            self.mainhead = line

        elif tp == TP_PREHEAD:
            self.prehead.append(line)

        elif tp == TP_HEADTEXT:
            if self.curgraf is None:
                self.curgraf = []
                self.headtext.append(self.curgraf)
            self.curgraf.append(line)

        elif tp == TP_BLANK:
            # A blank line ends the current paragraph.
            self.curgraf = None

        elif tp == TP_SECHEAD:
            self.cursection = [self.lineno, line, []]
            self.sections.append(self.cursection)

        elif tp == TP_ITEMFIRST:
            item = (self.lineno, [[line]])
            self.curgraf = item[1][0]
            self.cursection[2].append(item)

        elif tp == TP_ITEMBODY:
            if self.curgraf is None:
                # Start a new paragraph inside the section's latest item.
                self.curgraf = []
                self.cursection[2][-1][1].append(self.curgraf)
            self.curgraf.append(line)

        else:
            # An explicit raise: unlike an assert statement it is not
            # stripped when Python runs with -O.
            raise AssertionError(
                "unexpected line type %r on input line %d: %r"
                % (tp, self.lineno, line))

    def lint_head(self, line, head):
        """Warn on stderr if section header `head` is oddly formatted."""
        m = re.match(r'^ *o ([^\(]+)((?:\([^\)]+\))?):', head)
        if not m:
            _warn("Weird header format on line %s" % line)

    def lint_item(self, line, grafs, head_type):
        """Check one item; currently a placeholder that does nothing."""
        pass

    def lint(self):
        """Run the lint checks over every section header and item."""
        self.head_lines = {}
        for sec_line, sec_head, items in self.sections:
            head_type = self.lint_head(sec_line, sec_head)
            for item_line, grafs in items:
                self.lint_item(item_line, grafs, head_type)

    def dumpGraf(self, par, indent1, indent2=-1):
        """Write paragraph `par` (a list of lines) to sys.stdout.

        With wrapping disabled the lines are written unchanged.  Otherwise
        they are joined and re-wrapped to 72 columns, the first line
        indented by `indent1` spaces and the rest by `indent2` (which
        defaults to `indent1`).
        """
        if not self.wrapText:
            for line in par:
                _println(line)
            return

        if indent2 == -1:
            indent2 = indent1
        text = " ".join(re.sub(r'\s+', ' ', line.strip()) for line in par)

        sys.stdout.write(fill(text,
                              width=72,
                              initial_indent=" "*indent1,
                              subsequent_indent=" "*indent2))

    def dumpPreheader(self, graf):
        self.dumpGraf(graf, 0)
        _println()

    def dumpMainhead(self, head):
        _println(head)

    def dumpHeadGraf(self, graf):
        self.dumpGraf(graf, 2)
        _println()

    def dumpSectionHeader(self, header):
        _println(header)

    def dumpStartOfSections(self):
        pass

    def dumpEndOfSections(self):
        pass

    def dumpEndOfSection(self):
        _println()

    def dumpEndOfChangelog(self):
        _println()

    def dumpDrupalBreak(self):
        pass

    def dumpItem(self, grafs):
        """Write one item: a "-" paragraph plus any follow-up paragraphs."""
        self.dumpGraf(grafs[0], 4, 6)
        for par in grafs[1:]:
            _println()
            self.dumpGraf(par, 6, 6)

    def collateAndSortSections(self):
        """Merge sections that share a header, then sort by head_score()."""
        heads = []
        sectionsByHead = {}
        for _, head, items in self.sections:
            head = clean_head(head)
            try:
                s = sectionsByHead[head]
            except KeyError:
                s = sectionsByHead[head] = []
                heads.append((head_score(head), head.lower(), head, s))

            s.extend(items)

        heads.sort()
        self.sections = [(0, head, items) for _1, _2, head, items in heads]

    def dump(self):
        """Write the whole changelog section to sys.stdout."""
        if self.prehead:
            self.dumpPreheader(self.prehead)

        # In blog order the main heading follows the intro paragraphs
        # instead of preceding them.
        if not self.blogOrder:
            self.dumpMainhead(self.mainhead)

        for par in self.headtext:
            self.dumpHeadGraf(par)

        if self.blogOrder:
            self.dumpMainhead(self.mainhead)

        # The break, if any, goes after the second section.
        drupalBreakAfter = None
        if self.drupalBreak and len(self.sections) > 4:
            drupalBreakAfter = self.sections[1][2]

        self.dumpStartOfSections()
        for _, head, items in self.sections:
            if not head.endswith(':'):
                _warn("adding : to %r" % head)
                head = head + ":"
            self.dumpSectionHeader(head)
            for _, grafs in items:
                self.dumpItem(grafs)
            self.dumpEndOfSection()
            if items is drupalBreakAfter:
                self.dumpDrupalBreak()
        self.dumpEndOfSections()
        self.dumpEndOfChangelog()


class HTMLChangeLog(ChangeLog):
    """A ChangeLog that dumps itself as an HTML fragment."""

    def __init__(self, *args, **kwargs):
        ChangeLog.__init__(self, *args, **kwargs)

    def htmlText(self, graf):
        """Write the lines of `graf`, HTML-escaped and space-separated."""
        for line in graf:
            # "&" must be escaped first so that the entities produced
            # for "<" and ">" are not escaped a second time.
            line = line.rstrip().replace("&", "&amp;")
            line = line.rstrip().replace("<", "&lt;").replace(">", "&gt;")
            sys.stdout.write(line.strip())
            sys.stdout.write(" ")

    def htmlPar(self, graf):
        sys.stdout.write("<p>")
        self.htmlText(graf)
        sys.stdout.write("</p>\n")

    def dumpPreheader(self, graf):
        self.htmlPar(graf)

    def dumpMainhead(self, head):
        sys.stdout.write("<h2>%s</h2>" % head)

    def dumpHeadGraf(self, graf):
        self.htmlPar(graf)

    def dumpSectionHeader(self, header):
        header = header.replace(" o ", "", 1).lstrip()
        sys.stdout.write("  <li>%s\n" % header)
        sys.stdout.write("  <ul>\n")

    def dumpEndOfSection(self):
        sys.stdout.write("  </ul>\n\n")

    def dumpEndOfChangelog(self):
        pass

    def dumpStartOfSections(self):
        _println("<ul>\n")

    def dumpEndOfSections(self):
        _println("</ul>\n")

    def dumpDrupalBreak(self):
        _println("\n</ul>\n")
        _println("<p> </p>")
        _println("\n<!--break-->\n\n")
        _println("<ul>")

    def dumpItem(self, grafs):
        """Write one item as an <li>, dropping its leading " - " marker.

        Note that this rewrites the first line of `grafs` in place.
        """
        grafs[0][0] = grafs[0][0].replace(" - ", "", 1).lstrip()
        sys.stdout.write("  <li>")
        if len(grafs) > 1:
            for par in grafs:
                self.htmlPar(par)
        else:
            self.htmlText(grafs[0])
        _println()


def build_option_parser():
    """Return the optparse parser for the command line."""
    op = optparse.OptionParser(usage="usage: %prog [options] [filename]")
    op.add_option('-W', '--no-wrap', action='store_false',
                  dest='wrapText', default=True,
                  help='Do not re-wrap paragraphs')
    op.add_option('-S', '--no-sort', action='store_false',
                  dest='sort', default=True,
                  help='Do not sort or collate sections')
    op.add_option('-o', '--output', dest='output',
                  default='-', metavar='FILE', help="write output to FILE")
    op.add_option('-H', '--html', action='store_true',
                  dest='html', default=False,
                  help="generate an HTML fragment")
    op.add_option('-1', '--first', action='store_true',
                  dest='firstOnly', default=False,
                  help="write only the first section")
    op.add_option('-b', '--blog-header', action='store_true',
                  dest='blogOrder', default=False,
                  help="Write the header in blog order")
    op.add_option('-B', '--blog', action='store_true',
                  dest='blogFormat', default=False,
                  help="Set all other options as appropriate for a blog post")
    op.add_option('--inplace', action='store_true',
                  dest='inplace', default=False,
                  help="Alter the ChangeLog in place")
    op.add_option('--drupal-break', action='store_true',
                  dest='drupalBreak', default=False,
                  help='Insert a drupal-friendly <!--break--> as needed')
    return op


def main(argv=None):
    """Reformat the first changelog section of the input; return 0.

    `argv` is the argument list without the program name; None means
    sys.argv[1:].  Unless --first is given, everything after the first
    section is copied through unchanged.  When an output file is in use
    the result is written to FILE.new and then renamed to FILE.
    """
    op = build_option_parser()
    options, args = op.parse_args(argv)

    if options.blogFormat:
        options.blogOrder = True
        options.html = True
        options.sort = False
        options.wrapText = False
        options.firstOnly = True
        options.drupalBreak = True

    if len(args) > 1:
        op.error("Too many arguments")
    elif len(args) == 0:
        fname = 'ChangeLog'
    else:
        fname = args[0]

    if options.inplace:
        if options.output != '-':
            op.error("--inplace cannot be combined with --output")
        options.output = fname

    if options.html:
        ChangeLogClass = HTMLChangeLog
    else:
        ChangeLogClass = ChangeLog

    CL = ChangeLogClass(wrapText=options.wrapText,
                        blogOrder=options.blogOrder,
                        drupalBreak=options.drupalBreak)

    fname_new = fname_out = None
    nextline = None
    outfile = None
    saved_stdout = sys.stdout

    if fname != '-':
        infile = open(fname, 'r')
    else:
        infile = sys.stdin

    try:
        parser = head_parser

        for line in infile:
            line = line.rstrip()
            tp = parser(line)

            if tp == TP_SECHEAD:
                parser = body_parser
            elif tp == TP_END:
                # First line of the next section: stop parsing here.
                nextline = line
                break

            CL.addLine(tp, line)

        CL.lint()

        if options.output != '-':
            fname_new = options.output+".new"
            fname_out = options.output
            # The dump methods write to sys.stdout, so redirect it.
            outfile = sys.stdout = open(fname_new, 'w')

        if options.sort:
            CL.collateAndSortSections()

        CL.dump()

        if options.firstOnly:
            # NOTE(review): with an output file this leaves the result
            # in FILE.new without renaming it.  That matches the
            # original behavior; confirm whether it is intended.
            return 0

        if nextline is not None:
            _println(nextline)

        # Copy the rest of the input through unchanged.
        for line in infile:
            sys.stdout.write(line)
    finally:
        sys.stdout = saved_stdout
        # Close (and so flush) the output before it gets renamed.
        if outfile is not None:
            outfile.close()
        if infile is not sys.stdin:
            infile.close()

    if fname_new is not None:
        os.rename(fname_new, fname_out)

    return 0


if __name__ == "__main__":
    sys.exit(main())