Bladeren bron

Hack up the scripts/maint/*callgraph* scripts to do more, better

These scripts are now a little more bulletproof, cache data a little
better, and generate more information.  Notably, they search for the
vectors or edges to cut that would lower the size of the largest
SCC.
Nick Mathewson 10 jaren geleden
bovenliggende
commit
bb46630513
2 gewijzigde bestanden met toevoegingen van 132 en 13 verwijderingen
  1. 111 9
      scripts/maint/analyze_callgraph.py
  2. 21 4
      scripts/maint/display_callgraph.py

+ 111 - 9
scripts/maint/analyze_callgraph.py

@@ -13,7 +13,7 @@ class Parser:
   def enter_func(self, name):
     if self.infunc and not self.extern:
       self.calls.setdefault(self.infunc, set()).update( self.calledfns )
- 
+
     self.calledfns = set()
     self.infunc = name
     self.extern = False
@@ -23,7 +23,7 @@ class Parser:
     self.extern = False
     self.calledfns = set()
     for line in inp:
-       m = re.match(r"Call graph node for function: '([^']+)'", line) 
+       m = re.match(r"Call graph node for function: '([^']+)'", line)
        if m:
            self.enter_func(m.group(1))
            continue
@@ -42,18 +42,28 @@ class Parser:
 
 
 def transitive_closure(g):
+    passno = 0
     changed = True
     g = copy.deepcopy(g)
+    import random
     while changed:
+      passno += 1
       changed = False
-      print "X"
-      for k in g.keys():
+      keys = g.keys()
+      idx = 0
+      for k in keys:
+         idx += 1
+         print "Pass %d/?: %d/%d\r" %(passno, idx, len(keys)),
+         sys.stdout.flush()
          newset = g[k].copy()
          for fn in g[k]:
             newset.update(g.get(fn, set()))
          if len(newset) != len(g[k]):
             g[k].update( newset )
             changed = True
+
+      print
+
     return g
 
 def strongly_connected_components(g):
@@ -96,22 +106,114 @@ def strongly_connected_components(g):
 
   return all_sccs
 
+def biggest_component(sccs):
+  return max(len(c) for c in sccs)
+
+def connection_bottlenecks(callgraph):
+
+  callers = {}
+  for fn in callgraph:
+    for fn2 in callgraph[fn]:
+      callers.setdefault(fn2, set()).add(fn)
+
+  components = strongly_connected_components(callgraph)
+  components.sort(key=len)
+  big_component_fns = components[-1]
+  size = len(big_component_fns)
+
+  function_bottlenecks = fn_results = []
+
+  total = len(big_component_fns)
+  idx = 0
+  for fn in big_component_fns:
+    idx += 1
+    print "Pass 1/3: %d/%d\r"%(idx, total),
+    sys.stdout.flush()
+    cg2 = copy.deepcopy(callgraph)
+    del cg2[fn]
+
+    fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn) )
+
+  print
+  bcf_set = set(big_component_fns)
+
+  call_bottlenecks = fn_results = []
+  result_set = set()
+  total = len(big_component_fns)
+  idx = 0
+  for fn in big_component_fns:
+    fn_callers = callers[fn].intersection(bcf_set)
+    idx += 1
+    if len(fn_callers) != 1:
+      continue
+
+    print "Pass 2/3: %d/%d\r"%(idx, total),
+    sys.stdout.flush()
+
+    caller = fn_callers.pop()
+    assert len(fn_callers) == 0
+    cg2 = copy.deepcopy(callgraph)
+    cg2[caller].remove(fn)
+
+    fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn, "called by", caller) )
+    result_set.add( (caller, fn) )
+
+  print
+
+  total = len(big_component_fns)
+  idx = 0
+  for fn in big_component_fns:
+    fn_calls = callgraph[fn].intersection(bcf_set)
+    idx += 1
+    if len(fn_calls) != 1:
+      continue
+
+    print "Pass 3/3: %d/%d\r"%(idx, total),
+    sys.stdout.flush()
+
+    callee = fn_calls.pop()
+    if (fn, callee) in result_set:
+      continue
+
+    assert len(fn_calls) == 0
+    cg2 = copy.deepcopy(callgraph)
+    cg2[fn].remove(callee)
+
+    fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), callee, "called by", fn) )
+
+  print
+
+
+  return (function_bottlenecks, call_bottlenecks)
+
 if __name__ == '__main__':
     p = Parser()
     for fname in sys.argv[1:]:
       with open(fname, 'r') as f:
         p.parse_callgraph_file(f)
+
+    sys.stdout.flush
+
+    print "Building callgraph"
     callgraph = p.extract_callgraph()
 
+    print "Finding strongly connected components"
     sccs = strongly_connected_components(callgraph)
 
+    print "Finding the transitive closure of the callgraph.."
     closure = transitive_closure(callgraph)
 
+    print "Finding bottlenecks..."
+    bottlenecks = connection_bottlenecks(callgraph)
+
+    data = {
+      'callgraph' : callgraph,
+      'sccs' : sccs,
+      'closure' : closure,
+      'bottlenecks' : bottlenecks }
+
     with open('callgraph.pkl', 'w') as f:
-      cPickle.dump(callgraph, f)
+      cPickle.dump(data, f)
+
 
-    with open('callgraph_closure.pkl', 'w') as f:
-      cPickle.dump(closure, f)
 
-    with open('callgraph_sccs.pkl', 'w') as f:
-      cPickle.dump(sccs, f)

+ 21 - 4
scripts/maint/display_callgraph.py

@@ -2,9 +2,12 @@
 
 import cPickle
 
-callgraph = cPickle.load(open("callgraph.pkl"))
-closure = cPickle.load(open("callgraph_closure.pkl"))
-sccs = cPickle.load(open("callgraph_sccs.pkl"))
+data = cPickle.load(open("callgraph.pkl"))
+
+callgraph = data['callgraph']
+closure = data['closure']
+sccs = data['sccs']
+fn_bottle, call_bottle = data['bottlenecks']
 
 for n_reachable, fn in sorted(list((len(r), fn) for fn, r in closure.iteritems())):
    print "%s can reach %s other functions." %(fn, n_reachable)
@@ -13,10 +16,24 @@ for n_reachable, fn in sorted(list((len(r), fn) for fn, r in closure.iteritems()
 c = [ (len(component), component) for component in sccs ]
 c.sort()
 
+print "\n================================"
+
 for n, component in c:
    if n < 2:
       continue
-   print n, component
+   print "Strongly connected component of size %d:"%n
+   print component
+
+
+print "\n================================"
 
+print "====== Number of functions pulled into blob, by function in blob."
+fn_bottle.sort()
+for n, fn in fn_bottle[-30:]:
+   print "%3d: %s"%(n, fn)
 
+print "====== Number of functions pulled into blob, by call in blob."
+call_bottle.sort()
+for n, fn1, _, fn2 in call_bottle[-30:]:
+   print "%3d: %s -> %s "%(n, fn2, fn1)