analyze_callgraph.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. #!/usr/bin/python
  2. import re
  3. import sys
  4. import copy
  5. import cPickle
  6. import os
  7. class Parser:
  8. def __init__(self):
  9. self.calls = {}
  10. def enter_func(self, name):
  11. if self.infunc and not self.extern:
  12. self.calls.setdefault(self.infunc, set()).update( self.calledfns )
  13. self.calledfns = set()
  14. self.infunc = name
  15. self.extern = False
  16. def parse_callgraph_file(self, inp):
  17. self.infunc = None
  18. self.extern = False
  19. self.calledfns = set()
  20. for line in inp:
  21. m = re.match(r"Call graph node for function: '([^']+)'", line)
  22. if m:
  23. self.enter_func(m.group(1))
  24. continue
  25. m = re.match(r" CS<[^>]+> calls external node", line)
  26. if m:
  27. self.extern = True
  28. m = re.match(r" CS<[^>]+> calls function '([^']+)'", line)
  29. if m:
  30. self.calledfns.add(m.group(1))
  31. self.enter_func(None)
  32. def extract_callgraph(self):
  33. c = self.calls
  34. self.calls = {}
  35. return c
  36. def transitive_closure(g):
  37. passno = 0
  38. changed = True
  39. g = copy.deepcopy(g)
  40. import random
  41. while changed:
  42. passno += 1
  43. changed = False
  44. keys = g.keys()
  45. idx = 0
  46. for k in keys:
  47. idx += 1
  48. print "Pass %d/?: %d/%d\r" %(passno, idx, len(keys)),
  49. sys.stdout.flush()
  50. newset = g[k].copy()
  51. for fn in g[k]:
  52. newset.update(g.get(fn, set()))
  53. if len(newset) != len(g[k]):
  54. g[k].update( newset )
  55. changed = True
  56. print
  57. return g
  58. def strongly_connected_components(g):
  59. # From https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm, done stupidly.
  60. index_of = {}
  61. index = [ 0 ]
  62. lowlink = {}
  63. S = []
  64. onStack = set()
  65. all_sccs = []
  66. def strongconnect(fn):
  67. index_of[fn] = index[0]
  68. lowlink[fn] = index[0]
  69. index[0] += 1
  70. S.append(fn)
  71. onStack.add(fn)
  72. for w in g.get(fn, []):
  73. if w not in index_of:
  74. strongconnect(w)
  75. lowlink[fn] = min(lowlink[fn], lowlink[w])
  76. elif w in onStack:
  77. lowlink[fn] = min(lowlink[fn], index_of[w])
  78. if lowlink[fn] == index_of[fn]:
  79. this_scc = []
  80. all_sccs.append(this_scc)
  81. while True:
  82. w = S.pop()
  83. onStack.remove(w)
  84. this_scc.append(w)
  85. if w == fn:
  86. break
  87. for v in g.keys():
  88. if v not in index_of:
  89. strongconnect(v)
  90. return all_sccs
  91. def biggest_component(sccs):
  92. return max(len(c) for c in sccs)
  93. def connection_bottlenecks(callgraph):
  94. callers = {}
  95. for fn in callgraph:
  96. for fn2 in callgraph[fn]:
  97. callers.setdefault(fn2, set()).add(fn)
  98. components = strongly_connected_components(callgraph)
  99. components.sort(key=len)
  100. big_component_fns = components[-1]
  101. size = len(big_component_fns)
  102. function_bottlenecks = fn_results = []
  103. total = len(big_component_fns)
  104. idx = 0
  105. for fn in big_component_fns:
  106. idx += 1
  107. print "Pass 1/3: %d/%d\r"%(idx, total),
  108. sys.stdout.flush()
  109. cg2 = copy.deepcopy(callgraph)
  110. del cg2[fn]
  111. fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn) )
  112. print
  113. bcf_set = set(big_component_fns)
  114. call_bottlenecks = fn_results = []
  115. result_set = set()
  116. total = len(big_component_fns)
  117. idx = 0
  118. for fn in big_component_fns:
  119. fn_callers = callers[fn].intersection(bcf_set)
  120. idx += 1
  121. if len(fn_callers) != 1:
  122. continue
  123. print "Pass 2/3: %d/%d\r"%(idx, total),
  124. sys.stdout.flush()
  125. caller = fn_callers.pop()
  126. assert len(fn_callers) == 0
  127. cg2 = copy.deepcopy(callgraph)
  128. cg2[caller].remove(fn)
  129. fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn, "called by", caller) )
  130. result_set.add( (caller, fn) )
  131. print
  132. total = len(big_component_fns)
  133. idx = 0
  134. for fn in big_component_fns:
  135. fn_calls = callgraph[fn].intersection(bcf_set)
  136. idx += 1
  137. if len(fn_calls) != 1:
  138. continue
  139. print "Pass 3/3: %d/%d\r"%(idx, total),
  140. sys.stdout.flush()
  141. callee = fn_calls.pop()
  142. if (fn, callee) in result_set:
  143. continue
  144. assert len(fn_calls) == 0
  145. cg2 = copy.deepcopy(callgraph)
  146. cg2[fn].remove(callee)
  147. fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), callee, "called by", fn) )
  148. print
  149. return (function_bottlenecks, call_bottlenecks)
  150. if __name__ == '__main__':
  151. p = Parser()
  152. for fname in sys.argv[1:]:
  153. with open(fname, 'r') as f:
  154. p.parse_callgraph_file(f)
  155. sys.stdout.flush
  156. print "Building callgraph"
  157. callgraph = p.extract_callgraph()
  158. print "Finding strongly connected components"
  159. sccs = strongly_connected_components(callgraph)
  160. print "Finding the transitive closure of the callgraph.."
  161. closure = transitive_closure(callgraph)
  162. print "Finding bottlenecks..."
  163. bottlenecks = connection_bottlenecks(callgraph)
  164. data = {
  165. 'callgraph' : callgraph,
  166. 'sccs' : sccs,
  167. 'closure' : closure,
  168. 'bottlenecks' : bottlenecks }
  169. with open('callgraph.pkl', 'w') as f:
  170. cPickle.dump(data, f)