analyze_callgraph.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. #!/usr/bin/python
  2. import re
  3. import sys
  4. import copy
  5. import cPickle
  6. import os
  7. class Parser:
  8. def __init__(self):
  9. self.calls = {}
  10. self.definedIn = {}
  11. def enter_func(self, name):
  12. if self.infunc and not self.extern and self.calledfns:
  13. if self.infunc in self.definedIn:
  14. #print "{}: {} or {}?".format(
  15. # self.infunc, self.definedIn[self.infunc], self.module)
  16. self.definedIn[self.infunc] = 'nil'
  17. else:
  18. self.definedIn[self.infunc] = self.module
  19. self.calls.setdefault(self.infunc, set()).update( self.calledfns )
  20. self.calledfns = set()
  21. self.infunc = name
  22. self.extern = False
  23. def parse_callgraph_file(self, inp, module):
  24. self.infunc = None
  25. self.extern = False
  26. self.calledfns = set()
  27. self.module = module
  28. for line in inp:
  29. m = re.match(r"Call graph node for function: '([^']+)'", line)
  30. if m:
  31. self.enter_func(m.group(1))
  32. continue
  33. m = re.match(r" CS<[^>]+> calls external node", line)
  34. if m:
  35. self.extern = True
  36. m = re.match(r" CS<[^>]+> calls function '([^']+)'", line)
  37. if m:
  38. self.calledfns.add(m.group(1))
  39. self.enter_func(None)
  40. def extract_callgraph(self):
  41. c = self.calls
  42. self.calls = {}
  43. return c
  44. def transitive_closure(g):
  45. passno = 0
  46. changed = True
  47. g = copy.deepcopy(g)
  48. import random
  49. while changed:
  50. passno += 1
  51. changed = False
  52. keys = g.keys()
  53. idx = 0
  54. for k in keys:
  55. idx += 1
  56. print "Pass %d/?: %d/%d\r" %(passno, idx, len(keys)),
  57. sys.stdout.flush()
  58. newset = g[k].copy()
  59. for fn in g[k]:
  60. newset.update(g.get(fn, set()))
  61. if len(newset) != len(g[k]):
  62. g[k].update( newset )
  63. changed = True
  64. print
  65. return g
  66. def strongly_connected_components(g):
  67. # From https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm, done stupidly.
  68. index_of = {}
  69. index = [ 0 ]
  70. lowlink = {}
  71. S = []
  72. onStack = set()
  73. all_sccs = []
  74. def strongconnect(fn):
  75. index_of[fn] = index[0]
  76. lowlink[fn] = index[0]
  77. index[0] += 1
  78. S.append(fn)
  79. onStack.add(fn)
  80. for w in g.get(fn, []):
  81. if w not in index_of:
  82. strongconnect(w)
  83. lowlink[fn] = min(lowlink[fn], lowlink[w])
  84. elif w in onStack:
  85. lowlink[fn] = min(lowlink[fn], index_of[w])
  86. if lowlink[fn] == index_of[fn]:
  87. this_scc = []
  88. all_sccs.append(this_scc)
  89. while True:
  90. w = S.pop()
  91. onStack.remove(w)
  92. this_scc.append(w)
  93. if w == fn:
  94. break
  95. for v in g.keys():
  96. if v not in index_of:
  97. strongconnect(v)
  98. return all_sccs
  99. def biggest_component(sccs):
  100. return max(len(c) for c in sccs)
  101. def connection_bottlenecks(callgraph):
  102. callers = {}
  103. for fn in callgraph:
  104. for fn2 in callgraph[fn]:
  105. callers.setdefault(fn2, set()).add(fn)
  106. components = strongly_connected_components(callgraph)
  107. components.sort(key=len)
  108. big_component_fns = components[-1]
  109. size = len(big_component_fns)
  110. function_bottlenecks = fn_results = []
  111. total = len(big_component_fns)
  112. idx = 0
  113. for fn in big_component_fns:
  114. idx += 1
  115. print "Pass 1/3: %d/%d\r"%(idx, total),
  116. sys.stdout.flush()
  117. cg2 = copy.deepcopy(callgraph)
  118. del cg2[fn]
  119. fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn) )
  120. print
  121. bcf_set = set(big_component_fns)
  122. call_bottlenecks = fn_results = []
  123. result_set = set()
  124. total = len(big_component_fns)
  125. idx = 0
  126. for fn in big_component_fns:
  127. fn_callers = callers[fn].intersection(bcf_set)
  128. idx += 1
  129. if len(fn_callers) != 1:
  130. continue
  131. print "Pass 2/3: %d/%d\r"%(idx, total),
  132. sys.stdout.flush()
  133. caller = fn_callers.pop()
  134. assert len(fn_callers) == 0
  135. cg2 = copy.deepcopy(callgraph)
  136. cg2[caller].remove(fn)
  137. fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), fn, "called by", caller) )
  138. result_set.add( (caller, fn) )
  139. print
  140. total = len(big_component_fns)
  141. idx = 0
  142. for fn in big_component_fns:
  143. fn_calls = callgraph[fn].intersection(bcf_set)
  144. idx += 1
  145. if len(fn_calls) != 1:
  146. continue
  147. print "Pass 3/3: %d/%d\r"%(idx, total),
  148. sys.stdout.flush()
  149. callee = fn_calls.pop()
  150. if (fn, callee) in result_set:
  151. continue
  152. assert len(fn_calls) == 0
  153. cg2 = copy.deepcopy(callgraph)
  154. cg2[fn].remove(callee)
  155. fn_results.append( (size - biggest_component(strongly_connected_components(cg2)), callee, "called by", fn) )
  156. print
  157. return (function_bottlenecks, call_bottlenecks)
  158. if __name__ == '__main__':
  159. p = Parser()
  160. for fname in sys.argv[1:]:
  161. modname = re.sub(r'.*/', '', fname).replace('.callgraph', '.c')
  162. with open(fname, 'r') as f:
  163. p.parse_callgraph_file(f, modname)
  164. sys.stdout.flush()
  165. print "Building callgraph"
  166. callgraph = p.extract_callgraph()
  167. inModule = p.definedIn
  168. print "Deriving module callgraph"
  169. modCallgraph = {}
  170. for fn in callgraph:
  171. fnMod = inModule[fn]
  172. for called in callgraph[fn]:
  173. try:
  174. calledMod = inModule[called]
  175. except KeyError:
  176. continue
  177. modCallgraph.setdefault(fnMod, set()).add(calledMod)
  178. del modCallgraph['nil']
  179. print "Finding strongly connected components"
  180. sccs = strongly_connected_components(callgraph)
  181. print "Finding the transitive closure of the callgraph.."
  182. closure = transitive_closure(callgraph)
  183. print "Finding bottlenecks..."
  184. bottlenecks = connection_bottlenecks(callgraph)
  185. print "Finding module SCCs"
  186. modSCCS = strongly_connected_components(modCallgraph)
  187. print "Finding module TC"
  188. modTC = transitive_closure(modCallgraph)
  189. print "Finding module bottlenecks"
  190. modB = connection_bottlenecks(modCallgraph)
  191. data = {
  192. 'callgraph' : callgraph,
  193. 'sccs' : sccs,
  194. 'closure' : closure,
  195. 'bottlenecks' : bottlenecks,
  196. 'modules' : p.definedIn,
  197. 'modItems' : {
  198. 'callgraph' : modCallgraph,
  199. 'sccs' : modSCCS,
  200. 'closure' : modTC,
  201. 'bottlenecks' : modB,
  202. }
  203. }
  204. with open('callgraph.pkl', 'w') as f:
  205. cPickle.dump(data, f)