#!/usr/bin/env python3
# plot_T1s.py
import math
import shlex
import subprocess
import sys

import matplotlib.pyplot as plt
import numpy as np
  7. def get_commit_xps(git_dir, paths):
  8. command = "git -C " + git_dir + " log " \
  9. "--full-history --reverse --no-merges --use-mailmap "\
  10. "--format='format:%ct %aN <%aE>' -- " + paths + \
  11. " | sort -n | cut -f2"
  12. lines = subprocess.check_output(command, shell=True,
  13. universal_newlines=True).strip()
  14. assert lines
  15. author_xps = {}
  16. xps = []
  17. for line in lines.splitlines():
  18. author = line.strip()
  19. if author not in author_xps:
  20. author_xps[author] = 1
  21. xps.append(author_xps[author])
  22. author_xps[author] += 1
  23. return xps
  24. def find_T1_at_l(commit_xps, num_vccs, l, precision):
  25. left = 0.0
  26. right = 1.0
  27. s = len(commit_xps) - num_vccs
  28. print("l: {} s: {}".format(l, s))
  29. while (right - left) > precision:
  30. T1_guess = (left + right) / 2
  31. mean = sum([1.0-T1_guess*c**-l for c in commit_xps])
  32. assert(s > mean)
  33. assert(math.log(s/mean) > 0)
  34. # Pr[S>=s] (i.e., the probability that a dist with T1_guess would have produced more vuln-free commits)
  35. p = math.exp(s - mean - s * math.log(s/mean))
  36. print(T1_guess, mean, p)
  37. if p < 0.05:
  38. # the probability of fewer good commits is <5%,
  39. # so this T1 would have produced more vulns with >95% prob.
  40. # we can lower our guess for T1
  41. right = T1_guess
  42. else:
  43. left = T1_guess
  44. return T1_guess
  45. def main(argv):
  46. git_dirs = argv[1].split(':')
  47. paths = argv[2].split(':')
  48. plot_path = argv[3]
  49. commit_xps = [xp for i in range(len(git_dirs)) for xp in
  50. get_commit_xps(git_dirs[i], paths[i])]
  51. num_vccs = int(argv[4])
  52. l_vals = np.arange(0.01, 0.2, 0.02)
  53. print(l_vals)
  54. T1s = [find_T1_at_l(commit_xps, num_vccs, l, 0.0001) for l in l_vals]
  55. print(T1s)
  56. plt.plot(l_vals, T1s)
  57. plt.savefig(plot_path)
  58. if __name__ == '__main__':
  59. main(sys.argv)