1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768 |
- #!/usr/bin/env python3
- import sys
- import subprocess
- import math
- import numpy as np
- import matplotlib.pyplot as plt
def get_commit_xps(git_dir, paths):
    """Return the author's "experience" at each non-merge commit touching *paths*.

    Commits are taken from ``git log`` in the repository at *git_dir* and
    ordered by committer timestamp (``%ct``).  The experience of a commit is
    the 1-based count of commits by the same (mailmap-normalised) author up
    to and including that commit.

    Args:
        git_dir: Path of the repository, passed to ``git -C``.
        paths: Whitespace-separated pathspecs (shell-style quoting is
            honoured) that restrict the log.  Glob characters are handed to
            git as pathspecs rather than expanded by a shell.

    Returns:
        A list of ints, one per commit, in timestamp order.

    Raises:
        subprocess.CalledProcessError: if ``git log`` exits with an error.
        ValueError: if no commit matches.
    """
    import shlex  # local import: only this function needs it

    # An argument list (no shell) keeps spaces or metacharacters in git_dir
    # and the pathspecs from being re-interpreted by a shell.
    command = [
        "git", "-C", git_dir, "log",
        "--full-history", "--reverse", "--no-merges", "--use-mailmap",
        "--format=format:%ct %aN <%aE>",
        "--",
    ] + shlex.split(paths)
    output = subprocess.check_output(command, universal_newlines=True).strip()
    if not output:
        raise ValueError(
            "no commits found in {!r} for paths {!r}".format(git_dir, paths))

    # Each line is "<timestamp> <name> <<email>>".  The timestamp is only a
    # sort key and must not be part of the author identity; otherwise every
    # commit looks like it comes from a brand-new author.
    commits = []
    for line in output.splitlines():
        line = line.strip()
        if not line:
            continue
        timestamp, _, author = line.partition(" ")
        commits.append((int(timestamp), author))
    # list.sort is stable, so commits sharing a timestamp keep git's
    # --reverse (oldest first) order.
    commits.sort(key=lambda commit: commit[0])

    author_xps = {}
    xps = []
    for _, author in commits:
        xp = author_xps.get(author, 0) + 1
        author_xps[author] = xp
        xps.append(xp)
    return xps
def find_T1_at_l(commit_xps, num_vccs, l, precision):
    """Bisect for the T1 at which the observed clean-commit count hits p = 0.05.

    A commit by an author with experience ``c`` is modelled as vulnerable
    with probability ``T1 * c**-l``.  For a candidate T1 the expected number
    of vulnerability-free commits is ``mean = sum(1 - T1 * c**-l)``; the
    observed number is ``s = len(commit_xps) - num_vccs``.  The quantity
    ``exp(s - mean - s*ln(s/mean))`` (the Chernoff-style upper-tail bound) is
    used as Pr[S >= s]; guesses where it drops below 0.05 are rejected as too
    high.  Progress is printed to stdout on every step.

    Args:
        commit_xps: Author experience (>= 1) for each commit.
        num_vccs: Number of vulnerability-contributing commits observed.
        l: Exponent of the experience decay.
        precision: Width of the search interval at which bisection stops.

    Returns:
        The last midpoint evaluated, or 0.5 (the initial midpoint) when
        *precision* is so large that no bisection step is needed.
    """
    left = 0.0
    right = 1.0
    s = len(commit_xps) - num_vccs
    print("l: {} s: {}".format(l, s))

    # c**-l does not depend on the guess, so compute it once instead of on
    # every bisection step.
    decay = [c**-l for c in commit_xps]

    # Defined up front so the function still returns a value when the loop
    # body never runs (precision >= right - left).
    T1_guess = (left + right) / 2
    while (right - left) > precision:
        T1_guess = (left + right) / 2
        mean = sum(1.0 - T1_guess * d for d in decay)
        if s > mean:
            # Pr[S>=s] (i.e., the probability that a dist with T1_guess
            # would have produced more vuln-free commits)
            p = math.exp(s - mean - s * math.log(s / mean))
        else:
            # The tail bound only applies above the mean.  At or below it
            # the bound is trivially 1, so this guess cannot be rejected.
            p = 1.0
        print(T1_guess, mean, p)
        if p < 0.05:
            # the probability of fewer good commits is <5%,
            # so this T1 would have produced more vulns with >95% prob.
            # we can lower our guess for T1
            right = T1_guess
        else:
            left = T1_guess
    return T1_guess
def main(argv):
    """Plot the T1 found by ``find_T1_at_l`` for a range of ``l`` values.

    Args:
        argv: ``[prog, git_dirs, paths, plot_path, num_vccs]`` where
            *git_dirs* and *paths* are ``:``-separated lists paired by
            position (the i-th paths entry restricts the log of the i-th
            repository), *plot_path* is where the figure is saved and
            *num_vccs* is an integer.

    Raises:
        SystemExit: if arguments are missing or the two lists differ in
            length.
        ValueError: if *num_vccs* is not an integer.
    """
    if len(argv) < 5:
        sys.exit(
            "usage: {} GIT_DIR[:GIT_DIR...] PATHS[:PATHS...] "
            "PLOT_PATH NUM_VCCS".format(argv[0] if argv else "prog"))

    git_dirs = argv[1].split(':')
    paths = argv[2].split(':')
    plot_path = argv[3]
    # Parsed before any git work so a malformed value fails fast.
    num_vccs = int(argv[4])

    # The lists are paired by position; a length mismatch would otherwise
    # raise IndexError or silently drop the surplus path groups.
    if len(git_dirs) != len(paths):
        sys.exit(
            "error: got {} git dir(s) but {} path group(s); "
            "they must match".format(len(git_dirs), len(paths)))

    commit_xps = [
        xp
        for git_dir, repo_paths in zip(git_dirs, paths)
        for xp in get_commit_xps(git_dir, repo_paths)
    ]

    l_vals = np.arange(0.01, 0.2, 0.02)
    print(l_vals)
    T1s = [find_T1_at_l(commit_xps, num_vccs, l, 0.0001) for l in l_vals]
    print(T1s)
    plt.plot(l_vals, T1s)
    plt.savefig(plot_path)
# Script entry point: run main() with the raw command-line arguments only
# when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main(sys.argv)
|