#!/usr/bin/env python3
"""Estimate an upper bound on first-commit vulnerability probability.

Each commit is assigned the "experience" of its author (1 for the author's
first commit to the given paths, 2 for the second, ...).  The model used
below assumes a commit with experience ``c`` is vulnerable with probability
``T1 * c ** -l``.  For a range of ``l`` values the script searches for the
``T1`` at which the observed number of vulnerability-free commits becomes
implausible (Chernoff bound below 5%), and plots ``T1`` against ``l``.

Usage:
    script GIT_DIR[:GIT_DIR...] PATHS[:PATHS...] PLOT_PATH NUM_VCCS
"""
import sys
import subprocess
import math
import shlex

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Only main() plots; keep the analysis helpers importable without it.
    plt = None


def get_commit_xps(git_dir, paths):
    """Return the author-experience value of every commit touching *paths*.

    Args:
        git_dir: Path of the git repository to inspect.
        paths: Pathspec(s) handed to ``git log`` after ``--``.  Several
            pathspecs may be given separated by whitespace; shell-style
            quoting is honoured for paths containing spaces.

    Returns:
        A list with one integer per non-merge commit, ordered by committer
        timestamp.  Each entry is how many commits (including this one) the
        commit's author had made so far: 1 for a first commit, 2 for the
        second, and so on.

    Raises:
        subprocess.CalledProcessError: If ``git log`` fails.
        ValueError: If no commit matches.
    """
    # An argument list (no shell) keeps directory and path names with spaces
    # or shell metacharacters from being reinterpreted.
    command = [
        "git", "-C", git_dir, "log",
        "--full-history", "--reverse", "--no-merges", "--use-mailmap",
        "--format=format:%ct %aN <%aE>",
        "--",
    ] + shlex.split(paths)
    output = subprocess.check_output(command, universal_newlines=True)

    commits = []
    for line in output.splitlines():
        # Each line is "<committer timestamp> <author name> <<email>>"; the
        # timestamp must not leak into the author key, otherwise every
        # commit would look like it came from a brand-new author.
        timestamp, _, author = line.strip().partition(" ")
        if not timestamp:
            continue
        commits.append((int(timestamp), author.strip()))
    if not commits:
        raise ValueError(
            "no commits found in {!r} for paths {!r}".format(git_dir, paths))

    # Stable sort: commits sharing a timestamp keep git's (reversed) order.
    commits.sort(key=lambda commit: commit[0])

    commits_so_far = {}
    xps = []
    for _, author in commits:
        commits_so_far[author] = commits_so_far.get(author, 0) + 1
        xps.append(commits_so_far[author])
    return xps


def _upper_tail_bound(s, mean):
    """Chernoff upper bound on Pr[S >= s] for a Bernoulli sum with *mean*."""
    if s <= mean:
        # The bound only bites above the mean; below it, it is trivially 1.
        return 1.0
    if mean <= 0:
        # Limit of the bound as mean -> 0+ with s > 0.
        return 0.0
    # Pr[S >= (1+d)*mu] <= (e**d / (1+d)**(1+d))**mu, with s = (1+d)*mu.
    return math.exp(s - mean - s * math.log(s / mean))


def find_T1_at_l(commit_xps, num_vccs, l, precision):
    """Bisect for the largest T1 still compatible with the observed data.

    The model assumes a commit whose author has experience ``c`` is free of
    vulnerabilities with probability ``1 - T1 * c ** -l``.

    Args:
        commit_xps: Author-experience value (>= 1) of every commit.
        num_vccs: Number of commits known to have introduced a
            vulnerability; must lie in ``[0, len(commit_xps)]``.
        l: Exponent of the experience decay.
        precision: Bisection stops once the search interval is this narrow.

    Returns:
        The last T1 guess evaluated, i.e. approximately the T1 at which the
        probability bound of seeing at least the observed number of
        vulnerability-free commits crosses 5%.

    Raises:
        ValueError: If *num_vccs* is outside ``[0, len(commit_xps)]``.
    """
    num_commits = len(commit_xps)
    if not 0 <= num_vccs <= num_commits:
        raise ValueError(
            "num_vccs must be between 0 and the number of commits")

    left = 0.0
    right = 1.0
    s = num_commits - num_vccs
    print("l: {} s: {}".format(l, s))

    # sum(1 - T1 * c**-l) == N - T1 * sum(c**-l); the inner sum does not
    # depend on T1, so compute it once instead of on every bisection step.
    decay_sum = math.fsum(c ** -l for c in commit_xps)

    # Defined up front so a precision >= 1 (no iterations) still returns.
    T1_guess = (left + right) / 2
    while (right - left) > precision:
        T1_guess = (left + right) / 2
        mean = num_commits - T1_guess * decay_sum
        # Pr[S >= s]: the probability that a distribution with T1_guess
        # would have produced at least as many vuln-free commits as seen.
        p = _upper_tail_bound(s, mean)
        print(T1_guess, mean, p)
        if p < 0.05:
            # Seeing this few vulnerabilities is <5% likely under T1_guess,
            # so this T1 is too pessimistic: lower the guess.
            right = T1_guess
        else:
            left = T1_guess
    return T1_guess


def main(argv):
    """Run the analysis and save the T1-versus-l plot.

    Args:
        argv: ``[prog, git_dirs, paths, plot_path, num_vccs]`` where
            *git_dirs* and *paths* are ':'-separated lists of equal length
            (one pathspec string per repository).
    """
    if len(argv) < 5:
        prog = argv[0] if argv else "prog"
        sys.exit("usage: {} GIT_DIR[:GIT_DIR...] PATHS[:PATHS...] "
                 "PLOT_PATH NUM_VCCS".format(prog))
    if plt is None:
        raise ImportError("matplotlib is required to write the plot")

    git_dirs = argv[1].split(':')
    paths = argv[2].split(':')
    plot_path = argv[3]
    num_vccs = int(argv[4])
    if len(git_dirs) != len(paths):
        sys.exit("error: got {} git dir(s) but {} path list(s)".format(
            len(git_dirs), len(paths)))

    # Experience is counted per repository, then all commits are pooled.
    commit_xps = [xp
                  for git_dir, repo_paths in zip(git_dirs, paths)
                  for xp in get_commit_xps(git_dir, repo_paths)]

    l_vals = np.arange(0.01, 0.2, 0.02)
    print(l_vals)
    T1s = [find_T1_at_l(commit_xps, num_vccs, l, 0.0001) for l in l_vals]
    print(T1s)

    plt.plot(l_vals, T1s)
    plt.savefig(plot_path)


if __name__ == '__main__':
    main(sys.argv)