#!/usr/bin/env python3
"""Plot cumulative empirical VCC counts against a fitted model projection.

The script takes its inputs positionally on the command line (see ``main``)
and writes the resulting figure to the requested path.
"""

import sys
from decimal import Decimal
from itertools import accumulate

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

import vcclib


# n.b.: ell is l spelled out to prevent confusion with the 1 character


def project_from_model(t1, ell, xs, counts):
    """Return the running total of the model's expected values over ``xs``.

    For every ``x`` in ``xs`` the term ``t1 * x**-ell * counts[x-1].total`` is
    added to a running sum, and the sum so far is recorded.

    Args:
        t1: multiplicative model parameter.
        ell: model exponent; it is negated before being applied to ``x``.
        xs: iterable of 1-based positions; ``x - 1`` indexes into ``counts``.
        counts: indexable sequence whose items expose a ``total`` attribute.

    Returns:
        A list with one cumulative value per element of ``xs``.
    """
    ys = []
    total = 0
    for x in xs:
        expected = t1 * (x)**-ell * counts[x-1].total
        total += expected
        ys.append(total)
    return ys


def main(argv):
    """Build the empirical-vs-model plot from command-line arguments.

    Positional arguments (``argv[0]`` is the program name):
        argv[1]:  file where each line is a VCC commit hash followed by the
                  issues it contributed to, comma separated
        argv[2]:  colon-separated git directories
        argv[3]:  colon-separated paths inside each git dir to filter on
                  (use "" or . to use everything)
        argv[4]:  colon-separated directories where experiences are stored
        argv[5]:  path+name of where to save the resulting plot
        argv[6]:  model t1
        argv[7]:  model ell
        argv[8]:  lower-error t1
        argv[9]:  lower-error ell
        argv[10]: upper-error t1
        argv[11]: upper-error ell

    Raises:
        IndexError: if fewer than eleven arguments are supplied.
        AssertionError: if the git dir, project path and experience dir lists
            do not all have the same length.
    """
    # a file where each line is a VCC commit hash, followed by the issues it
    # contributed to, comma separated
    vcc_file = argv[1]
    git_dirs = argv[2].split(':')
    # the paths in the git dir to filter on (use "" or . to use everything)
    project_paths = argv[3].split(':')
    # the directory where experiences are stored
    exp_dirs = vcclib.expdirs(argv[4].split(':'))
    assert len(git_dirs) == len(exp_dirs) and \
        len(git_dirs) == len(project_paths), \
        "each git dir needs one project path and one experience dir"
    # the path+name of where to save the resulting plot
    plot_path = argv[5]
    model_t1 = Decimal(argv[6])
    model_ell = Decimal(argv[7])
    model_t1_err_low = Decimal(argv[8])
    model_ell_err_low = Decimal(argv[9])
    model_t1_err_up = Decimal(argv[10])
    model_ell_err_up = Decimal(argv[11])

    # Only the first value returned by vcclib.sigfigs (the one corresponding
    # to the central estimate passed first) is used for the legend text.
    mt1_sig = vcclib.sigfigs([model_t1, model_t1_err_low, model_t1_err_up])[0]
    ml_sig = vcclib.sigfigs([model_ell, model_ell_err_low,
                             model_ell_err_up])[0]
    model_t1_str = np.format_float_positional(mt1_sig, 3, fractional=False)
    # ell is shown negated because the model applies it as a negative exponent
    model_ell_str = np.format_float_positional(-ml_sig, 3, fractional=False)

    vccs = vcclib.get_vccs(vcc_file)
    counts = vcclib.count_all_commits(git_dirs, project_paths, exp_dirs, vccs)

    # Running totals in a single pass instead of re-summing every prefix.
    cuml_vccs = list(accumulate(c.vccs for c in counts))
    cuml_tot = list(accumulate(c.total for c in counts))

    # skip values where there's no data to compare against
    offset = next((i for i, tot in enumerate(cuml_tot) if tot != 0), 0)

    xs_empirical = [x+1 for x in range(offset, len(counts))]
    xs_model = [x+1 for x in range(len(counts))]

    ys_model = project_from_model(model_t1, model_ell, xs_model, counts)
    print(model_t1, model_ell)
    ys_err_low = project_from_model(model_t1_err_low, model_ell_err_low,
                                    xs_model, counts)
    ys_err_up = project_from_model(model_t1_err_up, model_ell_err_up,
                                   xs_model, counts)

    plt.rc('text', usetex=True)
    plt.rc('font', family='serif', size=18)
    ax = plt.figure().gca()
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    # The y data must be trimmed by the same offset as xs_empirical; otherwise
    # the two sequences differ in length whenever leading entries are skipped
    # and plotting fails.
    plt.plot(xs_empirical, cuml_vccs[offset:], 'm.',
             label=r"Empirical $v_{\le j}$")
    # NOTE(review): the legend writes the sum from k=0, while xs_model (and so
    # the projection) starts at 1 -- confirm the intended label.
    plt.plot(xs_model, ys_model, 'g--',
             label=r"$V_{\le j}=\sum_{k=0}^{j}" + model_t1_str +
             " c_k k^{" + model_ell_str + "}$")
    plt.fill_between(xs_model, ys_err_low, ys_err_up,
                     color='green', alpha=0.2)
    plt.xlabel("$j=$ Experience")
    plt.ylabel("Vulnerabilities")
    plt.xlim(left=0)
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.savefig(plot_path)


if __name__ == '__main__':
    main(sys.argv)