1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495 |
- #!/usr/bin/env python3
- import sys
- from decimal import Decimal
- import numpy as np
- import matplotlib.pyplot as plt
- from matplotlib.ticker import MaxNLocator
- import vcclib
- # n.b.: ell is l spelled out to prevent confusion with the 1 character
def project_from_model(t1, ell, xs, counts):
    """Return the model's cumulative projection for each value in *xs*.

    For every ``x`` in ``xs`` (taken in order) the per-step term is
    ``t1 * x**-ell * counts[x-1].total``; the returned list holds the running
    sum of those terms, one entry per ``x``.

    ``xs`` holds 1-based positions into ``counts`` (each ``x`` reads
    ``counts[x-1]``), and every ``counts`` entry must expose a ``total``
    attribute.
    """
    cumulative = []
    running = 0
    for experience in xs:
        # in-place add keeps the same accumulation semantics for any numeric
        # type the caller passes in (int, float, Decimal, ...)
        running += t1 * experience ** -ell * counts[experience - 1].total
        cumulative.append(running)
    return cumulative
def main(argv):
    """Plot empirical cumulative VCC counts against the fitted model.

    ``argv`` is the full argument vector (program name at index 0):

    1.  file where each line is a VCC commit hash, followed by the issues it
        contributed to, comma separated
    2.  colon-separated git directories
    3.  colon-separated paths inside each git dir to filter on
        (use "" or . to use everything)
    4.  colon-separated directories where experiences are stored
    5.  path+name of where to save the resulting plot
    6.  model t1
    7.  model ell
    8.  t1 for the lower error bound
    9.  ell for the lower error bound
    10. t1 for the upper error bound
    11. ell for the upper error bound

    Arguments 6-11 are parsed with ``Decimal``. Any arguments beyond these
    are ignored.

    Raises:
        SystemExit: if fewer than 11 arguments follow the program name.
        ValueError: if the numbers of git dirs, project paths and experience
            dirs differ.
    """
    # local import: keeps the block self-contained without touching the
    # file-level import list
    from itertools import accumulate

    if len(argv) < 12:
        raise SystemExit(
            "usage: {} VCC_FILE GIT_DIRS PROJECT_PATHS EXP_DIRS PLOT_PATH "
            "T1 ELL T1_ERR_LOW ELL_ERR_LOW T1_ERR_UP ELL_ERR_UP".format(
                argv[0] if argv else "plot"))

    # a file where each line is a VCC commit hash, followed by the issues it
    # contributed to, comma separated
    vcc_file = argv[1]
    git_dirs = argv[2].split(':')
    # the paths in the git dir to filter on (use "" or . to use everything)
    project_paths = argv[3].split(':')
    # the directory where experiences are stored
    exp_dirs = vcclib.expdirs(argv[4].split(':'))
    # explicit raise instead of assert, so the check survives `python -O`
    if not len(git_dirs) == len(exp_dirs) == len(project_paths):
        raise ValueError(
            "each git dir needs one project path and one experience dir")
    # the path+name of where to save the resulting plot
    plot_path = argv[5]

    model_t1 = Decimal(argv[6])
    model_ell = Decimal(argv[7])
    model_t1_err_low = Decimal(argv[8])
    model_ell_err_low = Decimal(argv[9])
    model_t1_err_up = Decimal(argv[10])
    model_ell_err_up = Decimal(argv[11])

    # only the first element returned by sigfigs (the one matching the model
    # value itself) is used for the legend text
    mt1_sig = vcclib.sigfigs([model_t1, model_t1_err_low, model_t1_err_up])[0]
    ml_sig = vcclib.sigfigs([model_ell, model_ell_err_low,
                             model_ell_err_up])[0]
    model_t1_str = np.format_float_positional(mt1_sig, 3, fractional=False)
    # negated because the model raises k to the power -ell
    model_ell_str = np.format_float_positional(-ml_sig, 3, fractional=False)

    vccs = vcclib.get_vccs(vcc_file)
    counts = vcclib.count_all_commits(git_dirs, project_paths, exp_dirs, vccs)
    # running totals in a single pass (each entry of counts exposes .vccs and
    # .total)
    cuml_vccs = list(accumulate(c.vccs for c in counts))
    cuml_tot = list(accumulate(c.total for c in counts))

    # skip values where there's no data to compare against: start at the
    # first index whose cumulative total is non-zero (0 if there is none)
    offset = next((i for i, tot in enumerate(cuml_tot) if tot != 0), 0)

    xs_empirical = [x + 1 for x in range(offset, len(counts))]
    xs_model = [x + 1 for x in range(len(counts))]

    ys_model = project_from_model(model_t1, model_ell, xs_model, counts)
    print(model_t1, model_ell)
    ys_err_low = project_from_model(model_t1_err_low, model_ell_err_low,
                                    xs_model, counts)
    ys_err_up = project_from_model(model_t1_err_up, model_ell_err_up,
                                   xs_model, counts)

    plt.rc('text', usetex=True)
    plt.rc('font', family='serif', size=18)
    ax = plt.figure().gca()
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    # the y values must be trimmed by the same offset as xs_empirical,
    # otherwise x and y differ in length whenever offset > 0
    plt.plot(xs_empirical, cuml_vccs[offset:], 'm.',
             label=r"Empirical $v_{\le j}$")
    # NOTE(review): the label's sum starts at k=0 while xs_model starts at 1
    # -- confirm the intended indexing before changing the text
    plt.plot(xs_model, ys_model, 'g--',
             label=r"$V_{\le j}=\sum_{k=0}^{j}" + model_t1_str + " c_k k^{" +
             model_ell_str + "}$")
    plt.fill_between(xs_model, ys_err_low, ys_err_up,
                     color='green', alpha=0.2)
    plt.xlabel("$j=$ Experience")
    plt.ylabel("Vulnerabilities")
    plt.xlim(left=0)
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.savefig(plot_path)
# run as a script: hand the full argument vector (program name included)
# to main
if __name__ == "__main__":
    main(sys.argv)
|