#!/usr/bin/env python3
# Source repository: j3tracey/grading-on-a-curve

import sys
from decimal import Decimal
from itertools import accumulate

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

import vcclib

# n.b.: ell is l spelled out to prevent confusion with the 1 character


def project_from_model(t1, ell, xs, counts):
    """Return the model's running total evaluated at each value in xs.

    For every x in xs (1-based, so x indexes counts[x-1]) the term
    t1 * x**-ell * counts[x-1].total is added to a running sum; the
    returned list holds that sum after each x, in the order of xs.
    """
    cumulative = []
    running = 0
    for experience in xs:
        # per-commit factor predicted by the model at this experience
        per_commit = t1 * experience ** -ell
        running += per_commit * counts[experience - 1].total
        cumulative.append(running)
    return cumulative


def main(argv):
    """Plot empirical cumulative VCC counts against the model's projection.

    argv is laid out like sys.argv:

        argv[1]   file where each line is a VCC commit hash, followed by
                  the issues it contributed to, comma separated
        argv[2]   colon-separated git directories
        argv[3]   colon-separated paths in each git dir to filter on
                  (use "" or . to use everything)
        argv[4]   colon-separated directories where experiences are stored
        argv[5]   path+name of where to save the resulting plot
        argv[6]   model t1
        argv[7]   model ell
        argv[8]   t1 used for the lower error curve
        argv[9]   ell used for the lower error curve
        argv[10]  t1 used for the upper error curve
        argv[11]  ell used for the upper error curve

    The figure is saved to argv[5]; the model t1 and ell are also printed.
    Raises IndexError if fewer than twelve entries are supplied.
    """
    # a file where each line is a VCC commit hash, followed by the issues it
    # contributed to, comma separated
    vcc_file = argv[1]
    git_dirs = argv[2].split(':')
    # the paths in the git dir to filter on (use "" or . to use everything)
    project_paths = argv[3].split(':')
    # the directory where experiences are stored
    exp_dirs = vcclib.expdirs(argv[4].split(':'))
    assert len(git_dirs) == len(exp_dirs) and \
        len(git_dirs) == len(project_paths), \
        "each git dir needs one project path and one experience dir"
    # the path+name of where to save the resulting plot
    plot_path = argv[5]

    model_t1 = Decimal(argv[6])
    model_ell = Decimal(argv[7])
    model_t1_err_low = Decimal(argv[8])
    model_ell_err_low = Decimal(argv[9])
    model_t1_err_up = Decimal(argv[10])
    model_ell_err_up = Decimal(argv[11])
    # presumably sigfigs rounds each value to a shared precision; only the
    # first result (the central estimate) is used for the legend -- confirm
    # against vcclib
    mt1_sig = vcclib.sigfigs([model_t1, model_t1_err_low, model_t1_err_up])[0]
    ml_sig = vcclib.sigfigs([model_ell, model_ell_err_low,
                             model_ell_err_up])[0]

    model_t1_str = np.format_float_positional(mt1_sig, 3, fractional=False)
    # the exponent is shown negated because the model uses k**-ell
    model_ell_str = np.format_float_positional(-ml_sig, 3, fractional=False)

    vccs = vcclib.get_vccs(vcc_file)

    counts = vcclib.count_all_commits(git_dirs, project_paths, exp_dirs, vccs)
    # running totals per experience level, computed in a single pass
    cuml_vccs = list(accumulate(c.vccs for c in counts))
    cuml_tot = list(accumulate(c.total for c in counts))

    # skip values where there's no data to compare against: offset is the
    # first index with a non-zero cumulative total (0 if there is none)
    offset = next((i for i, tot in enumerate(cuml_tot) if tot != 0), 0)

    # experience values are 1-based, hence the +1 on both ranges
    xs_empirical = list(range(offset + 1, len(counts) + 1))
    xs_model = list(range(1, len(counts) + 1))
    ys_model = project_from_model(model_t1, model_ell, xs_model, counts)
    print(model_t1, model_ell)
    ys_err_low = project_from_model(model_t1_err_low, model_ell_err_low,
                                    xs_model, counts)
    ys_err_up = project_from_model(model_t1_err_up, model_ell_err_up,
                                   xs_model, counts)

    plt.rc('text', usetex=True)
    plt.rc('font', family='serif', size=18)
    ax = plt.figure().gca()
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    # the y values must be trimmed by the same offset as the x values,
    # otherwise the two sequences differ in length whenever offset > 0
    plt.plot(xs_empirical, cuml_vccs[offset:], 'm.',
             label=r"Empirical $v_{\le j}$")
    # NOTE(review): the label sums from k=0 while the model above is
    # evaluated from k=1 -- confirm which lower bound is intended
    plt.plot(xs_model, ys_model, 'g--',
             label=r"$V_{\le j}=\sum_{k=0}^{j}" + model_t1_str + " c_k k^{" +
             model_ell_str + "}$")
    plt.fill_between(xs_model, ys_err_low, ys_err_up,
                     color='green', alpha=0.2)
    plt.xlabel("$j=$ Experience")
    plt.ylabel("Vulnerabilities")
    plt.xlim(left=0)
    plt.legend(loc="lower right")
    plt.tight_layout()
    plt.savefig(plot_path)


# Only run when executed as a script; main receives the full sys.argv list,
# so its positional arguments start at index 1.
if __name__ == '__main__':
    main(sys.argv)