model-vs-real.py 3.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. #!/usr/bin/env python3
  2. import sys
  3. from decimal import Decimal
  4. import numpy as np
  5. import matplotlib.pyplot as plt
  6. from matplotlib.ticker import MaxNLocator
  7. import vcclib
  8. # n.b.: ell is l spelled out to prevent confusion with the 1 character
  9. def project_from_model(t1, ell, xs, counts):
  10. ys = []
  11. total = 0
  12. for x in xs:
  13. expected = t1 * (x)**-ell * counts[x-1].total
  14. total += expected
  15. ys.append(total)
  16. return ys
  17. def main(argv):
  18. # a file where each line is a VCC commit hash, followed by the issues it
  19. # contributed to, comma separated
  20. vcc_file = argv[1]
  21. git_dirs = argv[2].split(':')
  22. # the paths in the git dir to filter on (use "" or . to use everything)
  23. project_paths = argv[3].split(':')
  24. # the directory where experiences are stored
  25. exp_dirs = vcclib.expdirs(argv[4].split(':'))
  26. assert len(git_dirs) == len(exp_dirs) and \
  27. len(git_dirs) == len(project_paths), \
  28. "each git dir needs one project path and one experience dir"
  29. # the path+name of where to save the resulting plot
  30. plot_path = argv[5]
  31. model_t1 = Decimal(argv[6])
  32. model_ell = Decimal(argv[7])
  33. model_t1_err_low = Decimal(argv[8])
  34. model_ell_err_low = Decimal(argv[9])
  35. model_t1_err_up = Decimal(argv[10])
  36. model_ell_err_up = Decimal(argv[11])
  37. mt1_sig = vcclib.sigfigs([model_t1, model_t1_err_low, model_t1_err_up])[0]
  38. ml_sig = vcclib.sigfigs([model_ell, model_ell_err_low,
  39. model_ell_err_up])[0]
  40. model_t1_str = np.format_float_positional(mt1_sig, 3, fractional=False)
  41. model_ell_str = np.format_float_positional(-ml_sig, 3, fractional=False)
  42. vccs = vcclib.get_vccs(vcc_file)
  43. counts = vcclib.count_all_commits(git_dirs, project_paths, exp_dirs, vccs)
  44. cuml_vccs = [sum(c.vccs for c in counts[:j+1]) for j in range(len(counts))]
  45. cuml_tot = [sum(c.total for c in counts[:j+1]) for j in range(len(counts))]
  46. # skip values where there's no data to compare against
  47. offset = 0
  48. for i in range(len(cuml_vccs)):
  49. if cuml_tot[i] != 0:
  50. offset = i
  51. break
  52. xs_empirical = [x+1 for x in range(offset, len(counts))]
  53. xs_model = [x+1 for x in range(len(counts))]
  54. ys_model = project_from_model(model_t1, model_ell, xs_model, counts)
  55. print(model_t1, model_ell)
  56. ys_err_low = project_from_model(model_t1_err_low, model_ell_err_low,
  57. xs_model, counts)
  58. ys_err_up = project_from_model(model_t1_err_up, model_ell_err_up,
  59. xs_model, counts)
  60. plt.rc('text', usetex=True)
  61. plt.rc('font', family='serif', size=18)
  62. ax = plt.figure().gca()
  63. ax.yaxis.set_major_locator(MaxNLocator(integer=True))
  64. plt.plot(xs_empirical, cuml_vccs, 'm.',
  65. label=r"Empirical $v_{\le j}$")
  66. plt.plot(xs_model, ys_model, 'g--',
  67. label=r"$V_{\le j}=\sum_{k=0}^{j}" + model_t1_str + " c_k k^{" +
  68. model_ell_str + "}$")
  69. plt.fill_between(xs_model, ys_err_low, ys_err_up,
  70. color='green', alpha=0.2)
  71. plt.xlabel("$j=$ Experience")
  72. plt.ylabel("Vulnerabilities")
  73. plt.xlim(left=0)
  74. plt.legend(loc="lower right")
  75. plt.tight_layout()
  76. plt.savefig(plot_path)
  77. if __name__ == '__main__':
  78. main(sys.argv)