assemble_coupling_features.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. """
  2. Script to extract change-coupling features from Code Maat analysis files.
  3. """
  4. __author__ = "Oscar Svensson"
  5. __copyright__ = "Copyright (c) 2018 Axis Communications AB"
  6. __license__ = "MIT"
  7. import csv
  8. import os
  9. from git import Repo
  10. import numpy as np
  11. from tqdm import tqdm
  12. def save_features(features, res_path):
  13. """
  14. Save the coupling features to a csv file.
  15. """
  16. print("Saving to {}".format(os.path.abspath(res_path)))
  17. with open(os.path.abspath(res_path), 'w') as feat_file:
  18. feat_writer = csv.writer(feat_file)
  19. feat_writer.writerow([
  20. "commit", "number_of_cruical_files",
  21. "number_of_moderate_risk_cruical_files",
  22. "number_of_high_risk_cruical_files",
  23. "number_of_non_modified_change_couplings"
  24. ])
  25. for feature in features:
  26. feat_writer.writerow(feature)
  27. def get_features():
  28. """
  29. Get the coupling features from a number of files.
  30. """
  31. commits = list(REPO.iter_commits('master'))
  32. couplings = {}
  33. features = []
  34. for hexsha in os.listdir("/h/oskars/data_all"):
  35. couplings[hexsha] = os.path.join(
  36. os.path.join("/h/oskars/data_all", hexsha),
  37. "{}_coupling.log.res".format(hexsha))
  38. features.append([commits[0].hexsha, 0, 0, 0])
  39. for i in tqdm(range(1, len(commits))):
  40. first = commits[i - 1]
  41. second = commits[i]
  42. diff = first.diff(second)
  43. paths = [d.b_path for d in diff]
  44. cruical_moderate = 0
  45. cruical_high = 0
  46. cruical_files = 0
  47. cruical_non_modified_couplings = 0
  48. if second.hexsha in couplings:
  49. cruical_commits = 0
  50. cruical_degrees = []
  51. with open(couplings[second.hexsha], 'r') as csvfile:
  52. coup_rows = csv.reader(csvfile)
  53. files = {}
  54. file_coupling_graph = {}
  55. next(coup_rows)
  56. for row in coup_rows:
  57. degree = float(row[2])
  58. # Is this correct?
  59. in_files = bool(row[0] in files)
  60. if in_files and files[row[0]] > degree:
  61. files[row[0]] = degree
  62. elif not in_files:
  63. files[row[0]] = degree
  64. is_in_coupling_graph = bool(row[0] in file_coupling_graph)
  65. if is_in_coupling_graph and degree >= 75:
  66. file_coupling_graph[row[0]].append(row[1])
  67. elif degree >= 50:
  68. file_coupling_graph[row[0]] = [row[1]]
  69. # Is this correct?
  70. in_files = bool(row[1] in files)
  71. if in_files and files[row[1]] > degree:
  72. files[row[1]] = degree
  73. elif not in_files:
  74. files[row[1]] = degree
  75. is_in_coupling_graph = bool(row[1] in file_coupling_graph)
  76. if is_in_coupling_graph and degree >= 75:
  77. file_coupling_graph[row[1]].append(row[0])
  78. elif degree >= 50:
  79. file_coupling_graph[row[1]] = [row[0]]
  80. for path in paths:
  81. if path in files:
  82. cruical_commits = cruical_commits + 1
  83. cruical_degrees.append(files[path])
  84. cruical_files = cruical_files + 1
  85. # Check for all non modified cruical non coupled files.
  86. set_path = set(paths)
  87. for path in paths:
  88. if path in file_coupling_graph:
  89. file_couplings = set(file_coupling_graph[path])
  90. cruical_non_modified_couplings = cruical_non_modified_couplings + len(
  91. file_couplings - set_path)
  92. inds = np.digitize(cruical_degrees, [25, 50, 75, 100])
  93. cruical_moderate = sum([1 for i in inds if i == 3])
  94. cruical_high = sum([1 for i in inds if i == 4])
  95. features.append([
  96. second.hexsha,
  97. str(cruical_files),
  98. str(cruical_moderate),
  99. str(cruical_high),
  100. str(cruical_non_modified_couplings)
  101. ])
  102. return features
  103. if __name__ == "__main__":
  104. global REPO
  105. REPO = Repo("../../jenkins")
  106. REPO = Repo("./repos/jenkins")
  107. FEATURES = get_features()
  108. save_features(FEATURES, './results/coupling_features.csv')