assemble_purpose_features.py 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. """
  2. Script to extract the purpose features.
  3. """
  4. __author__ = "Oscar Svensson"
  5. __copyright__ = "Copyright (c) 2018 Axis Communications AB"
  6. __license__ = "MIT"
  7. import csv
  8. import re
  9. from argparse import ArgumentParser
  10. from tqdm import tqdm
  11. from pygit2 import Repository, GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE
  12. PATTERNS = [r"bug", r"fix", r"defect", r"patch"]
  13. def is_fix(message):
  14. """
  15. Check if a message contains any of the fix patterns.
  16. """
  17. for pattern in PATTERNS:
  18. if re.search(pattern, message):
  19. return True
  20. return False
  21. def get_purpose_features(repo_path, branch):
  22. """
  23. Extract the purpose features for each commit.
  24. """
  25. repo = Repository(repo_path)
  26. head = repo.references.get(branch)
  27. commits = list(
  28. repo.walk(head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE))
  29. features = []
  30. for _, commit in enumerate(tqdm(commits)):
  31. message = commit.message
  32. fix = 1.0 if (is_fix(message)) else 0.0
  33. feat = []
  34. feat.append(str(commit.hex))
  35. feat.append(str(fix))
  36. features.append(feat)
  37. return features
  38. def save_features(purpose_features, path="./results/purpose_features.csv"):
  39. """
  40. Save the purpose features to a csv file.
  41. """
  42. with open(path, 'w') as csv_file:
  43. writer = csv.writer(csv_file)
  44. writer.writerow(["commit", "purpose"])
  45. for row in purpose_features:
  46. if row:
  47. writer.writerow([row[0], row[1]])
  48. if __name__ == "__main__":
  49. PARSER = ArgumentParser(
  50. description="Utility to extract purpose features from" +
  51. " a repository or a single commit.")
  52. PARSER.add_argument(
  53. "--repository",
  54. "-r",
  55. type=str,
  56. default="./repos/jenkins",
  57. help="Path to local git repository.")
  58. PARSER.add_argument(
  59. "--branch",
  60. "-b",
  61. type=str,
  62. default="refs/heads/master",
  63. help="Which branch to use.")
  64. ARGS = PARSER.parse_args()
  65. REPOPATH = ARGS.repository
  66. BRANCH = ARGS.branch
  67. FEATURES = get_purpose_features(REPOPATH, BRANCH)
  68. save_features(FEATURES)