find_bugzilla_fixes.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. """ Identify bugfixes in Bugzilla repository given a list of issues """
  2. __author__ = "Justin Tracey and Kristian Berg"
  3. __copyright__ = "Copyright (c) 2018 Axis Communications AB"
  4. __license__ = "MIT"
  5. import os
  6. import json
  7. import argparse
  8. import subprocess
  9. import datetime
  10. from get_bugzilla_patches import get_title_lines
  11. class Commit:
  12. def __init__(self, git_path=None, git_hash=None, author_date=None):
  13. self.git_path = git_path
  14. self.git_hash = git_hash
  15. self.author_date = author_date
  16. def files(self):
  17. return subprocess.check_output(['git', '-C', self.git_path,
  18. 'diff-tree', '--no-commit-id',
  19. '--name-only', '-r', self.git_hash],
  20. universal_newlines=True)
  21. def find_bug_fixes(issue_path, git_path):
  22. """ Identify bugfixes in Bugzilla repository given a list of issues """
  23. progress = 0
  24. no_matches = []
  25. matches_per_issue = {}
  26. total_matches = 0
  27. issue_list = build_issue_list(issue_path)
  28. for key in issue_list:
  29. nbr = key.split('-')[1]
  30. matches = []
  31. patterns = list(get_title_lines(nbr))
  32. for pattern in patterns:
  33. commits = subprocess.check_output(['git', '-C', git_path, 'log',
  34. '--date=iso',
  35. '--format=format:%H|%ad',
  36. '--grep={}'.format(pattern),
  37. '-F'],
  38. universal_newlines=True).strip()
  39. for commit in commits.splitlines():
  40. if commit:
  41. commit = Commit(git_path, *(commit.split('|')))
  42. matches.append(commit)
  43. total_matches += len(matches)
  44. matches_per_issue[key] = len(matches)
  45. if matches:
  46. selected_commit = commit_selector_heuristic(matches)
  47. if not selected_commit:
  48. no_matches.append(key)
  49. else:
  50. issue_list[key]['hash'] = selected_commit.git_hash
  51. issue_list[key]['commitdate'] = selected_commit.author_date
  52. else:
  53. no_matches.append(key)
  54. progress += 1
  55. if progress % 10 == 0:
  56. print(progress, end='\r')
  57. print('Total issues: ' + str(len(issue_list)))
  58. print('Issues matched to a bugfix: ' +
  59. str(len(issue_list) - len(no_matches)))
  60. print('Percent of issues matched to a bugfix: ' +
  61. str((len(issue_list) - len(no_matches)) / len(issue_list)))
  62. for key in no_matches:
  63. issue_list.pop(key)
  64. return issue_list
  65. def build_issue_list(path):
  66. """ Helper method for find_bug_fixes """
  67. issue_list = {}
  68. for filename in os.listdir(path):
  69. with open(path + '/' + filename) as f:
  70. for issue in json.loads(f.read())['issues']:
  71. issue_list[issue['key']] = {}
  72. created_date = issue['fields']['created'].replace('T', ' ')
  73. created_date = created_date.replace('.000', ' ')
  74. issue_list[issue['key']]['creationdate'] = created_date
  75. res_date = issue['fields']['resolutiondate'].replace('T', ' ')
  76. res_date = res_date.replace('.000', ' ')
  77. issue_list[issue['key']]['resolutiondate'] = res_date
  78. return issue_list
  79. suffixes = ["c", "C", "cc", "cpp", "cxx", "c++",
  80. "h", ".H", "hh", "hpp", "hxx", "h++"]
  81. def commit_selector_heuristic(commits):
  82. """ SZZUnleashed only allows one fix commit per issue.
  83. We follow its norm of using the most recent associated commit.
  84. We also filter on commits touching C/C++ files.
  85. """
  86. def touches_c_file(commit):
  87. return any(filename for filename in commit.files().splitlines()
  88. if filename.split('.')[-1] in suffixes)
  89. commits = [c for c in commits if touches_c_file(c)]
  90. # the weird string manipulation is to fix timezones formatted as +0000
  91. # (that git produces) to +00:00 (that python wants)
  92. return min(commits, key=lambda x:
  93. datetime.datetime.fromisoformat(x.author_date[:-2] + ':' +
  94. x.author_date[-2:]),
  95. default=None)
  96. def main():
  97. """ Main method """
  98. parser = argparse.ArgumentParser(
  99. description="Identify bugfixes. Use this script together with a git "
  100. "repo and a path with issues. The issue directory is created and "
  101. "populated using the fetch-bugzilla.py script.")
  102. parser.add_argument('--git-path', type=str,
  103. help='Path to local git repository')
  104. parser.add_argument('--issue-list', type=str,
  105. help='Path to directory containing issue json files')
  106. args = parser.parse_args()
  107. issue_list = find_bug_fixes(args.issue_list, args.git_path)
  108. with open('issue_list.json', 'w') as f:
  109. f.write(json.dumps(issue_list))
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()