find_bug_fixes.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. """ Identify bugfixes in Jenkins repository given a list of issues """
  2. __author__ = "Kristian Berg"
  3. __copyright__ = "Copyright (c) 2018 Axis Communications AB"
  4. __license__ = "MIT"
  5. import os
  6. import json
  7. import re
  8. import argparse
  9. def find_bug_fixes(issue_path, gitlog_path, gitlog_pattern):
  10. """ Identify bugfixes in Jenkins repository given a list of issues """
  11. i = 0 # Used to display progress
  12. no_matches = []
  13. matches_per_issue = {}
  14. total_matches = 0
  15. issue_list = build_issue_list(issue_path)
  16. with open(gitlog_path) as f:
  17. gitlog = json.loads(f.read())
  18. for key in issue_list:
  19. nbr = key.split('-')[1]
  20. matches = []
  21. for commit in gitlog:
  22. pattern = gitlog_pattern.format(nbr=nbr)
  23. if re.search(pattern, commit):
  24. if re.search(r'#{nbr}\D'.format(nbr=nbr), commit) \
  25. and not re.search('[Ff]ix', commit):
  26. pass
  27. else:
  28. matches.append(commit)
  29. total_matches += len(matches)
  30. matches_per_issue[key] = len(matches)
  31. if matches:
  32. selected_commit = commit_selector_heuristic(matches)
  33. if not selected_commit:
  34. no_matches.append(key)
  35. else:
  36. issue_list[key]['hash'] = \
  37. re.search('(?<=^commit )[a-z0-9]+(?=\n)', \
  38. selected_commit).group(0)
  39. issue_list[key]['commitdate'] = \
  40. re.search('(?<=\nDate: )[0-9 -:+]+(?=\n)',\
  41. selected_commit).group(0)
  42. else:
  43. no_matches.append(key)
  44. # Progress counter
  45. i += 1
  46. if i % 10 == 0:
  47. print(i, end='\r')
  48. print('Total issues: ' + str(len(issue_list)))
  49. print('Issues matched to a bugfix: ' + str(len(issue_list) - len(no_matches)))
  50. print('Percent of issues matched to a bugfix: ' + \
  51. str((len(issue_list) - len(no_matches)) / len(issue_list)))
  52. for key in no_matches:
  53. issue_list.pop(key)
  54. return issue_list
  55. def build_issue_list(path):
  56. """ Helper method for find_bug_fixes """
  57. issue_list = {}
  58. for filename in os.listdir(path):
  59. with open(path + '/' + filename) as f:
  60. for issue in json.loads(f.read())['issues']:
  61. issue_list[issue['key']] = {}
  62. created_date = issue['fields']['created'].replace('T', ' ')
  63. created_date = created_date.replace('.000', ' ')
  64. issue_list[issue['key']]['creationdate'] = created_date
  65. res_date = issue['fields']['resolutiondate'].replace('T', ' ')
  66. res_date = res_date.replace('.000', ' ')
  67. issue_list[issue['key']]['resolutiondate'] = res_date
  68. return issue_list
  69. def commit_selector_heuristic(commits):
  70. """ Helper method for find_bug_fixes.
  71. Commits are assumed to be ordered in reverse chronological order.
  72. Given said order, pick first commit that does not match the pattern.
  73. If all commits match, return newest one. """
  74. for commit in commits:
  75. if not re.search('[Mm]erge|[Cc]herry|[Nn]oting', commit):
  76. return commit
  77. return commits[0]
  78. def main():
  79. """ Main method """
  80. parser = argparse.ArgumentParser(description="""Identify bugfixes. Use this script together with a
  81. gitlog.json and a path with issues. The gitlog.json
  82. is created using the git_log_to_array.py script and
  83. the issue directory is created and populated using
  84. the fetch.py script.""")
  85. parser.add_argument('--gitlog', type=str,
  86. help='Path to json file containing gitlog')
  87. parser.add_argument('--issue-list', type=str,
  88. help='Path to directory containing issue json files')
  89. parser.add_argument('--gitlog-pattern', type=str,
  90. help='Pattern to match a bugfix')
  91. args = parser.parse_args()
  92. issue_list = find_bug_fixes(args.issue_list, args.gitlog, args.gitlog_pattern)
  93. with open('issue_list.json', 'w') as f:
  94. f.write(json.dumps(issue_list))
  95. if __name__ == '__main__':
  96. main()