""" Identify bugfixes in Bugzilla repository given a list of issues """ __author__ = "Justin Tracey and Kristian Berg" __copyright__ = "Copyright (c) 2018 Axis Communications AB" __license__ = "MIT" import os import json import argparse import subprocess import datetime from get_bugzilla_patches import get_title_lines class Commit: def __init__(self, git_path=None, git_hash=None, author_date=None): self.git_path = git_path self.git_hash = git_hash self.author_date = author_date def files(self): return subprocess.check_output(['git', '-C', self.git_path, 'diff-tree', '--no-commit-id', '--name-only', '-r', self.git_hash], universal_newlines=True) def find_bug_fixes(issue_path, git_path): """ Identify bugfixes in Bugzilla repository given a list of issues """ progress = 0 no_matches = [] matches_per_issue = {} total_matches = 0 issue_list = build_issue_list(issue_path) for key in issue_list: nbr = key.split('-')[1] matches = [] patterns = list(get_title_lines(nbr)) for pattern in patterns: commits = subprocess.check_output(['git', '-C', git_path, 'log', '--date=iso', '--format=format:%H|%ad', '--grep={}'.format(pattern), '-F'], universal_newlines=True).strip() for commit in commits.splitlines(): if commit: commit = Commit(git_path, *(commit.split('|'))) matches.append(commit) total_matches += len(matches) matches_per_issue[key] = len(matches) if matches: selected_commit = commit_selector_heuristic(matches) if not selected_commit: no_matches.append(key) else: issue_list[key]['hash'] = selected_commit.git_hash issue_list[key]['commitdate'] = selected_commit.author_date else: no_matches.append(key) progress += 1 if progress % 10 == 0: print(progress, end='\r') print('Total issues: ' + str(len(issue_list))) print('Issues matched to a bugfix: ' + str(len(issue_list) - len(no_matches))) print('Percent of issues matched to a bugfix: ' + str((len(issue_list) - len(no_matches)) / len(issue_list))) for key in no_matches: issue_list.pop(key) return issue_list def build_issue_list(path): """ Helper method for find_bug_fixes """ issue_list = {} for filename in os.listdir(path): with open(path + '/' + filename) as f: for issue in json.loads(f.read())['issues']: issue_list[issue['key']] = {} created_date = issue['fields']['created'].replace('T', ' ') created_date = created_date.replace('.000', ' ') issue_list[issue['key']]['creationdate'] = created_date res_date = issue['fields']['resolutiondate'].replace('T', ' ') res_date = res_date.replace('.000', ' ') issue_list[issue['key']]['resolutiondate'] = res_date return issue_list suffixes = ["c", "C", "cc", "cpp", "cxx", "c++", "h", ".H", "hh", "hpp", "hxx", "h++"] def commit_selector_heuristic(commits): """ SZZUnleashed only allows one fix commit per issue. We follow its norm of using the most recent associated commit. We also filter on commits touching C/C++ files. """ def touches_c_file(commit): return any(filename for filename in commit.files().splitlines() if filename.split('.')[-1] in suffixes) commits = [c for c in commits if touches_c_file(c)] # the weird string manipulation is to fix timezones formatted as +0000 # (that git produces) to +00:00 (that python wants) return min(commits, key=lambda x: datetime.datetime.fromisoformat(x.author_date[:-2] + ':' + x.author_date[-2:]), default=None) def main(): """ Main method """ parser = argparse.ArgumentParser( description="Identify bugfixes. Use this script together with a git " "repo and a path with issues. The issue directory is created and " "populated using the fetch-bugzilla.py script.") parser.add_argument('--git-path', type=str, help='Path to local git repository') parser.add_argument('--issue-list', type=str, help='Path to directory containing issue json files') args = parser.parse_args() issue_list = find_bug_fixes(args.issue_list, args.git_path) with open('issue_list.json', 'w') as f: f.write(json.dumps(issue_list)) if __name__ == '__main__': main()