123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- """ Identify bugfixes in Bugzilla repository given a list of issues """
- __author__ = "Justin Tracey and Kristian Berg"
- __copyright__ = "Copyright (c) 2018 Axis Communications AB"
- __license__ = "MIT"
- import os
- import json
- import argparse
- import subprocess
- import datetime
- from get_bugzilla_patches import get_title_lines
class Commit:
    """A git commit identified by repository path and commit hash.

    The constructor arguments are stored as given; nothing is validated.
    """

    def __init__(self, git_path=None, git_hash=None, author_date=None):
        """Remember the repository path, commit hash and author date."""
        self.git_path, self.git_hash, self.author_date = (
            git_path, git_hash, author_date)

    def files(self):
        """Return the paths changed by this commit, one per line.

        Runs ``git diff-tree --no-commit-id --name-only -r <hash>`` inside
        ``git_path`` and returns git's standard output as text.
        """
        command = ['git', '-C', self.git_path]
        command += ['diff-tree', '--no-commit-id', '--name-only', '-r',
                    self.git_hash]
        return subprocess.check_output(command, universal_newlines=True)
def find_bug_fixes(issue_path, git_path):
    """Identify bugfixes in Bugzilla repository given a list of issues.

    For every issue loaded from ``issue_path``, the git log of ``git_path``
    is searched (fixed-string ``--grep``) for each pattern that
    ``get_title_lines`` produces for the issue's number.  One of the matching
    commits is chosen by ``commit_selector_heuristic`` and recorded on the
    issue under the ``hash`` and ``commitdate`` keys.  Summary statistics are
    printed to standard output.

    Args:
        issue_path: Directory of issue JSON files, as read by
            ``build_issue_list``.
        git_path: Path to the local git repository to search.

    Returns:
        The issue dict, restricted to the issues that were matched to a
        commit.

    Raises:
        subprocess.CalledProcessError: If a ``git log`` invocation fails.
    """
    issue_list = build_issue_list(issue_path)
    no_matches = []

    for progress, key in enumerate(issue_list, start=1):
        # The part of the key after the first '-' is handed to
        # get_title_lines as the bug number.
        nbr = key.split('-')[1]

        # Several patterns can hit the same commit; keying on the hash keeps
        # a single Commit per hash so each one is inspected only once.
        matches = {}
        for pattern in get_title_lines(nbr):
            log = subprocess.check_output(
                ['git', '-C', git_path, 'log',
                 '--date=iso',
                 '--format=format:%H|%ad',
                 '--grep={}'.format(pattern),
                 '-F'],
                universal_newlines=True).strip()
            for line in log.splitlines():
                if line:
                    git_hash, author_date = line.split('|', 1)
                    matches.setdefault(
                        git_hash, Commit(git_path, git_hash, author_date))

        selected_commit = None
        if matches:
            selected_commit = commit_selector_heuristic(
                list(matches.values()))

        if selected_commit:
            issue_list[key]['hash'] = selected_commit.git_hash
            issue_list[key]['commitdate'] = selected_commit.author_date
        else:
            no_matches.append(key)

        # Lightweight progress counter, rewritten in place every 10 issues.
        if progress % 10 == 0:
            print(progress, end='\r')

    total = len(issue_list)
    matched = total - len(no_matches)
    print('Total issues: ' + str(total))
    print('Issues matched to a bugfix: ' + str(matched))
    # Guard the ratio: an empty issue directory would otherwise divide by 0.
    print('Percent of issues matched to a bugfix: ' +
          str(matched / total if total else 0.0))

    for key in no_matches:
        issue_list.pop(key)
    return issue_list
def build_issue_list(path):
    """Helper method for find_bug_fixes.

    Reads every file in the directory ``path`` as JSON and collects the
    entries of its ``issues`` array into one dict keyed by issue ``key``.

    Args:
        path: Directory whose files are all issue JSON documents.

    Returns:
        A dict mapping each issue key to a dict with ``creationdate`` and
        ``resolutiondate`` strings, reformatted from
        ``YYYY-MM-DDTHH:MM:SS.000+ZZZZ`` to ``YYYY-MM-DD HH:MM:SS +ZZZZ``.

    Raises:
        OSError: If the directory or one of its files cannot be read.
        ValueError: If a file does not contain valid JSON.
        KeyError: If an expected key is missing from a document.
    """
    def reformat(timestamp):
        # Drop the 'T' separator and the zero milliseconds so the timestamp
        # has the same layout as git's --date=iso output.
        return timestamp.replace('T', ' ').replace('.000', ' ')

    issue_list = {}
    # sorted() makes the result independent of the directory listing order
    # when the same issue key occurs in more than one file.
    for filename in sorted(os.listdir(path)):
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(os.path.join(path, filename), encoding='utf-8') as f:
            issues = json.load(f)['issues']
        for issue in issues:
            fields = issue['fields']
            issue_list[issue['key']] = {
                'creationdate': reformat(fields['created']),
                'resolutiondate': reformat(fields['resolutiondate']),
            }
    return issue_list
# File extensions (without the leading dot) that mark a file as C/C++.
suffixes = ["c", "C", "cc", "cpp", "cxx", "c++",
            "h", "H", "hh", "hpp", "hxx", "h++"]


def commit_selector_heuristic(commits):
    """Pick the single fix commit to associate with an issue.

    SZZUnleashed only allows one fix commit per issue.
    We follow its norm of using the most recent associated commit.
    We also filter on commits touching C/C++ files.

    Args:
        commits: Iterable of objects exposing ``files()`` (newline-separated
            changed paths) and ``author_date`` (git ``--date=iso`` text,
            e.g. ``2018-01-01 12:00:00 +0000``).

    Returns:
        The commit with the latest author date among those that touch at
        least one C/C++ file, or ``None`` if there is no such commit.

    Raises:
        ValueError: If an ``author_date`` is not in the expected format.
    """
    def touches_c_file(commit):
        # splitext only reports a real extension, so an extension-less file
        # that happens to be called "c" or "h" is not taken for a C file.
        return any(os.path.splitext(filename)[1][1:] in suffixes
                   for filename in commit.files().splitlines())

    def authored_at(commit):
        # %z parses git's "+0000" offset directly and yields an aware
        # datetime, so commits from different time zones compare correctly.
        return datetime.datetime.strptime(commit.author_date.strip(),
                                          '%Y-%m-%d %H:%M:%S %z')

    candidates = [c for c in commits if touches_c_file(c)]
    # max(), not min(): the latest author date is the most recent commit.
    return max(candidates, key=authored_at, default=None)
def main():
    """Main method.

    Parses ``--git-path`` and ``--issue-list`` from the command line, runs
    ``find_bug_fixes`` on them and writes the resulting issue dict as JSON to
    ``issue_list.json`` in the current working directory.

    Raises:
        SystemExit: If a required command-line option is missing (raised by
            argparse after printing a usage message).
    """
    parser = argparse.ArgumentParser(
        description="Identify bugfixes. Use this script together with a git "
        "repo and a path with issues. The issue directory is created and "
        "populated using the fetch-bugzilla.py script.")
    # Both options are mandatory: without them None would be passed on and
    # only fail later, with an error unrelated to the missing option.
    parser.add_argument('--git-path', type=str, required=True,
                        help='Path to local git repository')
    parser.add_argument('--issue-list', type=str, required=True,
                        help='Path to directory containing issue json files')
    args = parser.parse_args()

    issue_list = find_bug_fixes(args.issue_list, args.git_path)
    with open('issue_list.json', 'w') as f:
        json.dump(issue_list, f)


if __name__ == '__main__':
    main()
|