1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283 |
- """
- Script to extract the purpose features.
- """
- __author__ = "Oscar Svensson"
- __copyright__ = "Copyright (c) 2018 Axis Communications AB"
- __license__ = "MIT"
- import csv
- import re
- from argparse import ArgumentParser
- from tqdm import tqdm
- from pygit2 import Repository, GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE
- PATTERNS = [r"bug", r"fix", r"defect", r"patch"]
- def is_fix(message):
- """
- Check if a message contains any of the fix patterns.
- """
- for pattern in PATTERNS:
- if re.search(pattern, message):
- return True
- return False
- def get_purpose_features(repo_path, branch):
- """
- Extract the purpose features for each commit.
- """
- repo = Repository(repo_path)
- head = repo.references.get(branch)
- commits = list(
- repo.walk(head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE))
- features = []
- for _, commit in enumerate(tqdm(commits)):
- message = commit.message
- fix = 1.0 if (is_fix(message)) else 0.0
- feat = []
- feat.append(str(commit.hex))
- feat.append(str(fix))
- features.append(feat)
- return features
- def save_features(purpose_features, path="./results/purpose_features.csv"):
- """
- Save the purpose features to a csv file.
- """
- with open(path, 'w') as csv_file:
- writer = csv.writer(csv_file)
- writer.writerow(["commit", "purpose"])
- for row in purpose_features:
- if row:
- writer.writerow([row[0], row[1]])
- if __name__ == "__main__":
- PARSER = ArgumentParser(
- description="Utility to extract purpose features from" +
- " a repository or a single commit.")
- PARSER.add_argument(
- "--repository",
- "-r",
- type=str,
- default="./repos/jenkins",
- help="Path to local git repository.")
- PARSER.add_argument(
- "--branch",
- "-b",
- type=str,
- default="refs/heads/master",
- help="Which branch to use.")
- ARGS = PARSER.parse_args()
- REPOPATH = ARGS.repository
- BRANCH = ARGS.branch
- FEATURES = get_purpose_features(REPOPATH, BRANCH)
- save_features(FEATURES)
|