|
@@ -0,0 +1,292 @@
|
|
|
+"""
|
|
|
+Script that runs several docker containers which in turn runs an analysis on
|
|
|
+a git repository.
|
|
|
+"""
|
|
|
+__author__ = "Oscar Svensson"
|
|
|
+__copyright__ = "Copyright (c) 2018 Axis Communications AB"
|
|
|
+__license__ = "MIT"
|
|
|
+
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import shutil
|
|
|
+import time
|
|
|
+
|
|
|
+from argparse import ArgumentParser
|
|
|
+from distutils.dir_util import copy_tree
|
|
|
+from multiprocessing import Process, cpu_count
|
|
|
+from git import Repo
|
|
|
+from tqdm import tqdm
|
|
|
+
|
|
|
+import docker
|
|
|
+
|
|
|
def start_container(client, image, name, repo_dir, result_dir):
    """
    Start a detached docker container with the analysis scripts, the git
    repository and a result directory bind-mounted into it.

    client -- docker client (as returned by docker.from_env()).
    image -- name of the docker image to run.
    name -- name for the container; an existing container with the same
        name is killed (if running) and removed first.
    repo_dir -- path to the git repository, mounted at /root/repo.
    result_dir -- directory for results, mounted at /root/results.

    Returns the started container object.
    """
    # Remove any stale container with the same name, otherwise the run()
    # call below would fail with a name conflict.
    for container in client.containers.list(all=True):
        if name == container.name:
            if container.status == "running":
                container.kill()
            container.remove()
            break  # container names are unique, no need to keep scanning

    path = os.path.abspath('./')

    # Every mount source must be an absolute path: docker interprets a
    # relative source as a named volume rather than a bind mount, so
    # result_dir is normalized the same way repo_dir already was.
    container = client.containers.run(
        image,
        name=name,
        stdin_open=True,
        detach=True,
        volumes={
            str(path + "/scripts"): {
                'bind': '/root/scripts',
                'mode': 'rw'
            },
            os.path.abspath(result_dir): {
                'bind': '/root/results',
                'mode': 'rw'
            },
            os.path.abspath(repo_dir): {
                'bind': '/root/repo',
                'mode': 'rw'
            }
        },
        command="bash")

    return container
|
|
|
+
|
|
|
def run_command(container, command):
    """
    Execute a shell command inside a running container.

    Wraps the command in `bash -c "..."` and returns the result of the
    exec call unchanged.
    """
    wrapped = 'bash -c "{}"'.format(command)
    return container.exec_run(cmd=wrapped, tty=True, privileged=True)
|
|
|
+
|
|
|
+
|
|
|
def run_analysis(t_id, container, commits):
    """
    Run the analysis script inside the given container once per commit,
    displaying a per-process progress bar.
    """
    progress = tqdm(commits, desc="Progress process {}".format(t_id), position=t_id)
    for commit in progress:
        run_command(container, "/root/scripts/analyse_commit {}".format(commit))
|
|
|
+
|
|
|
def copy_repo(src, dest):
    """
    Copy a repository directory tree from src to dest.

    Errors are reported on stdout instead of raised, so a failed copy
    does not abort the whole analysis run.
    """
    try:
        shutil.copytree(src, dest)
    except OSError as exp:
        # shutil.Error is a subclass of OSError, so one handler replaces
        # the two duplicated except clauses of the original.
        print("Directory not copied. Error: {}".format(exp))
|
|
|
+
|
|
|
def partion_commits(commits, partitions):
    """
    Split commits into `partitions` contiguous chunks of (nearly) equal
    size; the first len(commits) % partitions chunks get one extra item.

    Returns a list of `partitions` lists that together contain every
    commit exactly once.
    """
    quote, remainder = divmod(len(commits), partitions)
    # Half-open (start, end) index pairs. The original subtracted 1 from
    # each end and then used it as an *exclusive* slice bound, which
    # silently dropped the last commit of every chunk but the final one.
    chunk_bounds = [(i * quote + min(i, remainder),
                     (i + 1) * quote + min(i + 1, remainder))
                    for i in range(partitions)]

    return [commits[start:end] for start, end in chunk_bounds]
|
|
|
+
|
|
|
def start_analysis(image, result_dir, commits=None, cpus=cpu_count()):
    """
    Analyze the repository at the module-level REPO path by running one
    docker container per cpu, each processing its own share of commits.

    image -- docker image used for the analysis containers.
    result_dir -- directory where each container writes its results
        (one data<N> subdirectory per container).
    commits -- optional list of commit hashes to analyze; defaults to
        every commit on the master branch.
    cpus -- number of containers/worker processes to start.
    """
    client = docker.from_env()
    repo = Repo(REPO)

    # Each container gets its own working copy of the repository.
    if not os.path.exists("./repos"):
        os.makedirs("./repos")

    repo_name = os.path.basename(os.path.normpath(REPO))

    for cpu in range(cpus):
        copy_repo(REPO, "./repos/{}{}".format(repo_name, cpu))

    if not commits:
        commits = [
            str(commit.hexsha) for commit in list(repo.iter_commits('master'))
        ]

    commits = partion_commits(commits, cpus)

    containers = []
    for cpu in range(cpus):
        container = start_container(
            client,
            image=image,
            name="analysis_{}_cpu_{}".format(repo_name, cpu),
            repo_dir="./repos/{}{}".format(repo_name, cpu),
            result_dir=result_dir + "/data{}".format(cpu))
        containers.append(container)

    # One worker process per container, each feeding its chunk of
    # commits to its own container.
    processes = [
        Process(target=run_analysis, args=(i, containers[i], commits[i]))
        for i in range(cpus)
    ]
    for process in processes:
        process.start()
    for process in processes:
        process.join()

    for container in containers:
        print(container.status)
        print(container.name)
        # `status` is cached from when the container object was created,
        # so refresh before checking. The original condition
        # (status != "exited" or status != "dead") was always true.
        container.reload()
        if container.status not in ("exited", "dead"):
            container.kill()
        container.remove()

    shutil.rmtree("./repos", ignore_errors=True)
|
|
|
+
|
|
|
def parse_commits(commit_file):
    """
    Read commit hashes from a file (one per line) and return them as a
    list of stripped strings.

    Exits the program with status 1 if commit_file does not exist.
    """
    if not os.path.exists(commit_file):
        print("commit_file doesn't exist!!", file=sys.stderr)
        sys.exit(1)

    # Iterate the file object directly; readlines() and the dead
    # `commits = []` initialisation of the original were unnecessary.
    with open(commit_file, 'r') as cfile:
        return [line.strip() for line in cfile]
|
|
|
+
|
|
|
def assemble_directories(result_path, cpus=cpu_count()):
    """
    Merge the per-container result directories (data0..data<cpus-1>)
    under result_path into a single <result_path>/data_all directory.

    A commit directory is considered corrupt, and is skipped, unless it
    contains exactly two entries.
    """
    result_path = os.path.abspath(result_path)
    paths = ["{}/data{}".format(result_path, i) for i in range(cpus)]

    if not all(os.path.exists(p) for p in paths):
        print("data paths doesn't exists!", file=sys.stderr)
        return

    files = []

    for path in paths:
        for item in os.listdir(path):
            commit = os.path.join(path, item)
            # Check isdir *before* listing: the original called
            # os.listdir(commit) first, which raises NotADirectoryError
            # when a plain file sits inside a data directory.
            if not os.path.isdir(commit):
                continue
            if len(os.listdir(commit)) == 2:
                files.append((commit, item))

    print("Saving all analysed commits into a single directory: {}/data_all".
          format(result_path))
    if not os.path.exists("{}/data_all".format(result_path)):
        os.makedirs("{}/data_all".format(result_path))

    for src_dir, commit_name in files:
        dest = "{}/data_all/{}".format(result_path, commit_name)
        if not os.path.exists(dest):
            # shutil.copytree replaces the deprecated (PEP 632)
            # distutils copy_tree; the destination is guaranteed not to
            # exist here, which copytree requires.
            shutil.copytree(src_dir, dest)
|
|
|
+
|
|
|
def check_for_missing_commits(repo_path, result_path):
    """
    Compare the analysed commit directories under result_path against all
    commits on the repository's master branch, and write the hashes of
    any commit that was never analysed to ./missing_commits.txt.
    """
    result_dir = os.path.abspath(result_path)
    if not os.path.exists(result_path):
        print("Result path doesn't exist!", file=sys.stderr)
        return

    repo = Repo(repo_path)

    analysed = set(os.listdir(result_dir))
    on_master = {commit.hexsha for commit in repo.iter_commits('master')}

    missing_commits = on_master - analysed

    if missing_commits:
        with open("./missing_commits.txt", 'w') as cfile:
            for commit in missing_commits:
                cfile.write(commit + '\n')
        print("Wrote missing commits to missing_commits.txt")
|
|
|
+
|
|
|
if __name__ == "__main__":
    PARSER = ArgumentParser(description="Utility to run several docker " +
                            "containers onto a git repository. " +
                            "Each container is given a set of " +
                            "commits and is instructed to run " +
                            "an analysis on each one of them.")
    PARSER.add_argument(
        "--analyse", "-a", action="store_true", help="Run an analysation.")
    PARSER.add_argument(
        "--image",
        "-i",
        type=str,
        default="code-maat",
        help="Specification of which image to use.")
    PARSER.add_argument(
        "--repo-dir",
        "-r",
        type=str,
        default="../../jenkins",
        help="Specification of which repo to use.")
    PARSER.add_argument(
        "--result-dir",
        "-rd",
        type=str,
        default="/h/oskars",
        help="Specification of where to store the result.")
    PARSER.add_argument(
        "--commits",
        "-c",
        type=str,
        default=None,
        help="Direction to a file containing commits to analyse.")
    PARSER.add_argument(
        "--assemble",
        "-as",
        action="store_true",
        help="Assemble the results into a single directory.")
    PARSER.add_argument(
        "--missing-commits",
        "-mc",
        action="store_true",
        help="Check for non analysed commits.")

    ARGS = PARSER.parse_args()

    # REPO is read as a module-level global by start_analysis. The
    # original `global REPO` statement was a no-op at module scope and
    # has been dropped; plain assignment creates the global.
    REPO = os.path.abspath(ARGS.repo_dir)

    COMMITS = parse_commits(ARGS.commits) if ARGS.commits else []

    # The original created an unused module-level docker client
    # (CLIENT = docker.from_env()); start_analysis builds its own
    # client, so that assignment has been removed.
    if ARGS.analyse:
        print("Starting the analysis using {} cpus...".format(cpu_count()))
        START = time.time()
        if COMMITS:
            start_analysis(ARGS.image, ARGS.result_dir, commits=COMMITS)
        else:
            start_analysis(ARGS.image, ARGS.result_dir)
        STOP = time.time()
        print("Done in {}".format(
            time.strftime('%H:%M:%S', time.gmtime(STOP - START))))
        print("Results can be found in {}".format(
            ARGS.result_dir + "/data{" +
            ','.join(["{}".format(i) for i in range(cpu_count())]) + "}"))
    if ARGS.assemble:
        assemble_directories(ARGS.result_dir)
    if ARGS.missing_commits:
        check_for_missing_commits(ARGS.repo_dir, ARGS.result_dir)
|