""" Generate train and test set. """
__author__ = "Kristian Berg"
__copyright__ = "Copyright (c) 2018 Axis Communications AB"
__license__ = "MIT"

import subprocess
import re
import json
from datetime import datetime, timedelta

# TODO: give update parameter as fraction
def build_sets(path, sgap=timedelta(days=200), gap=timedelta(days=150),
               egap=timedelta(days=150), update=timedelta(days=400),
               testdur=timedelta(days=70), traindur=timedelta(days=2000)):
    """Split the commits of a git repository into training and test sets.

    The split point starts at ``startdate + traindur`` and is moved forward
    by ``update`` until the test window would reach past the end date. For
    every split point one training set and one test set of commit hashes is
    produced. Training sets are cumulative: each one contains all commits of
    the previous training set plus the commits up to the new split point.

    The resulting lists are written as JSON to ``trainsets.json`` and
    ``testsets.json`` in the current working directory, and progress is
    printed to stdout.

    Args:
        path: Directory of the git repository.
        sgap: Gap added to the date of the oldest commit to get the start date.
        gap: Gap between the split point and the start of the test window.
        egap: Gap subtracted from the date of the newest commit to get the
            end date.
        update: Amount the split point is moved forward per iteration.
        testdur: Length of the test window.
        traindur: Time between the start date and the first split point.
    """
    # Date of the first commit listed by `git log --reverse` (oldest) and by
    # plain `git log` (newest), moved inwards by the start and end gaps.
    startdate = datetime_of_commit(
        path, command=['git', 'log', '--reverse', '--date=iso']) + sgap
    enddate = datetime_of_commit(
        path, command=['git', 'log', '--date=iso']) - egap

    # Print stuff
    print('Start: ' + str(startdate))
    print('End: ' + str(enddate))
    print('Duration: ' + str(enddate - startdate))
    print('len(training) len(testing)')

    # Build list of commit hashes from oldest to newest
    res = subprocess.run(['git', 'rev-list', '--reverse', 'HEAD'],
                         cwd=path, stdout=subprocess.PIPE)
    hashes = res.stdout.decode('utf-8').split()

    # Every date lookup spawns a git process and the windows below revisit
    # the same commits, so remember each commit's date after the first lookup.
    dates = {}

    def date_at(index):
        commit = hashes[index]
        if commit not in dates:
            dates[commit] = datetime_of_commit(path, hash=commit)
        return dates[commit]

    def skip_until(index, limit):
        # Advance to the first commit dated at or after `limit`. The bound on
        # len(hashes) ends the scan when the commits run out instead of
        # raising IndexError.
        while index < len(hashes) and date_at(index) < limit:
            index += 1
        return index

    trainsets = []
    testsets = []
    training = []
    tsplit = startdate + traindur

    # Adjust start index to correspond to start date
    train_index = skip_until(0, startdate)

    # TODO: Last few commits are not used
    while tsplit + gap + testdur < enddate:
        # The test window is located by scanning from the current training
        # position, before the training set is extended.
        test_index = skip_until(train_index, tsplit + gap)

        # Extend the (cumulative) training set up to the split point and
        # store a snapshot of it.
        train_end = skip_until(train_index, tsplit)
        training.extend(hashes[train_index:train_end])
        train_index = train_end
        trainsets.append(list(training))

        # The test set is rebuilt from scratch for every split point
        test_end = skip_until(test_index, tsplit + gap + testdur)
        testing = hashes[test_index:test_end]
        testsets.append(testing)

        # Print stuff
        print(str(len(training)) + ' ' + str(len(testing)))

        # Loop update
        tsplit += update

    # Write results to file
    with open('trainsets.json', 'w') as f:
        json.dump(trainsets, f)
    with open('testsets.json', 'w') as f:
        json.dump(testsets, f)

# Returns date of specific commit given a hash
# OR date of first commit result given a command
def datetime_of_commit(path, hash=None, command=None):
    """Return the date of a commit as a timezone-aware datetime.

    Either a specific commit is inspected with ``git show``, or the given
    git command is run and the first ``Date:`` line of its output is used.

    Args:
        path: Directory in which git is run.
        hash: Commit hash to look up. Takes precedence over ``command``
            when it is truthy.
        command: Git command as an argument list. Must start with ``'git'``
            and contain ``'--date=iso'``.

    Returns:
        The datetime parsed from the first ISO formatted ``Date:`` line.

    Raises:
        ValueError: If neither ``hash`` nor ``command`` is given, if
            ``command`` is not an accepted git command, or if the git
            output contains no ``Date:`` line.
    """
    # Check that either hash or command parameter has a value
    if hash:
        command = ['git', 'show', '--quiet', '--date=iso', hash]
    elif command:
        if command[0] != 'git':
            raise ValueError('Not a git command')
        if '--date=iso' not in command:
            raise ValueError('Command needs to specify --date=iso')
    else:
        raise ValueError('Either hash or command parameter is needed')

    # Get date of commit
    res = subprocess.run(command, cwd=path, stdout=subprocess.PIPE)
    gitlog = res.stdout.decode('utf-8', errors='ignore')
    match = re.search(r'(?<=\nDate:   )[0-9-+: ]+(?=\n)', gitlog)
    if match is None:
        # Happens when git printed no commit, e.g. because it failed or the
        # directory is not a repository; fail with a message that says so
        # instead of an AttributeError on None.
        raise ValueError('No commit date found in output of: '
                         + ' '.join(command))
    return datetime.strptime(match.group(0), '%Y-%m-%d %H:%M:%S %z')

if __name__ == '__main__':
    import sys

    # The repository path may be given as the first command-line argument;
    # without one the original hard-coded location is used.
    build_sets(sys.argv[1] if len(sys.argv) > 1
               else '/home/kristiab/Git/jenkins')