#!/bin/bash

## How to Use
# Takes no positional arguments; runs the analysis steps and outputs the data
# from our techniques.
# Optional flags restrict the run to a subset of the steps; see the -h output.

## Fields to Maintain
# Upstream git repository of every project we analyze, keyed by project name.
# N.b: "webrender_bindings", the only Rust code without its own git repo,
#      is handled specially in the last step of this script.
declare -A repos=(
    ["firefox"]='https://github.com/mozilla/gecko-dev.git'
    ["webrender"]='https://github.com/servo/webrender.git'
    ["servo"]='https://github.com/jtracey/servo-mirror.git'
    ["mp4parse-rust"]='https://github.com/mozilla/mp4parse-rust.git'
    #["qcms"]='https://github.com/FirefoxGraphics/qcms.git'
    ["encoding_rs"]='https://github.com/hsivonen/encoding_rs.git'
    ["mapped_hyph"]='https://github.com/jfkthame/mapped_hyph.git'
    #["chardetng"]='https://github.com/hsivonen/chardetng.git'
    #["shift_or_euc"]='https://github.com/hsivonen/shift_or_euc'
    ["cubeb-coreaudio-rs"]='https://github.com/mozilla/cubeb-coreaudio-rs.git'
)

# For each repository above, the git hash of the most recent commit in the
# tree we consider (same keys as "repos").
declare -A commits=(
    ["firefox"]=e5d3122984cea27576ad55b9898f2ec46529c5c9
    ["webrender"]=cb2b55394892ef9ea1e89dbe41fd3a8cebd61468
    ["servo"]=b1578947ef369a1810d1a83373f68bfd7fe23fe1
    ["mp4parse-rust"]=4f70fc9ec2b43f17003c476dcc0ad1737ae100dc
    #["qcms"]=f2fdcde3912967fa06a5fff0957eebc7901c0645
    ["encoding_rs"]=a962ef4f8e569ccf5a22104d19cc10e8a0b458e6
    ["mapped_hyph"]=c7651a0cffff41996ad13c44f689bd9cd2192c01
    #["chardetng"]=143dadde20e283a46ef33ba960b517a3283a3d22
    ["cubeb-coreaudio-rs"]=3ea3897147fa52ee3586b81d6d48315f0fba2777
)

# C++ projects to analyze, as one whitespace-separated string
# (the steps below rely on word splitting to iterate over it).
cprojects="layers css stagefright qcms uconv hyphenation japanese-encoding cubeb-macos"

# Rust projects to analyze, as one whitespace-separated string
# (webrender_bindings is in gecko, stylo is in the servo git repo).
# When changing this list you would also need to update the following
# scripts/files:
# /code/fetch_bugzilla_bugs/fetch_bugs.sh
# /code/fetch_bugzilla_bugs/filter.sh
# /data/hand-annotated/relevant-dirs.csv
rustprojects="webrender webrender_bindings servo mp4parse-rust encoding_rs mapped_hyph cubeb-coreaudio-rs"

## Project Directory Structure
# (data dirs are made prior to being populated)
# Every path below is derived from the location of this script, so the
# script can be invoked from any working directory.
# directory this script is in (absolute path)
scriptDir="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"
# root dir of this project (parent of the script's directory); the expansion
# is quoted so a checkout path containing spaces is handled correctly
rootDir="$(dirname -- "$scriptDir")"
# dir that stores all code, scripts, etc. (that we run, not analyze)
codeDir="$rootDir/code/"
# dir where we store all generated results and the few hand-annotated files
dataDir="$rootDir/data/"
# dir to clone repos into
repoDir="$dataDir/repos/"
# dir to store structured representation of issues
issuesDir="$dataDir/issues/"
# dir to store structured representation of fixes
fixesDir="$dataDir/fixes/"
# dir to store the output of SZZ
resultsDir="$dataDir/szz-results/"
# dir to store our best guess of which commits introduced which bugs
bugIntroDir="$dataDir/introducers/"
# dir to store the results of comparison of SZZ with available ground truth
groundDir="$dataDir/ground-truth/"
# dir where we store hand-annotated data
# (i.e., data not generated by this script)
annotatedDir="$dataDir/hand-annotated/"
# dir where we store structured data about issues, their fixes, and inducers
bugzillaDir="$dataDir/bugzilla-data/"
# dir where we store the experience of each contributor for each project
experienceDir="$dataDir/experiences/"
# dir where we store the learning curve plots
plotDir="$dataDir/learning-curve-plots/"

# Abort on the first failing command for the remainder of the script.
set -e

# Parses the command-line flags passed as arguments and records which steps
# to run in the global *Flag variables; each one ends up "true" or "false".
#   no arguments  -> allFlag=true (run every step)
#   -h            -> print usage and exit
#   unknown flag  -> report it on stderr and keep parsing
parse_flags() {
    local opt OPTIND=1

    # Start from a known state so same-named variables inherited from the
    # environment cannot switch steps on.
    allFlag=false
    cloneFlag=false
    downloadFlag=false
    identifyFlag=false
    szzFlag=false
    truthFlag=false
    experienceFlag=false
    learningFlag=false

    if (($# == 0)); then
        allFlag=true
    fi
    # The leading ':' selects getopts' silent error reporting: for an unknown
    # option, opt is '?' and OPTARG holds the offending letter, which the
    # message below needs (without the ':' OPTARG would be unset).
    while getopts :hacdistuv opt; do
        case $opt in
            h)
                echo "usage: $0 [flags]"
                echo "  -h   print this help and exit"
                echo "  -a   run all steps (this is the default if no args given)"
                echo "  -c   clone each project git repo into $repoDir, if it doesn't already exist"
                echo "  -d   download relevant issues"
                echo "  -i   identify fix commits"
                echo "  -s   run SZZ"
                echo "  -t   compare to ground truth"
                echo "  -u   update experience files"
                echo "  -v   create and visualize learning curves"
                exit
                ;;
            a) allFlag=true ;;
            c) cloneFlag=true ;;
            d) downloadFlag=true ;;
            i) identifyFlag=true ;;
            s) szzFlag=true ;;
            t) truthFlag=true ;;
            u) experienceFlag=true ;;
            v) learningFlag=true ;;
            \?) echo "unknown flag: -$OPTARG" >&2 ;;
        esac
    done
}

parse_flags "$@"


# Succeeds (status 0) when the first "MAJOR.MINOR" number pair found in the
# version text $1 is at least $2.$3. Fails when the version is older, or when
# no such number pair can be found in $1.
#   $1 - text containing a version, e.g. the output of `git --version`
#   $2 - required major version
#   $3 - required minor version
version_at_least() {
    local versionText=$1 wantMajor=$2 wantMinor=$3
    local versionRe='([0-9]+)\.([0-9]+)'
    [[ $versionText =~ $versionRe ]] || return 1
    # 10# forces base 10 so a component such as "08" is not read as octal
    local major=$((10#${BASH_REMATCH[1]}))
    local minor=$((10#${BASH_REMATCH[2]}))
    (( major > wantMajor || (major == wantMajor && minor >= wantMinor) ))
}

# Step -1: check dependencies (only if running everything)
if [ "$allFlag" = true ] ; then
    missing=false
    # Report every missing program/module before aborting, rather than
    # stopping at the first one.
    for dependency in git jq python3 gradle java javac ; do
        if ! command -v "$dependency" > /dev/null 2>&1; then
            echo "Missing dependency: $dependency" >&2
            missing=true
        fi
    done
    for pyMod in numpy scipy matplotlib.pyplot urllib3 ; do
        if ! python3 -c "import $pyMod" > /dev/null; then
            echo "Missing python module: $pyMod" >&2
            missing=true
        fi
    done
    # dvipng texlive-latex-extra texlive-fonts-recommended cm-super
    if [ "$missing" = true ] ; then
        echo "Aborting due to missing dependencies." >&2
        exit 1
    fi
    # Compare major and minor together: a plain "minor < 37" test would
    # wrongly reject e.g. a future 3.0.
    if ! version_at_least "$(git --version)" 2 37 ; then
        echo "Aborting: git needs --since-as-filter, added in 2.37" >&2
        exit 1
    fi
fi


# Step 0: for each repo that doesn't already exist:
#  - clone
#  - uncap rename limits
#  - generate the .mailmap file
# Runs when -a or -c was requested. With -c, a repo directory that is already
# present is left untouched and only a notice is printed.
for repo in "${!repos[@]}" ; do
    if [[ ! -d "$repoDir/$repo" && ( "$allFlag" = true || "$cloneFlag" = true ) ]] ; then
        mkdir -p "$repoDir"
        git clone "${repos[$repo]}" "$repoDir/$repo"
        cd "$repoDir/$repo"
        # 0 removes git's cap on the number of files considered for rename
        # detection in diffs
        git config diff.renameLimit 0
        # pin the working tree to the commit we analyze
        git checkout "${commits[$repo]}"
        # pipefail (scoped to this subshell) makes a failure of the generator
        # abort the script; otherwise sort's exit status would hide it and a
        # truncated .mailmap would be used silently.
        (
            set -o pipefail
            python3 "$codeDir/author-identities.py" . | sort > .mailmap
        )
        cd "$rootDir"
    elif [ "$cloneFlag" = true ] ; then
        echo "You're trying to clone $repo, but it seems to already exist." >&2
        echo "Remove or move $repoDir/$repo if you really want to clone it again." >&2
        echo "Continuing as though this succeeded." >&2
    fi
done


# Step 1: get (filtered) issues
# Runs when -a or -d was requested.
if [[ $allFlag == true || $downloadFlag == true ]] ; then
    # Pulls and structures bug data we need, as available from Bugzilla
    "$codeDir"/fetch_bugzilla_bugs/fetch_bugs.sh \
        "$codeDir" \
        "$issuesDir"
    # Additional filters based on conditions not visible in Bugzilla metadata
    "$codeDir"/fetch_bugzilla_bugs/filter.sh \
        "$codeDir" \
        "$issuesDir" \
        "$repoDir/firefox"
fi


# Step 2: identify fix commits
# Runs when -a or -i was requested. Produces one "$fixesDir/<project>.json"
# per C++ project.
if [[ $allFlag == true || $identifyFlag == true ]] ; then
    mkdir -p "$fixesDir"
    # The mv below picks issue_list.json up from the working directory
    # (presumably written there by find_bugzilla_fixes.py), so work from the
    # data dir.
    cd "$dataDir"
    # $cprojects is deliberately unquoted: it is a whitespace-separated list
    for cproj in $cprojects ; do
        echo "getting $cproj fixes"
        python3 "$codeDir/fetch_bugzilla_bugs/find_bugzilla_fixes.py" \
            "--git-path=$repoDir/firefox" \
            "--issue-list=$issuesDir/${cproj}-issues/"
        mv issue_list.json "$fixesDir/${cproj}.json"
    done
    cd "$rootDir"
fi


# Step 3: run SZZ
# Runs when -a or -s was requested. Builds the SZZ jar once, then runs it on
# each C++ project's fix list against the firefox repo.
if [[ $allFlag == true || $szzFlag == true ]] ; then
    cd "$codeDir/szz"
    mkdir -p "$resultsDir"
    gradle fatJar
    # $cprojects is deliberately unquoted: it is a whitespace-separated list
    for cproj in $cprojects ; do
        echo "running SZZ on $cproj"
        # drop results of any earlier run for this project
        rm -rf "$resultsDir/${cproj}"
        java -Xmx5g \
            -jar ./build/libs/szz_find_bug_introducers-0.1.jar \
            -i "$fixesDir/${cproj}.json" \
            -r "$repoDir/firefox" \
            -d 1
        # keep the "results" directory under the project's name and remove
        # the "issues" directory left in the working directory
        mv results "$resultsDir/${cproj}"
        rm -r issues
    done
    cd "$rootDir"
fi


# Step 4: compare to ground truth
# Runs when -a or -t was requested. For each C++ project, the stdout of
# compare_results-v2.sh is stored as "$groundDir/<project>.csv".
if [[ $allFlag == true || $truthFlag == true ]] ; then
    #for project in $cprojects ; do
    #    echo "pulling approval-reqs for $project"
    #    "$codeDir"/fetch_bugzilla_bugs/fetch-all-approval-reqs.sh \
    #              "$fixesDir/$project.json" "$repoDir/firefox" "$bugzillaDir" "$codeDir"
    #done
    cd "$resultsDir"
    mkdir -p "$groundDir"
    mkdir -p "$bugIntroDir"
    # $cprojects is deliberately unquoted: it is a whitespace-separated list
    for cproj in $cprojects ; do
        echo "comparing results for $cproj"
        mkdir -p "$bugIntroDir/${cproj}"
        "$codeDir"/compare_results-v2.sh \
            "${cproj}" \
            "$fixesDir/${cproj}.json" \
            "$resultsDir/${cproj}/fix_and_introducers_pairs.json" \
            "$annotatedDir/c++.csv" \
            "$bugIntroDir/${cproj}/" \
            > "$groundDir/${cproj}.csv"
    done
    cd "$rootDir"
fi


# Step 5: update/generate experience files
# Runs when -a or -u was requested. Each repo's experience directory is
# rebuilt from scratch by genExp.sh.
if [[ $allFlag == true || $experienceFlag == true ]] ; then
    mkdir -p "$plotDir"
    for repoName in "${!repos[@]}" ; do
        # start from an empty directory so no stale files survive
        rm -rf "$experienceDir/${repoName}"
        mkdir -p "$experienceDir/${repoName}"
        "$codeDir"/learning_curves/genExp.sh \
            "$repoDir/${repoName}" \
            "${commits[$repoName]}" \
            "$experienceDir/${repoName}"
    done
fi


# Step 6: generate and plot learning curve
# Runs when -a or -v was requested.
if [ "$allFlag" = true ] || [ "$learningFlag" = true ] ; then
    # The plot dir is otherwise only created by step 5; create it here too so
    # that running -v on its own does not hand grid_search.sh a missing dir.
    mkdir -p "$plotDir"
    # webrender_bindings has no git repo of its own, so it is exposed as
    # symlinks to the firefox repo and to the firefox experience data.
    # Remove any previous links first so ln does not fail on an existing name.
    rm -rf "$repoDir/webrender_bindings" "$experienceDir/webrender_bindings"
    ln -s "$repoDir/firefox" "$repoDir/webrender_bindings"
    ln -s "$experienceDir/firefox" "$experienceDir/webrender_bindings"
    echo "creating and plotting learning curves..."
    # $rustprojects is passed as ONE argument holding the whole
    # whitespace-separated list
    "$codeDir/learning_curves/grid_search.sh" \
        "$codeDir" "$dataDir" "$annotatedDir" "$repoDir" \
        "$experienceDir" "$plotDir" "$rustprojects"
fi