#!/bin/bash
#
# Top-level driver for the analysis pipeline.
#
## How to Use
# Takes 0 inputs, and outputs the data from our techniques.
# Can also take optional flags to run a subset of the steps, see -h output.
#
# Steps (each can be selected individually with the flag in parentheses):
#   -1  check dependencies (only when running everything)
#    0  clone the project git repos                    (-c)
#    1  download and filter the relevant issues        (-d)
#    2  identify fix commits                           (-i)
#    3  run SZZ                                        (-s)
#    4  compare SZZ results to ground truth            (-t)
#    5  update/generate experience files               (-u)
#    6  generate and plot the learning curves          (-v)

## Fields to Maintain

# git repos of projects we analyze
# N.b: "webrender_bindings", the only Rust code without its own git repo,
# is handled specially in the last step of this script.
declare -A repos
repos["firefox"]='https://github.com/mozilla/gecko-dev.git'
repos["webrender"]='https://github.com/servo/webrender.git'
repos["servo"]='https://github.com/jtracey/servo-mirror.git'
repos["mp4parse-rust"]='https://github.com/mozilla/mp4parse-rust.git'
#repos["qcms"]='https://github.com/FirefoxGraphics/qcms.git'
repos["encoding_rs"]='https://github.com/hsivonen/encoding_rs.git'
repos["mapped_hyph"]='https://github.com/jfkthame/mapped_hyph.git'
#repos["chardetng"]='https://github.com/hsivonen/chardetng.git'
#repos["shift_or_euc"]='https://github.com/hsivonen/shift_or_euc'
repos["cubeb-coreaudio-rs"]='https://github.com/mozilla/cubeb-coreaudio-rs.git'

# git hash of the most recent commit in the tree we consider
declare -A commits
commits["firefox"]=e5d3122984cea27576ad55b9898f2ec46529c5c9
commits["webrender"]=cb2b55394892ef9ea1e89dbe41fd3a8cebd61468
commits["servo"]=b1578947ef369a1810d1a83373f68bfd7fe23fe1
commits["mp4parse-rust"]=4f70fc9ec2b43f17003c476dcc0ad1737ae100dc
#commits["qcms"]=f2fdcde3912967fa06a5fff0957eebc7901c0645
commits["encoding_rs"]=a962ef4f8e569ccf5a22104d19cc10e8a0b458e6
commits["mapped_hyph"]=c7651a0cffff41996ad13c44f689bd9cd2192c01
#commits["chardetng"]=143dadde20e283a46ef33ba960b517a3283a3d22
commits["cubeb-coreaudio-rs"]=3ea3897147fa52ee3586b81d6d48315f0fba2777

# whitespace-separated list of C++ projects to analyze
cprojects="layers css stagefright qcms uconv hyphenation japanese-encoding cubeb-macos"

# whitespace-separated list of Rust projects to analyze
# (webrender_bindings is in gecko, stylo is in the servo git repo)
# you would also need to update the following scripts/files:
#   /code/fetch_bugzilla_bugs/fetch_bugs.sh
#   /code/fetch_bugzilla_bugs/filter.sh
#   /data/hand-annotated/relevant-dirs.csv
rustprojects="webrender webrender_bindings servo mp4parse-rust encoding_rs mapped_hyph cubeb-coreaudio-rs"

## Project Directory Structure
# (data dirs are made prior to being populated)

# directory this script is in
scriptDir="$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)"
# root dir of this project
# (quoted so that a checkout path containing whitespace still works)
rootDir="$(dirname -- "$scriptDir")"
# dir that stores all code, scripts, etc. (that we run, not analyze)
codeDir="$rootDir/code/"
# dir where we store all generated results and the few hand-annotated files
dataDir="$rootDir/data/"
# dir to clone repos into
repoDir="$dataDir/repos/"
# dir to store structured representation of issues
issuesDir="$dataDir/issues/"
# dir to store structured representation of fixes
fixesDir="$dataDir/fixes/"
# dir to store the output of SZZ
resultsDir="$dataDir/szz-results/"
# dir to store our best guess of which commits introduced which bugs
bugIntroDir="$dataDir/introducers/"
# dir to store the results of comparison of SZZ with available ground truth
groundDir="$dataDir/ground-truth/"
# dir where we store hand-annotated data
# (i.e., data not generated by this script)
annotatedDir="$dataDir/hand-annotated/"
# dir where we store structured data about issues, their fixes, and inducers
bugzillaDir="$dataDir/bugzilla-data/"
# dir where we store the experience of each contributor for each project
experienceDir="$dataDir/experiences/"
# dir where we store the learning curve plots
plotDir="$dataDir/learning-curve-plots/"

# Abort as soon as any step fails, so later steps never consume partial data.
set -e

## Flag parsing

# Print the help text to stdout.
usage() {
  echo "usage: $0 [flags]"
  echo " -h print this help and exit"
  echo " -a run all steps (this is the default if no args given)"
  echo " -c clone each project git repo into $repoDir, if it doesn't already exist"
  echo " -d download relevant issues"
  echo " -i identify fix commits"
  echo " -s run SZZ"
  echo " -t compare to ground truth"
  echo " -u update experience files"
  echo " -v create and visualize learning curves"
}

# One boolean per selectable step; everything is off until requested.
allFlag=false
cloneFlag=false
downloadFlag=false
identifyFlag=false
szzFlag=false
truthFlag=false
experienceFlag=false
learningFlag=false

# With no arguments at all, run the whole pipeline.
if (( $# == 0 )); then
  allFlag=true
fi

# The leading ':' puts getopts in silent mode, which is what makes $OPTARG
# hold the offending character for an unknown flag (otherwise it is unset
# and the message below would print an empty flag name).
while getopts ':hacdistuv' opt; do
  case "$opt" in
    h)
      usage
      exit
      ;;
    a) allFlag=true ;;
    c) cloneFlag=true ;;
    d) downloadFlag=true ;;
    i) identifyFlag=true ;;
    s) szzFlag=true ;;
    t) truthFlag=true ;;
    u) experienceFlag=true ;;
    v) learningFlag=true ;;
    # Unknown flags are reported but do not stop the run.
    \?) echo "unknown flag: -$OPTARG" >&2 ;;
  esac
done

# Step -1: check dependencies (only if running everything)
if [[ "$allFlag" == true ]]; then
  missing=false
  for dependency in git jq python3 gradle java javac; do
    if ! command -v "$dependency" > /dev/null; then
      echo "Missing dependency: $dependency" >&2
      missing=true
    fi
  done
  for pyMod in numpy scipy matplotlib.pyplot urllib3; do
    if ! python3 -c "import $pyMod" > /dev/null; then
      echo "Missing python module: $pyMod" >&2
      missing=true
    fi
  done
  # dvipng texlive-latex-extra texlive-fonts-recommended cm-super
  if [[ "$missing" == true ]]; then
    echo "Aborting due to missing dependencies." >&2
    exit 1
  fi

  # `git --version` prints e.g. "git version 2.39.2"; strip the textual
  # prefix before splitting on '.', otherwise the major component would be
  # the non-numeric string "git version 2".
  git_version="$(git --version)"
  git_version="${git_version#git version }"
  git_major="${git_version%%.*}"
  git_minor="${git_version#*.}"
  git_minor="${git_minor%%.*}"
  # Drop any non-numeric suffix (e.g. "37-rc0") so the arithmetic is safe.
  git_major="${git_major%%[!0-9]*}"
  git_minor="${git_minor%%[!0-9]*}"
  # Compare (major, minor) as a pair, so that a future 3.x is accepted even
  # when its minor number is below 37.
  if (( ${git_major:-0} < 2 \
        || ( ${git_major:-0} == 2 && ${git_minor:-0} < 37 ) )); then
    echo "Aborting: git needs --since-as-filter, added in 2.37" >&2
    exit 1
  fi
fi

# Step 0: for each repo that doesn't already exist:
# - clone
# - uncap rename limits
# - generate the .mailmap file
for repo in "${!repos[@]}"; do
  if [[ ! -d "$repoDir/$repo" ]] \
      && [[ "$allFlag" == true || "$cloneFlag" == true ]]; then
    mkdir -p "$repoDir"
    git clone "${repos[$repo]}" "$repoDir/$repo"
    cd "$repoDir/$repo"
    git config diff.renameLimit 0
    git checkout "${commits[$repo]}"
    python3 "$codeDir/author-identities.py" . | sort > .mailmap
    cd "$rootDir"
  elif [[ "$cloneFlag" == true ]]; then
    echo "You're trying to clone $repo, but it seems to already exist." >&2
    echo "Remove or move $repoDir/$repo if you really want to clone it again." >&2
    echo "Continuing as though this succeeded." >&2
  fi
done

# Step 1: get (filtered) issues
if [[ "$allFlag" == true || "$downloadFlag" == true ]]; then
  # Pulls and structures bug data we need, as available from Bugzilla
  "$codeDir"/fetch_bugzilla_bugs/fetch_bugs.sh "$codeDir" "$issuesDir"
  # Additional filters based on conditions not visible in Bugzilla metadata
  "$codeDir"/fetch_bugzilla_bugs/filter.sh "$codeDir" "$issuesDir" "$repoDir/firefox"
fi

# Step 2: identify fix commits
if [[ "$allFlag" == true || "$identifyFlag" == true ]]; then
  mkdir -p "$fixesDir"
  # The result file issue_list.json is picked up from the current directory
  # below, so run from the data dir.
  cd "$dataDir"
  # $cprojects is intentionally unquoted: it is a whitespace-separated list.
  for project in $cprojects; do
    echo "getting $project fixes"
    python3 "$codeDir/fetch_bugzilla_bugs/find_bugzilla_fixes.py" \
      --git-path="$repoDir/firefox" \
      --issue-list="$issuesDir/$project-issues/"
    mv issue_list.json "$fixesDir/$project.json"
  done
  cd "$rootDir"
fi

# Step 3: run SZZ
if [[ "$allFlag" == true || "$szzFlag" == true ]]; then
  cd "$codeDir/szz"
  mkdir -p "$resultsDir"
  gradle fatJar
  for project in $cprojects; do
    echo "running SZZ on $project"
    # Clear stale output so `mv` below does not nest inside an old directory.
    rm -rf "$resultsDir/$project"
    java -Xmx5g -jar ./build/libs/szz_find_bug_introducers-0.1.jar -i \
      "$fixesDir/$project.json" -r "$repoDir/firefox" -d 1
    mv results "$resultsDir/$project"
    rm -r issues
  done
  cd "$rootDir"
fi

# Step 4: compare to ground truth
if [[ "$allFlag" == true || "$truthFlag" == true ]]; then
  #for project in $cprojects ; do
  #  echo "pulling approval-reqs for $project"
  #  "$codeDir"/fetch_bugzilla_bugs/fetch-all-approval-reqs.sh \
  #    "$fixesDir/$project.json" "$repoDir/firefox" "$bugzillaDir" "$codeDir"
  #done
  cd "$resultsDir"
  mkdir -p "$groundDir"
  mkdir -p "$bugIntroDir"
  for project in $cprojects; do
    echo "comparing results for $project"
    mkdir -p "$bugIntroDir/$project"
    "$codeDir"/compare_results-v2.sh \
      "$project" \
      "$fixesDir/$project.json" \
      "$resultsDir/$project/fix_and_introducers_pairs.json" \
      "$annotatedDir/c++.csv" \
      "$bugIntroDir/$project/" \
      > "$groundDir/$project.csv"
  done
  cd "$rootDir"
fi

# Step 5: update/generate experience files
if [[ "$allFlag" == true || "$experienceFlag" == true ]]; then
  mkdir -p "$plotDir"
  for repo in "${!repos[@]}"; do
    # Regenerate from scratch for each repo.
    rm -rf "$experienceDir/$repo"
    mkdir -p "$experienceDir/$repo"
    "$codeDir"/learning_curves/genExp.sh "$repoDir/$repo" \
      "${commits[$repo]}" "$experienceDir/$repo"
  done
fi

# Step 6: generate and plot learning curve
if [[ "$allFlag" == true || "$learningFlag" == true ]]; then
  # Make sure the plot dir exists even when this step is run on its own (-v).
  mkdir -p "$plotDir"
  # webrender_bindings has no repo of its own (it lives in the firefox tree),
  # so expose the firefox repo and experience data under that name.
  rm -rf "$repoDir/webrender_bindings" "$experienceDir/webrender_bindings"
  ln -s "$repoDir/firefox" "$repoDir/webrender_bindings"
  ln -s "$experienceDir/firefox" "$experienceDir/webrender_bindings"
  echo "creating and plotting learning curves..."
  "$codeDir/learning_curves/grid_search.sh" \
    "$codeDir" "$dataDir" "$annotatedDir" "$repoDir" \
    "$experienceDir" "$plotDir" "$rustprojects"
fi