123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249 |
- #!/bin/bash
- ## How to Use
- # Takes 0 inputs, and outputs the data from our techniques.
- # Can also take optional flags to run subset of commands, see -h output.
## Fields to Maintain

# Upstream git repository of each project we analyze, keyed by project name.
# N.B.: "webrender_bindings", the only Rust code without its own git repo,
# is handled specially in the last step of this script.
declare -A repos=(
  [firefox]='https://github.com/mozilla/gecko-dev.git'
  [webrender]='https://github.com/servo/webrender.git'
  [servo]='https://github.com/jtracey/servo-mirror.git'
  [mp4parse-rust]='https://github.com/mozilla/mp4parse-rust.git'
  # [qcms]='https://github.com/FirefoxGraphics/qcms.git'
  [encoding_rs]='https://github.com/hsivonen/encoding_rs.git'
  [mapped_hyph]='https://github.com/jfkthame/mapped_hyph.git'
  # [chardetng]='https://github.com/hsivonen/chardetng.git'
  # [shift_or_euc]='https://github.com/hsivonen/shift_or_euc'
  [cubeb-coreaudio-rs]='https://github.com/mozilla/cubeb-coreaudio-rs.git'
)

# Git hash of the most recent commit we consider in each tree; uses the same
# keys as the repos table above.
declare -A commits=(
  [firefox]=e5d3122984cea27576ad55b9898f2ec46529c5c9
  [webrender]=cb2b55394892ef9ea1e89dbe41fd3a8cebd61468
  [servo]=b1578947ef369a1810d1a83373f68bfd7fe23fe1
  [mp4parse-rust]=4f70fc9ec2b43f17003c476dcc0ad1737ae100dc
  # [qcms]=f2fdcde3912967fa06a5fff0957eebc7901c0645
  [encoding_rs]=a962ef4f8e569ccf5a22104d19cc10e8a0b458e6
  [mapped_hyph]=c7651a0cffff41996ad13c44f689bd9cd2192c01
  # [chardetng]=143dadde20e283a46ef33ba960b517a3283a3d22
  [cubeb-coreaudio-rs]=3ea3897147fa52ee3586b81d6d48315f0fba2777
)

# Whitespace-separated list of C++ projects to analyze.
cprojects="layers css stagefright qcms uconv hyphenation japanese-encoding cubeb-macos"

# Whitespace-separated list of Rust projects to analyze
# (webrender_bindings is in gecko, stylo is in the servo git repo).
# If you change it, you would also need to update the following scripts/files:
#   /code/fetch_bugzilla_bugs/fetch_bugs.sh
#   /code/fetch_bugzilla_bugs/filter.sh
#   /data/hand-annotated/relevant-dirs.csv
rustprojects="webrender webrender_bindings servo mp4parse-rust encoding_rs mapped_hyph cubeb-coreaudio-rs"
## Project Directory Structure
# (data dirs are made prior to being populated)
# Every path below is derived from the location of this script, so the script
# can be invoked from any working directory. The trailing slashes are part of
# the values handed to the helper scripts and are kept as-is.

# directory this script is in
scriptDir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)"
# root dir of this project (the parent of the script dir); the expansion is
# quoted so a checkout path containing whitespace is not word-split
rootDir="$(dirname -- "$scriptDir")"
# dir that stores all code, scripts, etc. (that we run, not analyze)
codeDir="$rootDir/code/"
# dir where we store all generated results and the few hand-annotated files
dataDir="$rootDir/data/"
# dir to clone repos into
repoDir="$dataDir/repos/"
# dir to store structured representation of issues
issuesDir="$dataDir/issues/"
# dir to store structured representation of fixes
fixesDir="$dataDir/fixes/"
# dir to store the output of SZZ
resultsDir="$dataDir/szz-results/"
# dir to store our best guess of which commits introduced which bugs
bugIntroDir="$dataDir/introducers/"
# dir to store the results of comparison of SZZ with available ground truth
groundDir="$dataDir/ground-truth/"
# dir where we store hand-annotated data
# (i.e., data not generated by this script)
annotatedDir="$dataDir/hand-annotated/"
# dir where we store structured data about issues, their fixes, and inducers
bugzillaDir="$dataDir/bugzilla-data/"
# dir where we store the experience of each contributor for each project
experienceDir="$dataDir/experiences/"
# dir where we store the learning curve plots
plotDir="$dataDir/learning-curve-plots/"
# Abort on the first failing command in any of the steps below.
set -e

# Print the usage summary to stdout.
usage() {
  echo "usage: $0 [flags]"
  echo " -h print this help and exit"
  echo " -a run all steps (this is the default if no args given)"
  echo " -c clone each project git repo into $repoDir, if it doesn't already exist"
  echo " -d download relevant issues"
  echo " -i identify fix commits"
  echo " -s run SZZ"
  echo " -t compare to ground truth"
  echo " -u update experience files"
  echo " -v create and visualize learning curves"
}

# Translate command-line flags into the global step switches.
# Globals written: allFlag cloneFlag downloadFlag identifyFlag szzFlag
#                  truthFlag experienceFlag learningFlag (each set to "true")
# Arguments: the script's command-line arguments
# Exits:     0 after printing help for -h; 2 on an unknown flag
parse_flags() {
  local opt OPTARG OPTIND=1

  # no arguments at all means "run everything"
  if (($# == 0)); then
    allFlag=true
  fi

  # The leading ':' puts getopts into silent mode: only then is the offending
  # character stored in OPTARG, so the message below can actually name it.
  while getopts ':hacdistuv' opt; do
    case "$opt" in
      h)
        usage
        exit
        ;;
      a) allFlag=true ;;
      c) cloneFlag=true ;;
      d) downloadFlag=true ;;
      i) identifyFlag=true ;;
      s) szzFlag=true ;;
      t) truthFlag=true ;;
      u) experienceFlag=true ;;
      v) learningFlag=true ;;
      \?)
        # stop here instead of silently running a partial set of steps
        echo "unknown flag: -$OPTARG" >&2
        exit 2
        ;;
    esac
  done
}

parse_flags "$@"
# Step -1: check dependencies (only if running everything)

# Succeed when a `git --version` banner reports at least MAJOR.MINOR.
# Arguments: $1 - full output of `git --version` (e.g. "git version 2.43.0")
#            $2 - required major version
#            $3 - required minor version
# Returns:   0 if the reported version is new enough, 1 otherwise (including
#            when no "N.N" version number can be found in the banner)
git_version_at_least() {
  local banner=$1 want_major=$2 want_minor=$3
  local major minor
  [[ $banner =~ ([0-9]+)\.([0-9]+) ]] || return 1
  major=${BASH_REMATCH[1]}
  minor=${BASH_REMATCH[2]}
  # 10# forces base 10 so a component with a leading zero is not read as octal
  ((10#$major > want_major || (10#$major == want_major && 10#$minor >= want_minor)))
}

if [ "$allFlag" = true ]; then
  missing=false
  for dependency in git jq python3 gradle java javac; do
    if ! command -v "$dependency" > /dev/null; then
      echo "Missing dependency: $dependency" >&2
      missing=true
    fi
  done
  # only probe python modules when python3 itself exists; otherwise every
  # module would be reported missing on top of the python3 message above
  if command -v python3 > /dev/null; then
    for pyMod in numpy scipy matplotlib.pyplot urllib3; do
      if ! python3 -c "import $pyMod" > /dev/null; then
        echo "Missing python module: $pyMod" >&2
        missing=true
      fi
    done
  fi
  # dvipng texlive-latex-extra texlive-fonts-recommended cm-super
  # NOTE(review): the names above look like further system packages that are
  # not checked here -- confirm whether they are still required.
  if [ "$missing" = true ]; then
    echo "Aborting due to missing dependencies." >&2
    exit 1
  fi
  # Compare major and minor together: the minor alone must not disqualify a
  # newer major release, and the banner's "git version " prefix must not leak
  # into the numeric comparison.
  if ! git_version_at_least "$(git --version)" 2 37; then
    echo "Aborting: git needs --since-as-filter, added in 2.37" >&2
    exit 1
  fi
fi
# Step 0: for each repo that doesn't already exist:
# - clone
# - uncap rename limits
# - generate the .mailmap file
for repo in "${!repos[@]}"; do
  if ! [ -d "$repoDir/$repo" ] \
    && { [ "$allFlag" = true ] || [ "$cloneFlag" = true ]; }; then
    mkdir -p "$repoDir"
    git clone "${repos[$repo]}" "$repoDir/$repo"
    (
      # pipefail: without it a failing author-identities.py is masked by the
      # exit status of sort, leaving an empty .mailmap that is never rebuilt
      # because the repo directory exists on every later run.
      # -e is restated so the subshell stops at the first failing command.
      set -eo pipefail
      cd "$repoDir/$repo"
      git config diff.renameLimit 0
      # pin the working tree to the commit recorded for this project
      git checkout "${commits[$repo]}"
      python3 "$codeDir/author-identities.py" . | sort > .mailmap
    )
    # the later steps are entered from the project root
    cd "$rootDir"
  elif [ "$cloneFlag" = true ]; then
    # an explicit -c for a repo that is already present: warn, don't re-clone
    echo "You're trying to clone $repo, but it seems to already exist." >&2
    echo "Remove or move $repoDir/$repo if you really want to clone it again." >&2
    echo "Continuing as though this succeeded." >&2
  fi
done
# Step 1: get (filtered) issues
# Runs for -a (or no arguments) and for -d.
if [[ $allFlag == true || $downloadFlag == true ]]; then
  # Pulls and structures bug data we need, as available from Bugzilla
  "$codeDir/fetch_bugzilla_bugs/fetch_bugs.sh" "$codeDir" "$issuesDir"

  # Additional filters based on conditions not visible in Bugzilla metadata;
  # this pass is also given the firefox checkout
  "$codeDir/fetch_bugzilla_bugs/filter.sh" \
    "$codeDir" \
    "$issuesDir" \
    "$repoDir/firefox"
fi
# Step 2: identify fix commits
# Runs for -a (or no arguments) and for -i.
if [[ $allFlag == true || $identifyFlag == true ]]; then
  mkdir -p "$fixesDir"
  # issue_list.json is picked up from the current directory after each run,
  # so work from the data dir
  cd "$dataDir"
  for project in $cprojects; do
    printf 'getting %s fixes\n' "$project"
    fixFinderArgs=(
      --git-path="$repoDir/firefox"
      --issue-list="$issuesDir/$project-issues/"
    )
    python3 "$codeDir/fetch_bugzilla_bugs/find_bugzilla_fixes.py" "${fixFinderArgs[@]}"
    # file the result under the project's name
    mv issue_list.json "$fixesDir/$project.json"
  done
  cd "$rootDir"
fi
# Step 3: run SZZ
# Runs for -a (or no arguments) and for -s.
if [[ $allFlag == true || $szzFlag == true ]]; then
  # the gradle build and the relative jar path below both start from here
  cd "$codeDir/szz"
  mkdir -p "$resultsDir"
  gradle fatJar
  for project in $cprojects; do
    printf 'running SZZ on %s\n' "$project"
    # clear the output of any earlier run for this project
    rm -rf "$resultsDir/$project"
    szzArgs=(-i "$fixesDir/$project.json" -r "$repoDir/firefox" -d 1)
    java -Xmx5g -jar ./build/libs/szz_find_bug_introducers-0.1.jar "${szzArgs[@]}"
    # "results" and "issues" are taken from the current directory: the former
    # is kept per project, the latter is discarded
    mv results "$resultsDir/$project"
    rm -r issues
  done
  cd "$rootDir"
fi
# Step 4: compare to ground truth
# Runs for -a (or no arguments) and for -t.
if [[ $allFlag == true || $truthFlag == true ]]; then
  # (disabled) pull the approval requests for every project first:
  #for project in $cprojects ; do
  #  echo "pulling approval-reqs for $project"
  #  "$codeDir"/fetch_bugzilla_bugs/fetch-all-approval-reqs.sh \
  #    "$fixesDir/$project.json" "$repoDir/firefox" "$bugzillaDir" "$codeDir"
  #done
  cd "$resultsDir"
  mkdir -p "$groundDir"
  mkdir -p "$bugIntroDir"
  for project in $cprojects; do
    printf 'comparing results for %s\n' "$project"
    mkdir -p "$bugIntroDir/$project"
    compareArgs=(
      "$project"
      "$fixesDir/$project.json"
      "$resultsDir/$project/fix_and_introducers_pairs.json"
      "$annotatedDir/c++.csv"
      "$bugIntroDir/$project/"
    )
    # the comparison's stdout becomes the per-project ground-truth CSV
    "$codeDir/compare_results-v2.sh" "${compareArgs[@]}" > "$groundDir/$project.csv"
  done
  cd "$rootDir"
fi
# Step 5: update/generate experience files
# Runs for -a (or no arguments) and for -u.
if [[ $allFlag == true || $experienceFlag == true ]]; then
  mkdir -p "$plotDir"
  for repo in "${!repos[@]}"; do
    repoExperience="$experienceDir/$repo"
    # start each repo from an empty experience dir
    rm -rf "$repoExperience"
    mkdir -p "$repoExperience"
    # arguments: repo checkout, commit recorded for it, output dir
    "$codeDir/learning_curves/genExp.sh" \
      "$repoDir/$repo" \
      "${commits[$repo]}" \
      "$repoExperience"
  done
fi
# Step 6: generate and plot learning curve
# Runs for -a (or no arguments) and for -v.
if [[ $allFlag == true || $learningFlag == true ]]; then
  # webrender_bindings has no repo of its own: point its repo and experience
  # entries at the firefox ones, replacing whatever an earlier run left behind
  rm -rf "$repoDir/webrender_bindings" "$experienceDir/webrender_bindings"
  ln -s "$repoDir/firefox" "$repoDir/webrender_bindings"
  ln -s "$experienceDir/firefox" "$experienceDir/webrender_bindings"

  echo "creating and plotting learning curves..."
  gridSearchArgs=(
    "$codeDir"
    "$dataDir"
    "$annotatedDir"
    "$repoDir"
    "$experienceDir"
    "$plotDir"
    # the whole Rust project list travels as one whitespace-separated argument
    "$rustprojects"
  )
  "$codeDir/learning_curves/grid_search.sh" "${gridSearchArgs[@]}"
fi
|