#!/bin/bash
#
# Runs the learning-curve gradient-descent grid search for the C++ (firefox)
# and Rust data, prints the selected T1 / l values, and runs the plotting
# scripts under "$CODE_DIR/learning_curves/".
#
# Usage:
#   <script> CODE_DIR DATA_DIR ANNOTATED_DIR REPO_DIR EXP_DIR PLOT_DIR RUST_PROJECTS
#
# RUST_PROJECTS is ONE argument holding a whitespace-separated list of
# project names (it is word-split wherever it is iterated).
#
# Assumes the following already exist:
#   "$ANNOTATED_DIR/rust-blame.csv"
#   "$ANNOTATED_DIR/c++-blame.csv"
#   "$ANNOTATED_DIR/relevant-dirs.csv"
#   "$ANNOTATED_DIR/relevant-c++"
#   "$REPO_DIR/firefox"
#   "$EXP_DIR/firefox"
#   "$REPO_DIR/<project>" for every project in $RUST_PROJECTS
#   "$EXP_DIR/<project>"  for every project in $RUST_PROJECTS

# Refuse to run with missing arguments: every path below is derived from
# them, and an empty DATA_DIR would make the script write under "/".
if [ "$#" -lt 7 ]; then
  echo "usage: $0 CODE_DIR DATA_DIR ANNOTATED_DIR REPO_DIR EXP_DIR PLOT_DIR RUST_PROJECTS" >&2
  exit 2
fi

CODE_DIR="$1"
DATA_DIR="$2"
ANNOTATED_DIR="$3"
REPO_DIR="$4"
EXP_DIR="$5"
PLOT_DIR="$6"
RUST_PROJECTS="$7"

# All grid-search output files are written into this directory.
GRAD_DESCENT_DIR="$DATA_DIR/gradient_descent/"
if ! mkdir -p "$GRAD_DESCENT_DIR"; then
  echo "cannot create $GRAD_DESCENT_DIR" >&2
  exit 1
fi
# ':'-separated candidate values handed to learningCurve_gradientDescent.py.
# The l guesses are split into two lists; each list is passed to its own
# background job further down in the script.
t1_guesses="0.000001:0.00001:0.0001:0.001:0.01:0.1:0.2:0.4:0.8:0.99"
l_guesses_1="0.0:0.1:0.2:0.3:0.4:0.5"
l_guesses_2="-0.1:-0.2:-0.3:-0.4:-0.5"

# Prints "<base>/<project>/" for every project in $RUST_PROJECTS, joined
# with ':' (no trailing ':'). Prints nothing when the list is empty.
# Arguments: $1 - base directory
join_project_dirs() {
  local base=$1 joined="" project
  # RUST_PROJECTS is a whitespace-separated list; word splitting is intended.
  # shellcheck disable=SC2086
  for project in $RUST_PROJECTS; do
    joined+="$base/$project/:"
  done
  # Strip the single trailing separator (portable; no GNU-only `head -c -1`).
  printf '%s' "${joined%:}"
}

# Prints, for every project in $RUST_PROJECTS, the second comma-separated
# column of the row(s) in the CSV whose first column is the project name,
# joined with ':'. A project without a row contributes an empty entry.
# Arguments: $1 - path to the CSV
relevant_dirs_for_projects() {
  local csv=$1 joined="" project dirs
  # shellcheck disable=SC2086
  for project in $RUST_PROJECTS; do
    # The project name is used as a regex prefix, anchored by '^' and ','.
    dirs=$(grep "^$project," "$csv" | cut -d, -f2)
    joined+="$dirs:"
  done
  printf '%s' "${joined%:}"
}

rust_repos=$(join_project_dirs "$REPO_DIR")
rust_exps=$(join_project_dirs "$EXP_DIR")
rust_relevant=$(relevant_dirs_for_projects "$ANNOTATED_DIR/relevant-dirs.csv")

# mozilla is kind enough to have no files with spaces in names in gecko-dev,
# so the newline-separated list can be flattened into a space-separated one.
c_relevant="$(tr '\n' ' ' <"$ANNOTATED_DIR/relevant-c++")"
echo "running gradient descent (this will take a while)..."

# Runs one learningCurve_gradientDescent.py job and stores its stdout.
# Arguments:
#   $1 - label used in the completion / failure message
#   $2 - blame CSV
#   $3 - repository argument
#   $4 - relevant-directories argument
#   $5 - experience argument
#   $6 - ':'-separated l guesses for this job
#   $7 - file that receives the job's stdout
# Outputs: "<label> complete" on stdout, or a failure note on stderr.
run_grid_search() {
  local label=$1 blame_csv=$2 repos=$3 relevant=$4 exps=$5
  local l_guesses=$6 out_file=$7
  if python3 "$CODE_DIR/learning_curves/learningCurve_gradientDescent.py" \
    "$blame_csv" \
    "$repos" \
    "$relevant" \
    "$exps" \
    "$t1_guesses" \
    "$l_guesses" >"$out_file"; then
    echo "$label complete"
  else
    # The script keeps going, but make the failure visible.
    echo "$label failed; $out_file may be missing or incomplete" >&2
  fi
}

# Per-job output files; merged and deleted once every job has finished.
rust_part_1="$GRAD_DESCENT_DIR/grid_search.rust.1.cuml.txt"
rust_part_2="$GRAD_DESCENT_DIR/grid_search.rust.2.cuml.txt"
c_part_1="$GRAD_DESCENT_DIR/grid_search.c.1.cuml.txt"
c_part_2="$GRAD_DESCENT_DIR/grid_search.c.2.cuml.txt"

run_grid_search "thread1 rust" "$ANNOTATED_DIR/rust-blame.csv" \
  "$rust_repos" "$rust_relevant" "$rust_exps" \
  "$l_guesses_1" "$rust_part_1" &
run_grid_search "thread2 rust" "$ANNOTATED_DIR/rust-blame.csv" \
  "$rust_repos" "$rust_relevant" "$rust_exps" \
  "$l_guesses_2" "$rust_part_2" &

# With fewer than four processors, let the two Rust jobs finish before
# starting the two C++ jobs instead of running all four at once.
if [ "$(nproc)" -lt 4 ]; then
  wait
fi

run_grid_search "thread1 c" "$ANNOTATED_DIR/c++-blame.csv" \
  "$REPO_DIR/firefox/" "$c_relevant" "$EXP_DIR/firefox/" \
  "$l_guesses_1" "$c_part_1" &
run_grid_search "thread2 c" "$ANNOTATED_DIR/c++-blame.csv" \
  "$REPO_DIR/firefox/" "$c_relevant" "$EXP_DIR/firefox/" \
  "$l_guesses_2" "$c_part_2" &

# Barrier: every background job must be done before the parts are merged.
wait
echo "processing complete"
echo

cat "$rust_part_1" "$rust_part_2" >"$GRAD_DESCENT_DIR/grid_search.rust.cuml.txt"
cat "$c_part_1" "$c_part_2" >"$GRAD_DESCENT_DIR/grid_search.c.cuml.txt"
rm -- "$rust_part_1" "$rust_part_2" "$c_part_1" "$c_part_2"
# Prints "<T1> <l>" taken from whitespace-separated fields 4 and 5 of the
# selected row of a grid-search result file.
#
# The selected row is, among the lines that START WITH the given bias string,
# the one sorting first under `sort -nk 8` (numerically smallest key
# beginning at field 8).
#
# Arguments:
#   $1 - bias prefix to select rows by (the script uses -1, 0 and 1)
#   $2 - grid-search result file
# Outputs: one line on stdout; it is empty when no row matches or the row
#          has fewer than four fields.
#
# NOTE(review): the bias is matched as a line prefix, so "1" would also match
# a line starting with "10" — confirm the file format rules that out.
# NOTE(review): `sort -n` does not understand exponent notation (e.g. 1e-05);
# confirm column 8 is never printed that way.
get_t1_l() {
  local bias=$1 file=$2
  local best
  local -a fields
  # Pin IFS so the split and the join below do not depend on the caller.
  local IFS=$' \t\n'

  # -e / -- keep a bias such as "-1" and odd file names from being parsed
  # as grep options.
  best=$(grep -e "^$bias" -- "$file" | sort -nk 8 | head -1)

  # Split into an array instead of an unquoted `echo`, so a field such as
  # '*' is never glob-expanded against the current directory.
  read -r -a fields <<<"$best"

  # Elements 3 and 4 are fields 4 and 5; absent fields are simply omitted.
  printf '%s\n' "${fields[*]:3:2}"
}
# Pull the selected "T1 l" pairs out of the merged grid-search files:
# bias 0 gives the reported value, bias -1 / 1 the lower / upper values shown
# in parentheses. Each pair is split with `read -r`, which neither
# glob-expands the value nor duplicates a lone word into both variables.
c_grid_file="$GRAD_DESCENT_DIR/grid_search.c.cuml.txt"
c_t1_l="$(get_t1_l 0 "$c_grid_file")"
c_t1_l_err_low="$(get_t1_l -1 "$c_grid_file")"
c_t1_l_err_up="$(get_t1_l 1 "$c_grid_file")"
read -r c_t1 c_l <<<"$c_t1_l"
read -r c_t1_err_low c_l_err_low <<<"$c_t1_l_err_low"
read -r c_t1_err_up c_l_err_up <<<"$c_t1_l_err_up"
echo "C++ T1: $c_t1 ($c_t1_err_low, $c_t1_err_up)"
echo "C++ l: $c_l ($c_l_err_low, $c_l_err_up)"

rust_grid_file="$GRAD_DESCENT_DIR/grid_search.rust.cuml.txt"
rust_t1_l="$(get_t1_l 0 "$rust_grid_file")"
rust_t1_l_err_low="$(get_t1_l -1 "$rust_grid_file")"
rust_t1_l_err_up="$(get_t1_l 1 "$rust_grid_file")"
read -r rust_t1 rust_l <<<"$rust_t1_l"
read -r rust_t1_err_low rust_l_err_low <<<"$rust_t1_l_err_low"
read -r rust_t1_err_up rust_l_err_up <<<"$rust_t1_l_err_up"
echo "Rust T1: $rust_t1 ($rust_t1_err_low, $rust_t1_err_up)"
echo "Rust l: $rust_l ($rust_l_err_low, $rust_l_err_up)"
echo
echo "plotting data..."

# The two model-vs-real plots run in the background while the remaining two
# plotting scripts run in the foreground; the final `wait` joins them.
# Fitted parameters are quoted so each one stays exactly one argument, as in
# the model-vs-model.py call below.
python3 "$CODE_DIR/learning_curves/model-vs-real.py" \
  "$ANNOTATED_DIR/c++-blame.csv" \
  "$REPO_DIR/firefox/" \
  "$c_relevant" \
  "$EXP_DIR/firefox/" \
  "$PLOT_DIR/C++.pdf" \
  "$c_t1" "$c_l" "$c_t1_err_low" "$c_l_err_low" \
  "$c_t1_err_up" "$c_l_err_up" &

python3 "$CODE_DIR/learning_curves/model-vs-real.py" \
  "$ANNOTATED_DIR/rust-blame.csv" \
  "$rust_repos" \
  "$rust_relevant" \
  "$rust_exps" \
  "$PLOT_DIR/Rust.pdf" \
  "$rust_t1" "$rust_l" "$rust_t1_err_low" "$rust_l_err_low" \
  "$rust_t1_err_up" "$rust_l_err_up" &

# LaTeX markup passed to the plotting scripts as the C++ label
# (single-quoted so the backslashes reach Python untouched).
cpp_tex='C\nolinebreak\hspace{-.05em}\raisebox{.4ex}{\relsize{-3}{\textbf{+}}}\nolinebreak\hspace{-.10em}\raisebox{.4ex}{\relsize{-3}{\textbf{+}}}\xspace'

python3 "$CODE_DIR/learning_curves/model-vs-model.py" \
  "$PLOT_DIR/C++-vs-Rust.pdf" \
  "$cpp_tex" \
  "$c_t1" "$c_l" "$c_t1_err_low" "$c_l_err_low" \
  "$c_t1_err_up" "$c_l_err_up" \
  "Rust" "$rust_t1" "$rust_l" "$rust_t1_err_low" "$rust_l_err_low" \
  "$rust_t1_err_up" "$rust_l_err_up" \
  200

python3 "$CODE_DIR/learning_curves/plot-experience.py" \
  "$REPO_DIR/firefox/" \
  "$c_relevant" \
  "$EXP_DIR/firefox/" \
  "$cpp_tex" \
  "$rust_repos" \
  "$rust_relevant" \
  "$rust_exps" \
  "Rust" \
  "$PLOT_DIR/experience.frac.pdf" \
  "$PLOT_DIR/experience.tot.pdf"

# Join the two background model-vs-real jobs.
wait
echo "plotting complete"
echo "calculating number of 0-exp. VCCs to flip sign of Rust l..."

# Same script and Rust inputs as the grid search, but with an l range that
# spans zero and an extra "search" argument; output goes to the terminal.
# NOTE(review): what the "search" mode does is defined in
# learningCurve_gradientDescent.py, not here — confirm there.
python3 "$CODE_DIR/learning_curves/learningCurve_gradientDescent.py" \
  "$ANNOTATED_DIR/rust-blame.csv" \
  "$rust_repos" \
  "$rust_relevant" \
  "$rust_exps" \
  "$t1_guesses" \
  "-0.3:-0.2:-0.1:0:0.1:0.2:0.3" \
  "search"