grid_search.sh 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. #!/bin/bash
  2. CODE_DIR="$1"
  3. DATA_DIR="$2"
  4. ANNOTATED_DIR="$3"
  5. REPO_DIR="$4"
  6. EXP_DIR="$5"
  7. PLOT_DIR="$6"
  8. RUST_PROJECTS="$7"
  9. GRAD_DESCENT_DIR="$DATA_DIR/gradient_descent/"
  10. mkdir -p "$GRAD_DESCENT_DIR"
  11. # assumes:
  12. # "$ANNOTATED_DIR/rust-blame.csv"
  13. # "$ANNOTATED_DIR/relevant-dirs.csv"
  14. # "$REPO_DIR/firefox"
  15. # "$EXP_DIR/firefox"
  16. # "$REPO_DIR/{$RUST_PROJECTS}"
  17. # "$EXP_DIR/{$RUST_PROJECTS}"
  18. t1_guesses="0.000001:0.00001:0.0001:0.001:0.01:0.1:0.2:0.4:0.8:0.99"
  19. l_guesses_1="0.0:0.1:0.2:0.3:0.4:0.5"
  20. l_guesses_2="-0.1:-0.2:-0.3:-0.4:-0.5"
  21. rust_repos=$(for project in $RUST_PROJECTS ; do
  22. echo -n "$REPO_DIR/$project/:"
  23. done | head -c -1)
  24. rust_exps=$(for project in $RUST_PROJECTS ; do
  25. echo -n "$EXP_DIR/$project/:"
  26. done | head -c -1)
  27. rust_relevant=$(for project in $RUST_PROJECTS ; do
  28. echo -n "$(grep "^$project," "$ANNOTATED_DIR/relevant-dirs.csv" | cut -d, -f2):"
  29. done | head -c -1)
  30. # mozilla is kind enough to have no files with spaces in names in gecko-dev
  31. c_relevant="$(tr '\n' ' ' <"$ANNOTATED_DIR/relevant-c++")"
  32. echo "running gradient descent (this will take a while)..."
  33. (
  34. python3 "$CODE_DIR/learning_curves/learningCurve_gradientDescent.py" \
  35. "$ANNOTATED_DIR/rust-blame.csv" \
  36. "$rust_repos" \
  37. "$rust_relevant" \
  38. "$rust_exps" \
  39. "$t1_guesses" \
  40. "$l_guesses_1" > "$GRAD_DESCENT_DIR/grid_search.rust.1.cuml.txt" &&
  41. echo "thread1 rust complete"
  42. ) &
  43. (
  44. python3 "$CODE_DIR/learning_curves/learningCurve_gradientDescent.py" \
  45. "$ANNOTATED_DIR/rust-blame.csv" \
  46. "$rust_repos" \
  47. "$rust_relevant" \
  48. "$rust_exps" \
  49. "$t1_guesses" \
  50. "$l_guesses_2" > "$GRAD_DESCENT_DIR/grid_search.rust.2.cuml.txt" &&
  51. echo "thread2 rust complete"
  52. ) &
  53. if [ $(nproc) -lt 4 ] ; then wait ; fi
  54. (
  55. python3 "$CODE_DIR/learning_curves/learningCurve_gradientDescent.py" \
  56. "$ANNOTATED_DIR/c++-blame.csv" \
  57. "$REPO_DIR/firefox/" \
  58. "$c_relevant" \
  59. "$EXP_DIR/firefox/" \
  60. "$t1_guesses" \
  61. "$l_guesses_1" > "$GRAD_DESCENT_DIR/grid_search.c.1.cuml.txt" &&
  62. echo "thread1 c complete"
  63. ) &
  64. (
  65. python3 "$CODE_DIR/learning_curves/learningCurve_gradientDescent.py" \
  66. "$ANNOTATED_DIR/c++-blame.csv" \
  67. "$REPO_DIR/firefox/" \
  68. "$c_relevant" \
  69. "$EXP_DIR/firefox/" \
  70. "$t1_guesses" \
  71. "$l_guesses_2" > "$GRAD_DESCENT_DIR/grid_search.c.2.cuml.txt" &&
  72. echo "thread2 c complete"
  73. ) &
  74. wait
  75. echo "processing complete"
  76. echo
  77. cat "$GRAD_DESCENT_DIR/grid_search.rust.1.cuml.txt" "$GRAD_DESCENT_DIR/grid_search.rust.2.cuml.txt" > "$GRAD_DESCENT_DIR/grid_search.rust.cuml.txt"
  78. cat "$GRAD_DESCENT_DIR/grid_search.c.1.cuml.txt" "$GRAD_DESCENT_DIR/grid_search.c.2.cuml.txt" > "$GRAD_DESCENT_DIR/grid_search.c.cuml.txt"
  79. rm "$GRAD_DESCENT_DIR/grid_search.rust.1.cuml.txt" "$GRAD_DESCENT_DIR/grid_search.rust.2.cuml.txt" "$GRAD_DESCENT_DIR/grid_search.c.1.cuml.txt" "$GRAD_DESCENT_DIR/grid_search.c.2.cuml.txt"
  80. function get_t1_l() {
  81. bias="$1"
  82. file="$2"
  83. best=$(grep "^$bias" "$file" | sort -nk 8 | head -1)
  84. t1=$(echo $best | cut -f4 -d' ')
  85. l=$(echo $best | cut -f5 -d' ')
  86. echo $t1 $l
  87. }
  88. c_t1_l="$(get_t1_l 0 "$GRAD_DESCENT_DIR/grid_search.c.cuml.txt")"
  89. c_t1_l_err_low="$(get_t1_l -1 "$GRAD_DESCENT_DIR/grid_search.c.cuml.txt")"
  90. c_t1_l_err_up="$(get_t1_l 1 "$GRAD_DESCENT_DIR/grid_search.c.cuml.txt")"
  91. c_t1="$(echo $c_t1_l | cut -f1 -d' ')"
  92. c_l="$(echo $c_t1_l | cut -f2 -d' ')"
  93. c_t1_err_low="$(echo $c_t1_l_err_low | cut -f1 -d' ')"
  94. c_l_err_low="$(echo $c_t1_l_err_low | cut -f2 -d' ')"
  95. c_t1_err_up="$(echo $c_t1_l_err_up | cut -f1 -d' ')"
  96. c_l_err_up="$(echo $c_t1_l_err_up | cut -f2 -d' ')"
  97. echo "C++ T1: $c_t1 ($c_t1_err_low, $c_t1_err_up)"
  98. echo "C++ l: $c_l ($c_l_err_low, $c_l_err_up)"
  99. rust_t1_l="$(get_t1_l 0 "$GRAD_DESCENT_DIR/grid_search.rust.cuml.txt")"
  100. rust_t1_l_err_low="$(get_t1_l -1 "$GRAD_DESCENT_DIR/grid_search.rust.cuml.txt")"
  101. rust_t1_l_err_up="$(get_t1_l 1 "$GRAD_DESCENT_DIR/grid_search.rust.cuml.txt")"
  102. rust_t1="$(echo $rust_t1_l | cut -f1 -d' ')"
  103. rust_l="$(echo $rust_t1_l | cut -f2 -d' ')"
  104. rust_t1_err_low="$(echo $rust_t1_l_err_low | cut -f1 -d' ')"
  105. rust_l_err_low="$(echo $rust_t1_l_err_low | cut -f2 -d' ')"
  106. rust_t1_err_up="$(echo $rust_t1_l_err_up | cut -f1 -d' ')"
  107. rust_l_err_up="$(echo $rust_t1_l_err_up | cut -f2 -d' ')"
  108. echo "Rust T1: $rust_t1 ($rust_t1_err_low, $rust_t1_err_up)"
  109. echo "Rust l: $rust_l ($rust_l_err_low, $rust_l_err_up)"
  110. echo
  111. echo "plotting data..."
  112. python3 "$CODE_DIR/learning_curves/model-vs-real.py" \
  113. "$ANNOTATED_DIR/c++-blame.csv" \
  114. "$REPO_DIR/firefox/" \
  115. "$c_relevant" \
  116. "$EXP_DIR/firefox/" \
  117. "$PLOT_DIR/C++.pdf" \
  118. $c_t1 $c_l $c_t1_err_low $c_l_err_low $c_t1_err_up $c_l_err_up &
  119. python3 "$CODE_DIR/learning_curves/model-vs-real.py" \
  120. "$ANNOTATED_DIR/rust-blame.csv" \
  121. "$rust_repos" \
  122. "$rust_relevant" \
  123. "$rust_exps" \
  124. "$PLOT_DIR/Rust.pdf" \
  125. $rust_t1 $rust_l $rust_t1_err_low $rust_l_err_low \
  126. $rust_t1_err_up $rust_l_err_up &
  127. cpp_tex='C\nolinebreak\hspace{-.05em}\raisebox{.4ex}{\relsize{-3}{\textbf{+}}}\nolinebreak\hspace{-.10em}\raisebox{.4ex}{\relsize{-3}{\textbf{+}}}\xspace'
  128. python3 "$CODE_DIR/learning_curves/model-vs-model.py" \
  129. "$PLOT_DIR/C++-vs-Rust.pdf" \
  130. "$cpp_tex" \
  131. "$c_t1" "$c_l" "$c_t1_err_low" "$c_l_err_low" \
  132. "$c_t1_err_up" "$c_l_err_up" \
  133. "Rust" "$rust_t1" "$rust_l" "$rust_t1_err_low" "$rust_l_err_low" \
  134. "$rust_t1_err_up" "$rust_l_err_up" \
  135. 200
  136. python3 "$CODE_DIR/learning_curves/plot-experience.py" \
  137. "$REPO_DIR/firefox/" \
  138. "$c_relevant" \
  139. "$EXP_DIR/firefox/" \
  140. "$cpp_tex" \
  141. "$rust_repos" \
  142. "$rust_relevant" \
  143. "$rust_exps" \
  144. "Rust" \
  145. "$PLOT_DIR/experience.frac.pdf" \
  146. "$PLOT_DIR/experience.tot.pdf"
  147. wait
  148. echo "plotting complete"
  149. echo "calculating number of 0-exp. VCCs to flip sign of Rust l..."
  150. python3 "$CODE_DIR/learning_curves/learningCurve_gradientDescent.py" \
  151. "$ANNOTATED_DIR/rust-blame.csv" \
  152. "$rust_repos" \
  153. "$rust_relevant" \
  154. "$rust_exps" \
  155. "$t1_guesses" \
  156. "-0.3:-0.2:-0.1:0:0.1:0.2:0.3" \
  157. "search"