Переглянути джерело

Merge branch 'bug1772' into maint-0.2.2

Roger Dingledine 13 роки тому
батько
коміт
3cbe463e96
6 змінених файлів з 110 додано та 122 видалено
  1. 11 0
      changes/bug1772
  2. 78 59
      src/or/circuitbuild.c
  3. 1 0
      src/or/circuitbuild.h
  4. 10 10
      src/or/circuituse.c
  5. 4 33
      src/or/or.h
  6. 6 20
      src/test/test.c

+ 11 - 0
changes/bug1772

@@ -0,0 +1,11 @@
+ o Minor bugfixes:
+   - Simplify the logic that causes us to decide if the network is unavailable
+     for purposes of recording circuit build times. If we receive no cells
+     whatsoever for the entire duration of a circuit's full measured lifetime,
+     the network is probably down. This should hopefully reduce some of the
+     cases where we see ridiculous circuit build timeouts for people with spotty
+     wireless connections. Fixes bug 1772; bugfix on 0.2.2.2-alpha.
+   - Prevent the circuit build timeout from becoming larger than the maximum
+     build time we have ever seen. Also, prevent the measurement time period
+     from becoming larger than twice that value. Fixes bug 1772; bugfix on
+     0.2.2.2-alpha

+ 78 - 59
src/or/circuitbuild.c

@@ -149,6 +149,14 @@ circuit_build_times_min_circs_to_observe(void)
   return num;
 }
 
+/** Return true iff <b>cbt</b> has recorded enough build times that we
+ * want to start acting on the timeout it implies. */
+int
+circuit_build_times_enough_to_compute(circuit_build_times_t *cbt)
+{
+  return cbt->total_build_times >= circuit_build_times_min_circs_to_observe();
+}
+
 double
 circuit_build_times_quantile_cutoff(void)
 {
@@ -292,8 +300,8 @@ circuit_build_times_reset(circuit_build_times_t *cbt)
 /**
  * Initialize the buildtimes structure for first use.
  *
- * Sets the initial timeout value based to either the
- * config setting or BUILD_TIMEOUT_INITIAL_VALUE.
+ * Sets the initial timeout values based on either the config setting,
+ * the consensus param, or the default (CBT_DEFAULT_TIMEOUT_INITIAL_VALUE).
  */
 void
 circuit_build_times_init(circuit_build_times_t *cbt)
@@ -306,6 +314,7 @@ circuit_build_times_init(circuit_build_times_t *cbt)
   control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESET);
 }
 
+#if 0
 /**
  * Rewind our build time history by n positions.
  */
@@ -332,6 +341,7 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
           "Rewound history by %d places. Current index: %d. "
           "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
 }
+#endif
 
 /**
  * Add a new build time value <b>time</b> to the set of build times. Time
@@ -916,9 +926,7 @@ int
 circuit_build_times_needs_circuits(circuit_build_times_t *cbt)
 {
   /* Return true if < MIN_CIRCUITS_TO_OBSERVE */
-  if (cbt->total_build_times < circuit_build_times_min_circs_to_observe())
-    return 1;
-  return 0;
+  return !circuit_build_times_enough_to_compute(cbt);
 }
 
 /**
@@ -933,7 +941,11 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
 }
 
 /**
- * Called to indicate that the network showed some signs of liveness.
+ * Called to indicate that the network showed some signs of liveness,
+ * i.e. we received a cell.
+ *
+ * This is used by circuit_build_times_network_check_live() to decide
+ * if we should record the circuit build timeout or not.
  *
  * This function is called every time we receive a cell. Avoid
  * syscalls, events, and other high-intensity work.
@@ -941,14 +953,26 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
 void
 circuit_build_times_network_is_live(circuit_build_times_t *cbt)
 {
-  cbt->liveness.network_last_live = approx_time();
-  cbt->liveness.nonlive_discarded = 0;
+  time_t now = approx_time();
+  if (cbt->liveness.nonlive_timeouts > 0) {
+    log_notice(LD_CIRC,
+               "Tor now sees network activity. Restoring circuit build "
+               "timeout recording. Network was down for %d seconds "
+               "during %d circuit attempts.",
+               (int)(now - cbt->liveness.network_last_live),
+               cbt->liveness.nonlive_timeouts);
+  }
+  cbt->liveness.network_last_live = now;
   cbt->liveness.nonlive_timeouts = 0;
 }
 
 /**
  * Called to indicate that we completed a circuit. Because this circuit
  * succeeded, it doesn't count as a timeout-after-the-first-hop.
+ *
+ * This is used by circuit_build_times_network_check_changed() to determine
+ * if we had too many recent timeouts and need to reset our learned timeout
+ * to something higher.
  */
 void
 circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
@@ -961,6 +985,10 @@ circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
 /**
  * A circuit just timed out. If it failed after the first hop, record it
  * in our history for later deciding if the network speed has changed.
+ *
+ * This is used by circuit_build_times_network_check_changed() to determine
+ * if we had too many recent timeouts and need to reset our learned timeout
+ * to something higher.
  */
 static void
 circuit_build_times_network_timeout(circuit_build_times_t *cbt,
@@ -977,6 +1005,9 @@ circuit_build_times_network_timeout(circuit_build_times_t *cbt,
  * A circuit was just forcibly closed. If there has been no recent network
  * activity at all, but this circuit was launched back when we thought the
  * network was live, increment the number of "nonlive" circuit timeouts.
+ *
+ * This is used by circuit_build_times_network_check_live() to decide
+ * if we should record the circuit build timeout or not.
  */
 static void
 circuit_build_times_network_close(circuit_build_times_t *cbt,
@@ -986,15 +1017,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
   /*
    * Check if this is a timeout that was for a circuit that spent its
    * entire existence during a time where we have had no network activity.
-   *
-   * Also double check that it is a valid timeout after we have possibly
-   * just recently reset cbt->close_ms.
-   *
-   * We use close_ms here because timeouts aren't actually counted as timeouts
-   * until close_ms elapses.
    */
-  if (cbt->liveness.network_last_live <= start_time &&
-          start_time <= (now - cbt->close_ms/1000.0)) {
+  if (cbt->liveness.network_last_live < start_time) {
     if (did_onehop) {
       char last_live_buf[ISO_TIME_LEN+1];
       char start_time_buf[ISO_TIME_LEN+1];
@@ -1009,12 +1033,25 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
                now_buf);
     }
     cbt->liveness.nonlive_timeouts++;
+    if (cbt->liveness.nonlive_timeouts == 1) {
+      log_notice(LD_CIRC,
+                 "Tor has not observed any network activity for the past %d "
+                 "seconds. Disabling circuit build timeout code.",
+                 (int)(now - cbt->liveness.network_last_live));
+    } else {
+      log_info(LD_CIRC,
+             "Got non-live timeout. Current count is: %d",
+             cbt->liveness.nonlive_timeouts);
+    }
   }
 }
 
 /**
- * Returns false if the network has not received a cell or tls handshake
- * in the past NETWORK_NOTLIVE_TIMEOUT_COUNT circuits.
+ * When the network is not live, we do not record circuit build times.
+ *
+ * The network is considered not live if there has been at least one
+ * circuit build that began and ended (had its close_ms measurement
+ * period expire) since we last received a cell.
  *
  * Also has the side effect of rewinding the circuit time history
  * in the case of recent liveness changes.
@@ -1022,45 +1059,8 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
 int
 circuit_build_times_network_check_live(circuit_build_times_t *cbt)
 {
-  time_t now = approx_time();
-  if (cbt->liveness.nonlive_timeouts >= CBT_NETWORK_NONLIVE_DISCARD_COUNT) {
-    if (!cbt->liveness.nonlive_discarded) {
-      cbt->liveness.nonlive_discarded = 1;
-      log_notice(LD_CIRC, "Network is no longer live (too many recent "
-                "circuit timeouts). Dead for %ld seconds.",
-                (long int)(now - cbt->liveness.network_last_live));
-      /* Only discard NETWORK_NONLIVE_TIMEOUT_COUNT-1 because we stopped
-       * counting after that */
-      circuit_build_times_rewind_history(cbt,
-                     CBT_NETWORK_NONLIVE_TIMEOUT_COUNT-1);
-      control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_DISCARD);
-    }
-    return 0;
-  } else if (cbt->liveness.nonlive_timeouts >=
-                CBT_NETWORK_NONLIVE_TIMEOUT_COUNT) {
-    if (cbt->timeout_ms < circuit_build_times_get_initial_timeout()) {
-      log_notice(LD_CIRC,
-                "Network is flaky. No activity for %ld seconds. "
-                "Temporarily raising timeout to %lds.",
-                (long int)(now - cbt->liveness.network_last_live),
-                tor_lround(circuit_build_times_get_initial_timeout()/1000));
-      cbt->liveness.suspended_timeout = cbt->timeout_ms;
-      cbt->liveness.suspended_close_timeout = cbt->close_ms;
-      cbt->close_ms = cbt->timeout_ms
-                    = circuit_build_times_get_initial_timeout();
-      control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_SUSPENDED);
-    }
-
+  if (cbt->liveness.nonlive_timeouts > 0) {
     return 0;
-  } else if (cbt->liveness.suspended_timeout > 0) {
-    log_notice(LD_CIRC,
-              "Network activity has resumed. "
-              "Resuming circuit timeout calculations.");
-    cbt->timeout_ms = cbt->liveness.suspended_timeout;
-    cbt->close_ms = cbt->liveness.suspended_close_timeout;
-    cbt->liveness.suspended_timeout = 0;
-    cbt->liveness.suspended_close_timeout = 0;
-    control_event_buildtimeout_set(cbt, BUILDTIMEOUT_SET_EVENT_RESUME);
   }
 
   return 1;
@@ -1069,7 +1069,8 @@ circuit_build_times_network_check_live(circuit_build_times_t *cbt)
 /**
  * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
  * the past RECENT_CIRCUITS time out after the first hop. Used to detect
- * if the network connection has changed significantly.
+ * if the network connection has changed significantly, and if so,
+ * resets our circuit build timeout to the default.
  *
  * Also resets the entire timeout history in this case and causes us
  * to restart the process of building test circuits and estimating a
@@ -1228,9 +1229,9 @@ circuit_build_times_count_timeout(circuit_build_times_t *cbt,
 static int
 circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
 {
-  if (cbt->total_build_times < circuit_build_times_min_circs_to_observe()) {
+  build_time_t max_time;
+  if (!circuit_build_times_enough_to_compute(cbt))
     return 0;
-  }
 
   if (!circuit_build_times_update_alpha(cbt))
     return 0;
@@ -1241,11 +1242,29 @@ circuit_build_times_set_timeout_worker(circuit_build_times_t *cbt)
   cbt->close_ms = circuit_build_times_calculate_timeout(cbt,
                                 circuit_build_times_close_quantile());
 
+  max_time = circuit_build_times_max(cbt);
+
   /* Sometimes really fast guard nodes give us such a steep curve
    * that this ends up being not that much greater than timeout_ms.
    * Make it be at least 1 min to handle this case. */
   cbt->close_ms = MAX(cbt->close_ms, circuit_build_times_initial_timeout());
 
+  if (cbt->timeout_ms > max_time) {
+    log_notice(LD_CIRC,
+               "Circuit build timeout of %dms is beyond the maximum build "
+               "time we have ever observed. Capping it to %dms.",
+               (int)cbt->timeout_ms, max_time);
+    cbt->timeout_ms = max_time;
+  }
+
+  if (max_time < INT32_MAX/2 && cbt->close_ms > 2*max_time) {
+    log_notice(LD_CIRC,
+               "Circuit build measurement period of %dms is more than twice "
+               "the maximum build time we have ever observed. Capping it to "
+               "%dms.", (int)cbt->close_ms, 2*max_time);
+    cbt->close_ms = 2*max_time;
+  }
+
   cbt->have_computed_timeout = 1;
   return 1;
 }

+ 1 - 0
src/or/circuitbuild.h

@@ -79,6 +79,7 @@ void entries_retry_all(or_options_t *options);
 void entry_guards_free_all(void);
 
 extern circuit_build_times_t circ_times;
+int circuit_build_times_enough_to_compute(circuit_build_times_t *cbt);
 void circuit_build_times_update_state(circuit_build_times_t *cbt,
                                       or_state_t *state);
 int circuit_build_times_parse_state(circuit_build_times_t *cbt,

+ 10 - 10
src/or/circuituse.c

@@ -34,8 +34,6 @@ extern circuit_t *global_circuitlist; /* from circuitlist.c */
 static void circuit_expire_old_circuits_clientside(time_t now);
 static void circuit_increment_failure_count(void);
 
-long int lround(double x);
-
 /** Return 1 if <b>circ</b> could be returned by circuit_get_best().
  * Else return 0.
  */
@@ -280,13 +278,14 @@ void
 circuit_expire_building(time_t now)
 {
   circuit_t *victim, *next_circ = global_circuitlist;
-  /* circ_times.timeout is BUILD_TIMEOUT_INITIAL_VALUE if we haven't
-   * decided on a customized one yet */
-  time_t general_cutoff = now - lround(circ_times.timeout_ms/1000);
-  time_t begindir_cutoff = now - lround(circ_times.timeout_ms/2000);
-  time_t fourhop_cutoff = now - lround(4*circ_times.timeout_ms/3000);
-  time_t cannibalize_cutoff = now - lround(circ_times.timeout_ms/2000);
-  time_t close_cutoff = now - lround(circ_times.close_ms/1000);
+  /* circ_times.timeout_ms and circ_times.close_ms are from
+   * circuit_build_times_get_initial_timeout() if we haven't computed
+   * custom timeouts yet */
+  time_t general_cutoff = now - tor_lround(circ_times.timeout_ms/1000);
+  time_t begindir_cutoff = now - tor_lround(circ_times.timeout_ms/2000);
+  time_t fourhop_cutoff = now - tor_lround(4*circ_times.timeout_ms/3000);
+  time_t cannibalize_cutoff = now - tor_lround(circ_times.timeout_ms/2000);
+  time_t close_cutoff = now - tor_lround(circ_times.close_ms/1000);
   time_t introcirc_cutoff = begindir_cutoff;
   cpath_build_state_t *build_state;
 
@@ -385,7 +384,8 @@ circuit_expire_building(time_t now)
         continue;
       }
 
-      if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim))) {
+      if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim)) &&
+          circuit_build_times_enough_to_compute(&circ_times)) {
         /* Circuits are allowed to last longer for measurement.
          * Switch their purpose and wait. */
         if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {

+ 4 - 33
src/or/or.h

@@ -2967,26 +2967,6 @@ typedef uint32_t build_time_t;
 /** Save state every 10 circuits */
 #define CBT_SAVE_STATE_EVERY 10
 
-/* Circuit Build Timeout network liveness constants */
-
-/**
- * Have we received a cell in the last N circ attempts?
- *
- * This tells us when to temporarily switch back to
- * BUILD_TIMEOUT_INITIAL_VALUE until we start getting cells,
- * at which point we switch back to computing the timeout from
- * our saved history.
- */
-#define CBT_NETWORK_NONLIVE_TIMEOUT_COUNT 3
-
-/**
- * This tells us when to toss out the last streak of N timeouts.
- *
- * If instead we start getting cells, we switch back to computing the timeout
- * from our saved history.
- */
-#define CBT_NETWORK_NONLIVE_DISCARD_COUNT (CBT_NETWORK_NONLIVE_TIMEOUT_COUNT*2)
-
 /* Circuit build times consensus parameters */
 
 /**
@@ -3005,8 +2985,9 @@ typedef uint32_t build_time_t;
  * Maximum count of timeouts that finish the first hop in the past
  * RECENT_CIRCUITS before calculating a new timeout.
  *
- * This tells us to abandon timeout history and set
- * the timeout back to BUILD_TIMEOUT_INITIAL_VALUE.
+ * This tells us whether to abandon timeout history and set
+ * the timeout back to whatever circuit_build_times_get_initial_timeout()
+ * gives us.
  */
 #define CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT (CBT_DEFAULT_RECENT_CIRCUITS*9/10)
 
@@ -3027,9 +3008,7 @@ double circuit_build_times_quantile_cutoff(void);
 #define CBT_DEFAULT_TIMEOUT_INITIAL_VALUE (60*1000)
 int32_t circuit_build_times_initial_timeout(void);
 
-#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1 || \
-    CBT_NETWORK_NONLIVE_DISCARD_COUNT < 1 || \
-    CBT_NETWORK_NONLIVE_TIMEOUT_COUNT < 1
+#if CBT_DEFAULT_MAX_RECENT_TIMEOUT_COUNT < 1
 #error "RECENT_CIRCUITS is set too low."
 #endif
 
@@ -3039,8 +3018,6 @@ typedef struct {
   time_t network_last_live;
   /** If the network is not live, how many timeouts has this caused? */
   int nonlive_timeouts;
-  /** If the network is not live, have we yet discarded our history? */
-  int nonlive_discarded;
   /** Circular array of circuits that have made it to the first hop. Slot is
    * 1 if circuit timed out, 0 if circuit succeeded */
   int8_t *timeouts_after_firsthop;
@@ -3048,12 +3025,6 @@ typedef struct {
   int num_recent_circs;
   /** Index into circular array. */
   int after_firsthop_idx;
-  /** Timeout gathering is suspended if non-zero. The old timeout value
-    * is stored here in that case. */
-  double suspended_timeout;
-  /** Timeout gathering is suspended if non-zero. The old close value
-    * is stored here in that case. */
-  double suspended_close_timeout;
 } network_liveness_t;
 
 /** Structure for circuit build times history */

+ 6 - 20
src/test/test.c

@@ -499,28 +499,14 @@ test_circuit_timeout(void)
 
     build_times_idx = estimate.build_times_idx;
     total_build_times = estimate.total_build_times;
-    for (i = 0; i < CBT_NETWORK_NONLIVE_TIMEOUT_COUNT; i++) {
-      test_assert(circuit_build_times_network_check_live(&estimate));
-      test_assert(circuit_build_times_network_check_live(&final));
-
-      circuit_build_times_count_close(&estimate, 0,
-                 (time_t)(approx_time()-estimate.close_ms/1000.0-1));
-      circuit_build_times_count_close(&final, 0,
-                 (time_t)(approx_time()-final.close_ms/1000.0-1));
-    }
-
-    test_assert(!circuit_build_times_network_check_live(&estimate));
-    test_assert(!circuit_build_times_network_check_live(&final));
 
-    for ( ; i < CBT_NETWORK_NONLIVE_DISCARD_COUNT; i++) {
-      circuit_build_times_count_close(&estimate, 0,
-                (time_t)(approx_time()-estimate.close_ms/1000.0-1));
+    test_assert(circuit_build_times_network_check_live(&estimate));
+    test_assert(circuit_build_times_network_check_live(&final));
 
-      if (i < CBT_NETWORK_NONLIVE_DISCARD_COUNT-1) {
-        circuit_build_times_count_close(&final, 0,
-                (time_t)(approx_time()-final.close_ms/1000.0-1));
-      }
-    }
+    circuit_build_times_count_close(&estimate, 0,
+            (time_t)(approx_time()-estimate.close_ms/1000.0-1));
+    circuit_build_times_count_close(&final, 0,
+            (time_t)(approx_time()-final.close_ms/1000.0-1));
 
     test_assert(!circuit_build_times_network_check_live(&estimate));
     test_assert(!circuit_build_times_network_check_live(&final));