Bläddra i källkod

Implement and document new network liveness algorithm.

Based on irc discussion with arma.
Mike Perry 15 år sedan
förälder
incheckning
f39bedf250
5 ändrade filer med 363 tillägg och 156 borttagningar
  1. 14 11
      doc/spec/proposals/151-path-selection-improvements.txt
  2. 176 96
      src/or/circuitbuild.c
  3. 10 2
      src/or/circuituse.c
  4. 68 20
      src/or/or.h
  5. 95 27
      src/or/test.c

+ 14 - 11
doc/spec/proposals/151-path-selection-improvements.txt

@@ -89,17 +89,20 @@ Implementation
     We attempt to detect both network connectivity loss and drastic
     We attempt to detect both network connectivity loss and drastic
     changes in the timeout characteristics.
     changes in the timeout characteristics.
 
 
-    If more than MAX_RECENT_TIMEOUT_RATE (80%) of the past
+    We assume that we've had network connectivity loss if 3 circuits
-    RECENT_CIRCUITS (20) time out, we assume the network connection
+    timeout and we've recieved no cells or TLS handshakes since those
-    has changed, and we discard all buildtimes history and compute
+    circuits began. We then set the timeout to 60 seconds and stop
-    a new timeout by estimating a new Pareto curve using the
+    counting timeouts.
-    position on the Pareto Quartile function for the ratio of
+
-    timeouts.
+    If 3 more circuits timeout and the network still has not been
-
+    live within this new 60 second timeout window, we then discard
-    Network connectivity loss is detected by recording a timestamp every
+    the previous timeouts during this period from our history.
-    time Tor either completes a TLS connection or receives a cell. If
+
-    this timestamp is more than CircuitBuildTimeout*RECENT_CIRCUITS/3
+    To detect changing network conditions, we keep a history of
-    seconds in the past, circuit timeouts are no longer counted.
+    the timeout or non-timeout status of the past RECENT_CIRCUITS (20)
+    that successfully completed more than one hop. If more than 75%
+    of these circuits timeout, we discard all buildtimes history,
+    reset the timeout to 60, and then begin recomputing the timeout.
 
 
   Testing
   Testing
 
 

+ 176 - 96
src/or/circuitbuild.c

@@ -112,10 +112,30 @@ circuitbuild_running_unit_tests(void)
   unit_tests = 1;
   unit_tests = 1;
 }
 }
 
 
+/**
+ * Return the initial default or configured timeout in milliseconds
+ */
+static double
+circuit_build_times_get_initial_timeout(void)
+{
+  double timeout;
+  if (!unit_tests && get_options()->CircuitBuildTimeout) {
+    timeout = get_options()->CircuitBuildTimeout*1000;
+    if (timeout < BUILD_TIMEOUT_MIN_VALUE) {
+      log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %ds",
+               BUILD_TIMEOUT_MIN_VALUE/1000);
+      timeout = BUILD_TIMEOUT_MIN_VALUE;
+    }
+  } else {
+    timeout = BUILD_TIMEOUT_INITIAL_VALUE;
+  }
+  return timeout;
+}
+
 /**
 /**
  * Reset the build time state.
  * Reset the build time state.
  *
  *
- * Leave estimated paramters, timeout, and network liveness in tact
+ * Leave estimated parameters, timeout and network liveness in tact
  * for future use.
  * for future use.
  */
  */
 void
 void
@@ -138,17 +158,49 @@ void
 circuit_build_times_init(circuit_build_times_t *cbt)
 circuit_build_times_init(circuit_build_times_t *cbt)
 {
 {
   memset(cbt, 0, sizeof(*cbt));
   memset(cbt, 0, sizeof(*cbt));
+  cbt->timeout_ms = circuit_build_times_get_initial_timeout();
+}
 
 
-  if (!unit_tests && get_options()->CircuitBuildTimeout) {
+/**
-    cbt->timeout_ms = get_options()->CircuitBuildTimeout*1000;
+ * Rewind our timeout history by n positions.
-    if (cbt->timeout_ms < BUILD_TIMEOUT_MIN_VALUE) {
+ */
-      log_warn(LD_CIRC, "Config CircuitBuildTimeout too low. Setting to %ds",
+static void
-               BUILD_TIMEOUT_MIN_VALUE/1000);
+circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
-      cbt->timeout_ms = BUILD_TIMEOUT_MIN_VALUE;
+{
+  int i = 0;
+
+  if (cbt->pre_timeouts) {
+    if (cbt->pre_timeouts > n) {
+      cbt->pre_timeouts -= n;
+    } else {
+      cbt->pre_timeouts = 0;
     }
     }
+    log_info(LD_CIRC,
+             "Rewound history by %d places. Current index: %d. Total: %d. "
+             "Pre-timeouts: %d", n, cbt->build_times_idx,
+             cbt->total_build_times, cbt->pre_timeouts);
+
+    tor_assert(cbt->build_times_idx == 0);
+    tor_assert(cbt->total_build_times == 0);
+    return;
+  }
+
+  cbt->build_times_idx -= n;
+  cbt->build_times_idx %= NCIRCUITS_TO_OBSERVE;
+
+  for (i = 0; i < n; i++) {
+    cbt->circuit_build_times[(i+cbt->build_times_idx)%NCIRCUITS_TO_OBSERVE]=0;
+  }
+
+  if (cbt->total_build_times > n) {
+    cbt->total_build_times -= n;
   } else {
   } else {
-    cbt->timeout_ms = BUILD_TIMEOUT_INITIAL_VALUE;
+    cbt->total_build_times = 0;
   }
   }
+
+  log_info(LD_CIRC,
+          "Rewound history by %d places. Current index: %d. "
+          "Total: %d", n, cbt->build_times_idx, cbt->total_build_times);
 }
 }
 
 
 /**
 /**
@@ -202,8 +254,9 @@ circuit_build_times_max(circuit_build_times_t *cbt)
   return max_build_time;
   return max_build_time;
 }
 }
 
 
+#if 0
 /** Return minimum circuit build time */
 /** Return minimum circuit build time */
-static build_time_t
+build_time_t
 circuit_build_times_min(circuit_build_times_t *cbt)
 circuit_build_times_min(circuit_build_times_t *cbt)
 {
 {
   int i = 0;
   int i = 0;
@@ -218,6 +271,7 @@ circuit_build_times_min(circuit_build_times_t *cbt)
   }
   }
   return min_build_time;
   return min_build_time;
 }
 }
+#endif
 
 
 /**
 /**
  * Calculate and return a histogram for the set of build times.
  * Calculate and return a histogram for the set of build times.
@@ -589,7 +643,7 @@ circuit_build_times_count_pretimeouts(circuit_build_times_t *cbt)
     double timeout_quantile = 1.0-
     double timeout_quantile = 1.0-
           ((double)cbt->pre_timeouts)/
           ((double)cbt->pre_timeouts)/
                     (cbt->pre_timeouts+cbt->total_build_times);
                     (cbt->pre_timeouts+cbt->total_build_times);
-    cbt->Xm = circuit_build_times_min(cbt);
+    cbt->Xm = circuit_build_times_mode(cbt);
     tor_assert(cbt->Xm > 0);
     tor_assert(cbt->Xm > 0);
     /* Use current timeout to get an estimate on alpha */
     /* Use current timeout to get an estimate on alpha */
     circuit_build_times_initial_alpha(cbt, timeout_quantile,
     circuit_build_times_initial_alpha(cbt, timeout_quantile,
@@ -630,28 +684,85 @@ circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt)
 void
 void
 circuit_build_times_network_is_live(circuit_build_times_t *cbt)
 circuit_build_times_network_is_live(circuit_build_times_t *cbt)
 {
 {
-  cbt->network_last_live = approx_time();
+  cbt->liveness.network_last_live = approx_time();
+  cbt->liveness.nonlive_discarded = 0;
+  cbt->liveness.nonlive_timeouts = 0;
+}
+
+/**
+ * Called to indicate that we completed a circuit
+ */
+void
+circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
+{
+  cbt->liveness.onehop_timeouts[cbt->liveness.onehop_idx] = 0;
+  cbt->liveness.onehop_idx++;
+  cbt->liveness.onehop_idx %= RECENT_CIRCUITS;
+}
+
+/**
+ * Count a circuit that timed out with no network activity at all
+ */
+void
+circuit_build_times_network_timeout(circuit_build_times_t *cbt,
+                                    int did_onehop, time_t start_time)
+{
+  time_t now = approx_time();
+  /* Only count this as a valid attempt if it was both started before
+   * the network was dead and the network has been dead for at least
+   * a full timeout interval. */
+  if (cbt->liveness.network_last_live <= (now - cbt->timeout_ms/1000.0)
+          && cbt->liveness.network_last_live <= start_time) {
+    cbt->liveness.nonlive_timeouts++;
+  }
+
+  /* Check for one-hop timeout */
+  if (did_onehop) {
+    cbt->liveness.onehop_timeouts[cbt->liveness.onehop_idx] = 1;
+    cbt->liveness.onehop_idx++;
+    cbt->liveness.onehop_idx %= RECENT_CIRCUITS;
+  }
 }
 }
 
 
 /**
 /**
- * Returns true if the network showed some sign of liveness
+ * Returns false if the network has not received a cell or tls handshake
- * in the past NETWORK_LIVE_MULTIPLIER*cbt->timeout_ms/1000 seconds.
+ * in the past NETWORK_NOTLIVE_TIMEOUT_COUNT circuits.
+ *
+ * Also has the side effect of rewinding the circuit time history
+ * in the case of recent liveness changes.
  */
  */
 int
 int
-circuit_build_times_is_network_live(circuit_build_times_t *cbt)
+circuit_build_times_network_check_live(circuit_build_times_t *cbt)
 {
 {
   time_t now = approx_time();
   time_t now = approx_time();
-  if (now - cbt->network_last_live >
+  if (cbt->liveness.nonlive_timeouts >= NETWORK_NONLIVE_DISCARD_COUNT) {
-          (cbt->timeout_ms*NETWORK_LIVE_MULTIPLIER/1000.0)) {
+    if (!cbt->liveness.nonlive_discarded) {
-    log_info(LD_CIRC, "Network is no longer live. Dead for %ld seconds.",
+      cbt->liveness.nonlive_discarded = 1;
-             (long int)(now - cbt->network_last_live));
+      log_notice(LD_CIRC, "Network is no longer live. Dead for %ld seconds.",
+             (long int)(now - cbt->liveness.network_last_live));
+      /* Only discard NETWORK_NONLIVE_TIMEOUT_COUNT-1 because we stoped
+       * counting after that */
+      circuit_build_times_rewind_history(cbt, NETWORK_NONLIVE_TIMEOUT_COUNT-1);
+    }
+    return 0;
+  } else if (cbt->liveness.nonlive_timeouts >= NETWORK_NONLIVE_TIMEOUT_COUNT) {
+    if (cbt->timeout_ms < circuit_build_times_get_initial_timeout()) {
+      log_notice(LD_CIRC,
+                "Network is flaky. No activity for %ld seconds. "
+                "Temporarily raising timeout to %lds.",
+                (long int)(now - cbt->liveness.network_last_live),
+                lround(circuit_build_times_get_initial_timeout()/1000));
+      cbt->timeout_ms = circuit_build_times_get_initial_timeout();
+    }
+
     return 0;
     return 0;
   }
   }
+
   return 1;
   return 1;
 }
 }
 
 
 /**
 /**
- * Returns true if we have seen more than MAX_RECENT_TIMEOUT_RATE of
+ * Returns true if we have seen more than MAX_RECENT_TIMEOUT_COUNT of
  * the past RECENT_CIRCUITS time out. Used to detect if the network
  * the past RECENT_CIRCUITS time out. Used to detect if the network
  * connection has changed significantly.
  * connection has changed significantly.
  *
  *
@@ -660,110 +771,82 @@ circuit_build_times_is_network_live(circuit_build_times_t *cbt)
  * new timeout.
  * new timeout.
  */
  */
 int
 int
-circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt)
+circuit_build_times_network_check_changed(circuit_build_times_t *cbt)
 {
 {
-  double timeout_rate=0;
+  int total_build_times = cbt->total_build_times;
-  build_time_t Xm = BUILD_TIME_MAX;
+  int timeout_count=0;
   int i;
   int i;
 
 
-  if ((cbt->pre_timeouts + cbt->total_build_times) < RECENT_CIRCUITS) {
+  for (i = 0; i < RECENT_CIRCUITS; i++) {
-    return 0;
+    timeout_count += cbt->liveness.onehop_timeouts[i];
   }
   }
 
 
-  /* Get timeout rate and Xm for recent circs */
-  /* XXX: Hrmm, if the switch is a hard switch, then 4 times
-   * will be from the previous network and will give a really low Xm
-   * and thus a really low alpha and thus a high timeout */
-  for (i = (cbt->build_times_idx - RECENT_CIRCUITS) % NCIRCUITS_TO_OBSERVE;
-       i != cbt->build_times_idx;
-       i = (i + 1) % NCIRCUITS_TO_OBSERVE) {
-    if (cbt->circuit_build_times[i] && cbt->circuit_build_times[i] < Xm) {
-      Xm = cbt->circuit_build_times[i];
-    }
-    if (cbt->circuit_build_times[i]+1 >=
-            (build_time_t)lround(cbt->timeout_ms)) {
-      timeout_rate++;
-    }
-  }
-  timeout_rate += cbt->pre_timeouts;
-  timeout_rate /= RECENT_CIRCUITS;
-
   /* If more than 80% of our recent circuits are timing out,
   /* If more than 80% of our recent circuits are timing out,
    * we need to re-estimate a new initial alpha and timeout */
    * we need to re-estimate a new initial alpha and timeout */
-  if (timeout_rate < MAX_RECENT_TIMEOUT_RATE) {
+  if (timeout_count < MAX_RECENT_TIMEOUT_COUNT) {
     return 0;
     return 0;
   }
   }
 
 
-  log_notice(LD_CIRC,
+  /* Reset build history */
-            "Network connection speed appears to have changed. "
+  circuit_build_times_reset(cbt);
-            "Resetting timeouts after %d pretimouts and %d buildtimes",
+  memset(cbt->liveness.onehop_timeouts, 0,
-            cbt->pre_timeouts, cbt->total_build_times);
+          sizeof(cbt->liveness.onehop_timeouts));
-
+  cbt->liveness.onehop_idx = 0;
-  if (Xm >= (build_time_t)lround(cbt->timeout_ms)) {
+
-    Xm = circuit_build_times_min(cbt);
+  /* Check to see if this has happened before. If so, double the timeout
-    if (Xm >= (build_time_t)lround(cbt->timeout_ms)) {
+   * to give people on abysmally bad network connections a shot at access */
-      /* No circuits have completed */
+  if (cbt->timeout_ms >= circuit_build_times_get_initial_timeout()) {
-      cbt->timeout_ms *= 2;
+    cbt->timeout_ms *= 2;
-      log_warn(LD_CIRC,
+  } else {
-               "Adjusting CircuitBuildTimeout to %lds in the hopes that "
+    cbt->timeout_ms = circuit_build_times_get_initial_timeout();
-               "some connections will succeed", lround(cbt->timeout_ms/1000));
-      goto reset;
-    }
-  }
-  tor_assert(Xm > 0);
-  cbt->Xm = Xm;
-
-  circuit_build_times_initial_alpha(cbt, 1.0-timeout_rate,
-          cbt->timeout_ms);
-
-  /* timeout is INT32_MAX at most */
-  cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt,
-                                BUILDTIMEOUT_QUANTILE_CUTOFF);
-
-  if (cbt->timeout_ms < BUILD_TIMEOUT_MIN_VALUE) {
-    log_warn(LD_CIRC, "Reset buildtimeout to low value %lfms. Setting to %dms",
-             cbt->timeout_ms, BUILD_TIMEOUT_MIN_VALUE);
-    cbt->timeout_ms = BUILD_TIMEOUT_MIN_VALUE;
   }
   }
 
 
   log_notice(LD_CIRC,
   log_notice(LD_CIRC,
-             "Reset circuit build timeout to %lds (%lfms, Xm: %d, a: %lf) "
+            "Network connection speed appears to have changed. Resetting "
-             "based on timeout rate of %lf on %d recent circuit times",
+            "timeout to %lds after %d one-hop timeouts and %d buildtimes",
-             lround(cbt->timeout_ms/1000), cbt->timeout_ms, cbt->Xm,
+            lround(cbt->timeout_ms/1000), timeout_count, total_build_times);
-             cbt->alpha, timeout_rate, RECENT_CIRCUITS);
 
 
-reset:
-
-  /* Reset all data */
-  circuit_build_times_reset(cbt);
   return 1;
   return 1;
 }
 }
 
 
 /**
 /**
- * Store a timeout as a synthetic value
+ * Store a timeout as a synthetic value.
+ *
+ * Returns true if the store was successful and we should possibly
+ * update our timeout estimate.
  */
  */
-void
+int
-circuit_build_times_add_timeout(circuit_build_times_t *cbt)
+circuit_build_times_add_timeout(circuit_build_times_t *cbt,
+                                int did_onehop,
+                                time_t start_time)
 {
 {
+  circuit_build_times_network_timeout(cbt, did_onehop, start_time);
+
   /* Only count timeouts if network is live.. */
   /* Only count timeouts if network is live.. */
-  if (!circuit_build_times_is_network_live(cbt)) {
+  if (!circuit_build_times_network_check_live(cbt)) {
-    return;
+    return 0;
   }
   }
 
 
   /* If there are a ton of timeouts, we should reduce
   /* If there are a ton of timeouts, we should reduce
    * the circuit build timeout */
    * the circuit build timeout */
-  if (circuit_build_times_check_too_many_timeouts(cbt)) {
+  if (circuit_build_times_network_check_changed(cbt)) {
-    return;
+    return 0;
   }
   }
 
 
   if (!cbt->have_computed_timeout) {
   if (!cbt->have_computed_timeout) {
-    /* Store a timeout before we have enough data as special */
+    /* Store a timeout before we have enough data */
     cbt->pre_timeouts++;
     cbt->pre_timeouts++;
-    return;
+    log_info(LD_CIRC,
+             "Not enough circuits yet to calculate a new build timeout."
+             " Need %d more.",
+             MIN_CIRCUITS_TO_OBSERVE-cbt->total_build_times);
+    return 0;
   }
   }
 
 
   circuit_build_times_count_pretimeouts(cbt);
   circuit_build_times_count_pretimeouts(cbt);
   circuit_build_times_add_timeout_worker(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF);
   circuit_build_times_add_timeout_worker(cbt, BUILDTIMEOUT_QUANTILE_CUTOFF);
+
+  return 1;
 }
 }
 
 
 /**
 /**
@@ -774,16 +857,12 @@ void
 circuit_build_times_set_timeout(circuit_build_times_t *cbt)
 circuit_build_times_set_timeout(circuit_build_times_t *cbt)
 {
 {
   if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) {
   if (cbt->total_build_times < MIN_CIRCUITS_TO_OBSERVE) {
-    log_info(LD_CIRC,
-             "Not enough circuits yet to calculate a new build timeout."
-             " Need %d more.",
-             MIN_CIRCUITS_TO_OBSERVE-cbt->total_build_times);
     return;
     return;
   }
   }
 
 
   circuit_build_times_count_pretimeouts(cbt);
   circuit_build_times_count_pretimeouts(cbt);
   circuit_build_times_update_alpha(cbt);
   circuit_build_times_update_alpha(cbt);
-  /* timeout is INT32_MAX at most */
+
   cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt,
   cbt->timeout_ms = circuit_build_times_calculate_timeout(cbt,
                                 BUILDTIMEOUT_QUANTILE_CUTOFF);
                                 BUILDTIMEOUT_QUANTILE_CUTOFF);
 
 
@@ -1391,7 +1470,8 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
         timediff = INT32_MAX;
         timediff = INT32_MAX;
       /* done building the circuit. whew. */
       /* done building the circuit. whew. */
       circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN);
       circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN);
-      circuit_build_times_add_time(&circ_times, (uint32_t)timediff);
+      circuit_build_times_add_time(&circ_times, (build_time_t)timediff);
+      circuit_build_times_network_circ_success(&circ_times);
       circuit_build_times_set_timeout(&circ_times);
       circuit_build_times_set_timeout(&circ_times);
       log_info(LD_CIRC,"circuit built!");
       log_info(LD_CIRC,"circuit built!");
       circuit_reset_failure_count(0);
       circuit_reset_failure_count(0);

+ 10 - 2
src/or/circuituse.c

@@ -362,8 +362,13 @@ circuit_expire_building(time_t now)
 
 
     circuit_log_path(LOG_INFO,LD_CIRC,TO_ORIGIN_CIRCUIT(victim));
     circuit_log_path(LOG_INFO,LD_CIRC,TO_ORIGIN_CIRCUIT(victim));
     circuit_mark_for_close(victim, END_CIRC_REASON_TIMEOUT);
     circuit_mark_for_close(victim, END_CIRC_REASON_TIMEOUT);
-    circuit_build_times_add_timeout(&circ_times);
+
-    circuit_build_times_set_timeout(&circ_times);
+    if (circuit_build_times_add_timeout(&circ_times,
+       TO_ORIGIN_CIRCUIT(circ)->cpath
+              && TO_ORIGIN_CIRCUIT(circ)->cpath->state == CPATH_STATE_OPEN,
+                                         circ->timestamp_created)) {
+      circuit_build_times_set_timeout(&circ_times);
+    }
   }
   }
 }
 }
 
 
@@ -838,6 +843,9 @@ circuit_build_failed(origin_circuit_t *circ)
                "(%s:%d). I'm going to try to rotate to a better connection.",
                "(%s:%d). I'm going to try to rotate to a better connection.",
                n_conn->_base.address, n_conn->_base.port);
                n_conn->_base.address, n_conn->_base.port);
       n_conn->is_bad_for_new_circs = 1;
       n_conn->is_bad_for_new_circs = 1;
+    } else {
+      log_info(LD_OR,
+               "Our circuit died before the first hop with no connection");
     }
     }
     if (n_conn_id) {
     if (n_conn_id) {
       entry_guard_register_connect_status(n_conn_id, 0, 1, time(NULL));
       entry_guard_register_connect_status(n_conn_id, 0, 1, time(NULL));

+ 68 - 20
src/or/or.h

@@ -2859,18 +2859,10 @@ void entry_guards_free_all(void);
 
 
 /* Circuit Build Timeout "public" functions and structures. */
 /* Circuit Build Timeout "public" functions and structures. */
 
 
-/** How many circuits count as recent when deciding if the
- * connection has changed. */
-#define RECENT_CIRCUITS 20
-
-/** Maximum fraction of timeouts to tolerate in the past
- * RECENT_CIRCUITS before calculating a new timeout */
-#define MAX_RECENT_TIMEOUT_RATE 0.7999999
-
 /** Maximum quantile to use to generate synthetic timeouts.
 /** Maximum quantile to use to generate synthetic timeouts.
  *  We want to stay a bit short of 1.0, because longtail is
  *  We want to stay a bit short of 1.0, because longtail is
  *  loooooooooooooooooooooooooooooooooooooooooooooooooooong. */
  *  loooooooooooooooooooooooooooooooooooooooooooooooooooong. */
-#define MAX_SYNTHETIC_QUANTILE 0.98
+#define MAX_SYNTHETIC_QUANTILE 0.985
 
 
 /** Minimum circuits before estimating a timeout */
 /** Minimum circuits before estimating a timeout */
 #define MIN_CIRCUITS_TO_OBSERVE 500
 #define MIN_CIRCUITS_TO_OBSERVE 500
@@ -2890,9 +2882,6 @@ void entry_guards_free_all(void);
 typedef uint32_t build_time_t;
 typedef uint32_t build_time_t;
 #define BUILD_TIME_MAX ((build_time_t)(INT32_MAX))
 #define BUILD_TIME_MAX ((build_time_t)(INT32_MAX))
 
 
-/** Have we received a cell in the last N seconds? */
-#define NETWORK_LIVE_MULTIPLIER (RECENT_CIRCUITS/2.5)
-
 /** Lowest allowable value for CircuitBuildTimeout in milliseconds */
 /** Lowest allowable value for CircuitBuildTimeout in milliseconds */
 #define BUILD_TIMEOUT_MIN_VALUE (3*1000)
 #define BUILD_TIMEOUT_MIN_VALUE (3*1000)
 
 
@@ -2905,17 +2894,68 @@ typedef uint32_t build_time_t;
 /** Save state every 10 circuits */
 /** Save state every 10 circuits */
 #define BUILD_TIMES_SAVE_STATE_EVERY 10
 #define BUILD_TIMES_SAVE_STATE_EVERY 10
 
 
+/* Circuit Build Timeout network liveness constants */
+
+/**
+ * How many circuits count as recent when considering if the
+ * connection has gone gimpy or changed.
+ */
+#define RECENT_CIRCUITS 20
+
+/**
+ * Have we received a cell in the last N circ attempts?
+ *
+ * This tells us when to temporarily switch back to
+ * BUILD_TIMEOUT_INITIAL_VALUE until we start getting cells,
+ * at which point we switch back to computing the timeout from
+ * our saved history.
+ */
+#define NETWORK_NONLIVE_TIMEOUT_COUNT (lround(RECENT_CIRCUITS*0.15))
+
+/**
+ * This tells us when to toss out the last streak of N timeouts.
+ *
+ * If instead we start getting cells, we switch back to computing the timeout
+ * from our saved history.
+ */
+#define NETWORK_NONLIVE_DISCARD_COUNT (lround(NETWORK_NONLIVE_TIMEOUT_COUNT*2))
+
+/**
+ * Maximum count of timeouts that finish the first hop in the past
+ * RECENT_CIRCUITS before calculating a new timeout.
+ *
+ * This tells us to abandon timeout history and set
+ * the timeout back to BUILD_TIMEOUT_INITIAL_VALUE.
+ */
+#define MAX_RECENT_TIMEOUT_COUNT (lround(RECENT_CIRCUITS*0.75))
+
+/** Information about the state of our local network connection */
 typedef struct {
 typedef struct {
-  /** The circular array of recorded build times in milliseconds */
-  build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE];
   /** The timestamp we last completed a TLS handshake or received a cell */
   /** The timestamp we last completed a TLS handshake or received a cell */
   time_t network_last_live;
   time_t network_last_live;
-  /** Last time we built a circuit. Used to decide to build new test circs */
+  /** If the network is not live, how many timeouts has this caused? */
-  time_t last_circ_at;
+  int nonlive_timeouts;
+  /** If the network is not live, have we yet discarded our history? */
+  int nonlive_discarded;
+  /** Circular array of circuits that have made it past 1 hop. Slot is
+   * 1 if circuit timed out, 0 if circuit succeeded */
+  int8_t onehop_timeouts[RECENT_CIRCUITS];
+  /** Index into circular array. */
+  int onehop_idx;
+} network_liveness_t;
+
+/** Structure for circuit build times history */
+typedef struct {
+  /** The circular array of recorded build times in milliseconds */
+  build_time_t circuit_build_times[NCIRCUITS_TO_OBSERVE];
   /** Current index in the circuit_build_times circular array */
   /** Current index in the circuit_build_times circular array */
   int build_times_idx;
   int build_times_idx;
   /** Total number of build times accumulated. Maxes at NCIRCUITS_TO_OBSERVE */
   /** Total number of build times accumulated. Maxes at NCIRCUITS_TO_OBSERVE */
   int total_build_times;
   int total_build_times;
+  /** Information about the state of our local network connection */
+  network_liveness_t liveness;
+  /** Last time we built a circuit. Used to decide to build new test circs */
+  time_t last_circ_at;
   /** Number of timeouts that have happened before estimating pareto
   /** Number of timeouts that have happened before estimating pareto
    *  parameters */
    *  parameters */
   int pre_timeouts;
   int pre_timeouts;
@@ -2934,12 +2974,11 @@ void circuit_build_times_update_state(circuit_build_times_t *cbt,
                                       or_state_t *state);
                                       or_state_t *state);
 int circuit_build_times_parse_state(circuit_build_times_t *cbt,
 int circuit_build_times_parse_state(circuit_build_times_t *cbt,
                                     or_state_t *state, char **msg);
                                     or_state_t *state, char **msg);
-void circuit_build_times_add_timeout(circuit_build_times_t *cbt);
+int circuit_build_times_add_timeout(circuit_build_times_t *cbt,
+                                    int did_onehop, time_t start_time);
 void circuit_build_times_set_timeout(circuit_build_times_t *cbt);
 void circuit_build_times_set_timeout(circuit_build_times_t *cbt);
 int circuit_build_times_add_time(circuit_build_times_t *cbt,
 int circuit_build_times_add_time(circuit_build_times_t *cbt,
                                  build_time_t time);
                                  build_time_t time);
-void circuit_build_times_network_is_live(circuit_build_times_t *cbt);
-int circuit_build_times_is_network_live(circuit_build_times_t *cbt);
 int circuit_build_times_needs_circuits(circuit_build_times_t *cbt);
 int circuit_build_times_needs_circuits(circuit_build_times_t *cbt);
 int circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt);
 int circuit_build_times_needs_circuits_now(circuit_build_times_t *cbt);
 void circuit_build_times_init(circuit_build_times_t *cbt);
 void circuit_build_times_init(circuit_build_times_t *cbt);
@@ -2953,13 +2992,22 @@ void circuit_build_times_initial_alpha(circuit_build_times_t *cbt,
                                        double quantile, double time_ms);
                                        double quantile, double time_ms);
 void circuit_build_times_update_alpha(circuit_build_times_t *cbt);
 void circuit_build_times_update_alpha(circuit_build_times_t *cbt);
 double circuit_build_times_cdf(circuit_build_times_t *cbt, double x);
 double circuit_build_times_cdf(circuit_build_times_t *cbt, double x);
-int circuit_build_times_check_too_many_timeouts(circuit_build_times_t *cbt);
 void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt,
 void circuit_build_times_add_timeout_worker(circuit_build_times_t *cbt,
                                        double quantile_cutoff);
                                        double quantile_cutoff);
 void circuitbuild_running_unit_tests(void);
 void circuitbuild_running_unit_tests(void);
 void circuit_build_times_reset(circuit_build_times_t *cbt);
 void circuit_build_times_reset(circuit_build_times_t *cbt);
+
+/* Network liveness functions */
+int circuit_build_times_network_check_changed(circuit_build_times_t *cbt);
 #endif
 #endif
 
 
+/* Network liveness functions */
+void circuit_build_times_network_is_live(circuit_build_times_t *cbt);
+int circuit_build_times_network_check_live(circuit_build_times_t *cbt);
+void circuit_build_times_network_timeout(circuit_build_times_t *cbt,
+                                         int did_onehop, time_t start_time);
+void circuit_build_times_network_circ_success(circuit_build_times_t *cbt);
+
 /********************************* circuitlist.c ***********************/
 /********************************* circuitlist.c ***********************/
 
 
 circuit_t * _circuit_get_global_list(void);
 circuit_t * _circuit_get_global_list(void);

+ 95 - 27
src/or/test.c

@@ -39,7 +39,13 @@ const char tor_git_revision[] = "";
 #define ROUTER_PRIVATE
 #define ROUTER_PRIVATE
 #define CIRCUIT_PRIVATE
 #define CIRCUIT_PRIVATE
 
 
-#include <math.h>
+/*
+ * Linux doesn't provide lround in math.h by default, but mac os does...
+ * It's best just to leave math.h out of the picture entirely.
+ */
+//#include <math.h>
+long int lround(double x);
+double fabs(double x);
 
 
 #include "or.h"
 #include "or.h"
 #include "test.h"
 #include "test.h"
@@ -3425,10 +3431,10 @@ test_circuit_timeout(void)
   circuit_build_times_t initial;
   circuit_build_times_t initial;
   circuit_build_times_t estimate;
   circuit_build_times_t estimate;
   circuit_build_times_t final;
   circuit_build_times_t final;
+  double timeout1, timeout2;
   or_state_t state;
   or_state_t state;
-  int i;
   char *msg;
   char *msg;
-  double timeout1, timeout2;
+  int i, runs;
   circuit_build_times_init(&initial);
   circuit_build_times_init(&initial);
   circuit_build_times_init(&estimate);
   circuit_build_times_init(&estimate);
   circuit_build_times_init(&final);
   circuit_build_times_init(&final);
@@ -3474,34 +3480,96 @@ test_circuit_timeout(void)
   test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) -
   test_assert(fabs(circuit_build_times_cdf(&initial, timeout0) -
                    circuit_build_times_cdf(&initial, timeout2)) < 0.05);
                    circuit_build_times_cdf(&initial, timeout2)) < 0.05);
 
 
-  /* Generate MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS timeouts
+  for (runs = 0; runs < 50; runs++) {
-   * and 1-that regular values. Then check for timeout error
+    int build_times_idx = 0;
-   * Do the same for one less timeout */
+    int total_build_times = 0;
-  for (i = 0; i < RECENT_CIRCUITS; i++) {
+
-    circuit_build_times_add_time(&estimate,
+    final.timeout_ms = BUILD_TIMEOUT_INITIAL_VALUE;
-          circuit_build_times_generate_sample(&estimate, 0,
+    estimate.timeout_ms = BUILD_TIMEOUT_INITIAL_VALUE;
-              BUILDTIMEOUT_QUANTILE_CUTOFF));
+
-    circuit_build_times_add_time(&final,
+    for (i = 0; i < RECENT_CIRCUITS*2; i++) {
-          circuit_build_times_generate_sample(&final, 0,
+      circuit_build_times_network_circ_success(&estimate);
-              BUILDTIMEOUT_QUANTILE_CUTOFF));
+      circuit_build_times_add_time(&estimate,
-  }
+            circuit_build_times_generate_sample(&estimate, 0,
+                BUILDTIMEOUT_QUANTILE_CUTOFF));
+      estimate.have_computed_timeout = 1;
+      circuit_build_times_network_circ_success(&estimate);
+      circuit_build_times_add_time(&final,
+            circuit_build_times_generate_sample(&final, 0,
+                BUILDTIMEOUT_QUANTILE_CUTOFF));
+      final.have_computed_timeout = 1;
+    }
 
 
-  test_assert(!circuit_build_times_check_too_many_timeouts(&estimate));
+    test_assert(!circuit_build_times_network_check_changed(&estimate));
-  test_assert(!circuit_build_times_check_too_many_timeouts(&final));
+    test_assert(!circuit_build_times_network_check_changed(&final));
+
+    /* Reset liveness to be non-live */
+    final.liveness.network_last_live = 0;
+    estimate.liveness.network_last_live = 0;
+
+    build_times_idx = estimate.build_times_idx;
+    total_build_times = estimate.total_build_times;
+    for (i = 0; i < NETWORK_NONLIVE_TIMEOUT_COUNT; i++) {
+      test_assert(circuit_build_times_network_check_live(&estimate));
+      test_assert(circuit_build_times_network_check_live(&final));
+
+      if (circuit_build_times_add_timeout(&estimate, 0,
+                  approx_time()-estimate.timeout_ms/1000.0-1))
+        estimate.have_computed_timeout = 1;
+      if (circuit_build_times_add_timeout(&final, 0,
+                  approx_time()-final.timeout_ms/1000.0-1))
+        final.have_computed_timeout = 1;
+    }
+
+    test_assert(!circuit_build_times_network_check_live(&estimate));
+    test_assert(!circuit_build_times_network_check_live(&final));
+
+    for ( ; i < NETWORK_NONLIVE_DISCARD_COUNT; i++) {
+      if (circuit_build_times_add_timeout(&estimate, 0, approx_time()))
+        estimate.have_computed_timeout = 1;
 
 
-  for (i = 0; i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS; i++) {
+      if (i < NETWORK_NONLIVE_DISCARD_COUNT-1) {
-    circuit_build_times_add_timeout_worker(&estimate,
+        if (circuit_build_times_add_timeout(&final, 0, approx_time()))
-                                           BUILDTIMEOUT_QUANTILE_CUTOFF);
+          final.have_computed_timeout = 1;
-    if (i < MAX_RECENT_TIMEOUT_RATE*RECENT_CIRCUITS-1) {
+      }
-      circuit_build_times_add_timeout_worker(&final,
-                                             BUILDTIMEOUT_QUANTILE_CUTOFF);
     }
     }
-  }
 
 
-// Disabled 2009-09-18 since the synthetic values are not perfectly
+    test_assert(!circuit_build_times_network_check_live(&estimate));
-// accurate at falling on the right side of the line. -RD
+    test_assert(!circuit_build_times_network_check_live(&final));
-//  test_assert(circuit_build_times_check_too_many_timeouts(&estimate) == 1);
+
-//  test_assert(!circuit_build_times_check_too_many_timeouts(&final));
+    log_info(LD_CIRC, "idx: %d %d, tot: %d %d",
+             build_times_idx, estimate.build_times_idx,
+             total_build_times, estimate.total_build_times);
+
+    /* Check rollback index. Should match top of loop. */
+    test_assert(build_times_idx == estimate.build_times_idx);
+    test_assert(total_build_times == estimate.total_build_times);
+
+    /* Now simulate that the network has become live and we need
+     * a change */
+    circuit_build_times_network_is_live(&estimate);
+    circuit_build_times_network_is_live(&final);
+
+    for (i = 0; i < MAX_RECENT_TIMEOUT_COUNT; i++) {
+      if (circuit_build_times_add_timeout(&estimate, 1, approx_time()-1))
+        estimate.have_computed_timeout = 1;
+
+      if (i < MAX_RECENT_TIMEOUT_COUNT-1) {
+        if (circuit_build_times_add_timeout(&final, 1, approx_time()-1))
+          final.have_computed_timeout = 1;
+      }
+    }
+
+    test_assert(estimate.liveness.onehop_idx == 0);
+    test_assert(final.liveness.onehop_idx == MAX_RECENT_TIMEOUT_COUNT-1);
+
+    test_assert(circuit_build_times_network_check_live(&estimate));
+    test_assert(circuit_build_times_network_check_live(&final));
+
+    if (circuit_build_times_add_timeout(&final, 1, approx_time()-1))
+      final.have_computed_timeout = 1;
+
+  }
 
 
 done:
 done:
   return;
   return;