Browse Source

Merge remote-tracking branch 'mikeperry/bug23114_squashed2'

Nick Mathewson 6 years ago
parent
commit
0340c02975

+ 7 - 0
changes/bug23100

@@ -0,0 +1,7 @@
+  o Minor bugfixes (Performance):
+    - Use hidden service circuits (and other circuits longer than 3 hops)
+      to calculate a circuit build timeout. Previously, Tor only calculated
+      its build timeout based on circuits that planned to be exactly 3 hops
+      long. With this change, we include measurements from all circuits at
+      the point where they complete their third hop.  Fixes bug 23100;
+      bugfix on 0.2.2.2-alpha.

+ 6 - 0
changes/bug23114

@@ -0,0 +1,6 @@
+  o Minor bugfixes (Performance):
+    - Consider circuits for timeout as soon as they complete a hop. This
+      is more accurate than applying the timeout in circuit_expire_building()
+      because that function is only called once per second, which is now
+      too slow for typical timeouts on the current network. Fixes bug 23114;
+      bugfix on 0.2.2.2-alpha.

+ 19 - 40
src/or/circuitbuild.c

@@ -75,7 +75,7 @@ static int onion_pick_cpath_exit(origin_circuit_t *circ, extend_info_t *exit,
                                  int is_hs_v3_rp_circuit);
 static crypt_path_t *onion_next_hop_in_cpath(crypt_path_t *cpath);
 static int onion_extend_cpath(origin_circuit_t *circ);
-static int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice);
+STATIC int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice);
 static int circuit_send_first_onion_skin(origin_circuit_t *circ);
 static int circuit_build_no_more_hops(origin_circuit_t *circ);
 static int circuit_send_intermediate_onion_skin(origin_circuit_t *circ,
@@ -824,16 +824,25 @@ should_use_create_fast_for_circuit(origin_circuit_t *circ)
   return networkstatus_get_param(NULL, "usecreatefast", 0, 0, 1);
 }
 
-/** Return true if <b>circ</b> is the type of circuit we want to count
- * timeouts from. In particular, we want it to have not completed yet
- * (already completing indicates we cannibalized it), and we want it to
- * have exactly three hops.
+/**
+ * Return true if <b>circ</b> is the type of circuit we want to count
+ * timeouts from.
+ *
+ * In particular, we want to consider any circuit that plans to build
+ * at least 3 hops (but maybe more), but has 3 or fewer hops built
+ * so far.
+ *
+ * We still want to consider circuits before 3 hops, because we need
+ * to decide if we should convert them to a measurement circuit in
+ * circuit_build_times_handle_completed_hop(), rather than letting
+ * slow circuits get killed right away.
  */
 int
-circuit_timeout_want_to_count_circ(origin_circuit_t *circ)
+circuit_timeout_want_to_count_circ(const origin_circuit_t *circ)
 {
   return !circ->has_opened
-          && circ->build_state->desired_path_len == DEFAULT_ROUTE_LEN;
+          && circ->build_state->desired_path_len >= DEFAULT_ROUTE_LEN
+          && circuit_get_cpath_opened_len(circ) <= DEFAULT_ROUTE_LEN;
 }
 
 /** Decide whether to use a TAP or ntor handshake for connecting to <b>ei</b>
@@ -937,7 +946,9 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
 
   tor_assert(circ->cpath->state == CPATH_STATE_OPEN);
   tor_assert(circ->base_.state == CIRCUIT_STATE_BUILDING);
+
   crypt_path_t *hop = onion_next_hop_in_cpath(circ->cpath);
+  circuit_build_times_handle_completed_hop(circ);
 
   if (hop) {
     /* Case two: we're on a hop after the first. */
@@ -1052,38 +1063,6 @@ circuit_build_no_more_hops(origin_circuit_t *circ)
    * I think I got them right, but more checking would be wise. -NM
    */
 
-  if (circuit_timeout_want_to_count_circ(circ)) {
-    struct timeval end;
-    long timediff;
-    tor_gettimeofday(&end);
-    timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
-
-    /*
-     * If the circuit build time is much greater than we would have cut
-     * it off at, we probably had a suspend event along this codepath,
-     * and we should discard the value.
-     */
-    if (timediff < 0 ||
-        timediff > 2*get_circuit_build_close_time_ms()+1000) {
-      log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. "
-                 "Assuming clock jump. Purpose %d (%s)", timediff,
-                 circ->base_.purpose,
-                 circuit_purpose_to_string(circ->base_.purpose));
-    } else if (!circuit_build_times_disabled(get_options())) {
-      /* Only count circuit times if the network is live */
-      if (circuit_build_times_network_check_live(
-                                                 get_circuit_build_times())) {
-        circuit_build_times_add_time(get_circuit_build_times_mutable(),
-                                     (build_time_t)timediff);
-        circuit_build_times_set_timeout(get_circuit_build_times_mutable());
-      }
-
-      if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
-        circuit_build_times_network_circ_success(
-                                      get_circuit_build_times_mutable());
-      }
-    }
-  }
   log_info(LD_CIRC,"circuit built!");
   circuit_reset_failure_count(0);
 
@@ -2606,7 +2585,7 @@ onion_extend_cpath(origin_circuit_t *circ)
 /** Create a new hop, annotate it with information about its
  * corresponding router <b>choice</b>, and append it to the
  * end of the cpath <b>head_ptr</b>. */
-static int
+STATIC int
 onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice)
 {
   crypt_path_t *hop = tor_malloc_zero(sizeof(crypt_path_t));

+ 1 - 1
src/or/circuitbuild.h

@@ -27,7 +27,7 @@ int circuit_handle_first_hop(origin_circuit_t *circ);
 void circuit_n_chan_done(channel_t *chan, int status,
                          int close_origin_circuits);
 int inform_testing_reachability(void);
-int circuit_timeout_want_to_count_circ(origin_circuit_t *circ);
+int circuit_timeout_want_to_count_circ(const origin_circuit_t *circ);
 int circuit_send_next_onion_skin(origin_circuit_t *circ);
 void circuit_note_clock_jumped(int seconds_elapsed);
 int circuit_extend(cell_t *cell, circuit_t *circ);

+ 77 - 0
src/or/circuitlist.c

@@ -109,6 +109,14 @@ static void cpath_ref_decref(crypt_path_reference_t *cpath_ref);
 static void circuit_about_to_free_atexit(circuit_t *circ);
 static void circuit_about_to_free(circuit_t *circ);
 
+/**
+ * A cached value of the current state of the origin circuit list.  Has the
+ * value 1 if we saw any opened circuits recently (since the last call to
+ * circuit_any_opened_circuits(), which gets called around once a second by
+ * circuit_expire_building). 0 otherwise.
+ */
+static int any_opened_circs_cached_val = 0;
+
 /********* END VARIABLES ************/
 
 /** A map from channel and circuit ID to circuit.  (Lookup performance is
@@ -596,6 +604,56 @@ circuit_get_global_origin_circuit_list(void)
   return global_origin_circuit_list;
 }
 
+/**
+ * Return true if we have any opened general-purpose 3 hop
+ * origin circuits.
+ *
+ * The result from this function is cached for use by
+ * circuit_any_opened_circuits_cached().
+ */
+int
+circuit_any_opened_circuits(void)
+{
+  SMARTLIST_FOREACH_BEGIN(circuit_get_global_origin_circuit_list(),
+          const origin_circuit_t *, next_circ) {
+    if (!TO_CIRCUIT(next_circ)->marked_for_close &&
+        next_circ->has_opened &&
+        TO_CIRCUIT(next_circ)->state == CIRCUIT_STATE_OPEN &&
+        TO_CIRCUIT(next_circ)->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT &&
+        next_circ->build_state &&
+        next_circ->build_state->desired_path_len == DEFAULT_ROUTE_LEN) {
+      circuit_cache_opened_circuit_state(1);
+      return 1;
+    }
+  } SMARTLIST_FOREACH_END(next_circ);
+
+  circuit_cache_opened_circuit_state(0);
+  return 0;
+}
+
+/**
+ * Cache the "any circuits opened" state, as specified in param
+ * circuits_are_opened. This is a helper function to update
+ * the circuit opened status whenever we happen to look at the
+ * circuit list.
+ */
+void
+circuit_cache_opened_circuit_state(int circuits_are_opened)
+{
+  any_opened_circs_cached_val = circuits_are_opened;
+}
+
+/**
+ * Return true if there were any opened circuits since the last call to
+ * circuit_any_opened_circuits(), or since circuit_expire_building() last
+ * ran (it runs roughly once per second).
+ */
+int
+circuit_any_opened_circuits_cached(void)
+{
+  return any_opened_circs_cached_val;
+}
+
 /** Function to make circ-\>state human-readable */
 const char *
 circuit_state_to_string(int state)
@@ -1788,6 +1846,25 @@ circuit_get_cpath_len(origin_circuit_t *circ)
   return n;
 }
 
+/** Return the number of opened hops in circuit's path.
+ * If circ has no entries, or is NULL, returns 0. */
+int
+circuit_get_cpath_opened_len(const origin_circuit_t *circ)
+{
+  int n = 0;
+  if (circ && circ->cpath) {
+    crypt_path_t *cpath, *cpath_next = NULL;
+    for (cpath = circ->cpath;
+         cpath->state == CPATH_STATE_OPEN
+           && cpath_next != circ->cpath;
+         cpath = cpath_next) {
+      cpath_next = cpath->next;
+      ++n;
+    }
+  }
+  return n;
+}
+
 /** Return the <b>hopnum</b>th hop in <b>circ</b>->cpath, or NULL if there
  * aren't that many hops in the list. <b>hopnum</b> starts at 1.
  * Returns NULL if <b>hopnum</b> is 0 or negative. */

+ 5 - 0
src/or/circuitlist.h

@@ -17,6 +17,10 @@
 
 MOCK_DECL(smartlist_t *, circuit_get_global_list, (void));
 smartlist_t *circuit_get_global_origin_circuit_list(void);
+int circuit_any_opened_circuits(void);
+int circuit_any_opened_circuits_cached(void);
+void circuit_cache_opened_circuit_state(int circuits_are_opened);
+
 const char *circuit_state_to_string(int state);
 const char *circuit_purpose_to_controller_string(uint8_t purpose);
 const char *circuit_purpose_to_controller_hs_state_string(uint8_t purpose);
@@ -58,6 +62,7 @@ void circuit_mark_all_dirty_circs_as_unusable(void);
 MOCK_DECL(void, circuit_mark_for_close_, (circuit_t *circ, int reason,
                                           int line, const char *file));
 int circuit_get_cpath_len(origin_circuit_t *circ);
+int circuit_get_cpath_opened_len(const origin_circuit_t *);
 void circuit_clear_cpath(origin_circuit_t *circ);
 crypt_path_t *circuit_get_cpath_hop(origin_circuit_t *circ, int hopnum);
 void circuit_get_all_pending_on_channel(smartlist_t *out,

+ 192 - 3
src/or/circuitstats.c

@@ -36,6 +36,8 @@
 #include "rendclient.h"
 #include "rendservice.h"
 #include "statefile.h"
+#include "circuitlist.h"
+#include "circuituse.h"
 
 #undef log
 #include <math.h>
@@ -43,6 +45,7 @@
 static void cbt_control_event_buildtimeout_set(
                                   const circuit_build_times_t *cbt,
                                   buildtimeout_set_event_t type);
+static void circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt);
 
 #define CBT_BIN_TO_MS(bin) ((bin)*CBT_BIN_WIDTH + (CBT_BIN_WIDTH/2))
 
@@ -540,6 +543,11 @@ circuit_build_times_reset(circuit_build_times_t *cbt)
   cbt->total_build_times = 0;
   cbt->build_times_idx = 0;
   cbt->have_computed_timeout = 0;
+
+  // Reset timeout and close counts
+  cbt->num_circ_succeeded = 0;
+  cbt->num_circ_closed = 0;
+  cbt->num_circ_timeouts = 0;
 }
 
 /**
@@ -615,6 +623,123 @@ circuit_build_times_rewind_history(circuit_build_times_t *cbt, int n)
 }
 #endif /* 0 */
 
+/**
+ * Mark this circuit as timed out, but change its purpose
+ * so that it continues to build, allowing us to measure
+ * its full build time.
+ */
+void
+circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ)
+{
+  control_event_circuit_status(circ,
+                               CIRC_EVENT_FAILED,
+                               END_CIRC_REASON_TIMEOUT);
+  circuit_change_purpose(TO_CIRCUIT(circ),
+                         CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT);
+  /* Record this event to check for too many timeouts
+   * in a row. This function does not record a time value yet
+   * (we do that later); it only counts the fact that we did
+   * have a timeout. We also want to avoid double-counting
+   * already "relaxed" circuits, which are counted in
+   * circuit_expire_building(). */
+  if (!circ->relaxed_timeout) {
+    int first_hop_succeeded = circ->cpath &&
+          circ->cpath->state == CPATH_STATE_OPEN;
+
+    circuit_build_times_count_timeout(
+                                 get_circuit_build_times_mutable(),
+                                 first_hop_succeeded);
+  }
+}
+
+/**
+ * Perform the build time work that needs to be done when a circuit
+ * completes a hop.
+ *
+ * This function decides if we should record a circuit's build time
+ * in our histogram data and other statistics, and if so, records it.
+ * It also will mark circuits that have already timed out as
+ * measurement-only circuits, so they can continue to build but
+ * not get used.
+ *
+ * For this, we want to consider circuits that will eventually make
+ * it to the third hop. For circuits longer than 3 hops, we want to
+ * record their build time when they reach the third hop, but let
+ * them continue (and not count them later). For circuits that are
+ * exactly 3 hops, this will count them when they are completed. We
+ * do this so that CBT is always gathering statistics on circuits
+ * of the same length, regardless of their type.
+ */
+void
+circuit_build_times_handle_completed_hop(origin_circuit_t *circ)
+{
+  struct timeval end;
+  long timediff;
+
+  /* If circuit build times are disabled, let circuit_expire_building()
+   * handle it.. */
+  if (circuit_build_times_disabled(get_options())) {
+    return;
+  }
+
+  /* Is this a circuit for which the timeout applies in a straight-forward
+   * way? If so, handle it below. If not, just return (and let
+   * circuit_expire_building() eventually take care of it).
+   */
+  if (!circuit_timeout_want_to_count_circ(circ)) {
+    return;
+  }
+
+  tor_gettimeofday(&end);
+  timediff = tv_mdiff(&circ->base_.timestamp_began, &end);
+
+  /* Check if we would have timed out already. If so, change the
+   * purpose here. But don't do any timeout handling here if there
+   * are no circuits opened yet. Save it for circuit_expire_building()
+   * (to allow it to handle timeout "relaxing" over there). */
+  if (timediff > get_circuit_build_timeout_ms() &&
+      circuit_any_opened_circuits_cached()) {
+
+    /* Circuits are allowed to last longer for measurement.
+     * Switch their purpose and wait. */
+    if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
+      log_info(LD_CIRC,
+               "Deciding to timeout circuit "U64_FORMAT"\n",
+               U64_PRINTF_ARG(circ->global_identifier));
+      circuit_build_times_mark_circ_as_measurement_only(circ);
+    }
+  }
+
+  /* If the circuit is built to exactly the DEFAULT_ROUTE_LEN,
+   * add it to our buildtimes. */
+  if (circuit_get_cpath_opened_len(circ) == DEFAULT_ROUTE_LEN) {
+    /* If the circuit build time is much greater than we would have cut
+     * it off at, we probably had a suspend event along this codepath,
+     * and we should discard the value.
+     */
+    if (timediff < 0 ||
+        timediff > 2*get_circuit_build_close_time_ms()+1000) {
+      log_notice(LD_CIRC, "Strange value for circuit build time: %ldmsec. "
+                 "Assuming clock jump. Purpose %d (%s)", timediff,
+                 circ->base_.purpose,
+                 circuit_purpose_to_string(circ->base_.purpose));
+    } else {
+      /* Only count circuit times if the network is live */
+      if (circuit_build_times_network_check_live(
+                            get_circuit_build_times())) {
+        circuit_build_times_add_time(get_circuit_build_times_mutable(),
+                                     (build_time_t)timediff);
+        circuit_build_times_set_timeout(get_circuit_build_times_mutable());
+      }
+
+      if (circ->base_.purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
+        circuit_build_times_network_circ_success(
+                                      get_circuit_build_times_mutable());
+      }
+    }
+  }
+}
+
 /**
  * Add a new build time value <b>time</b> to the set of build times. Time
  * units are milliseconds.
@@ -1290,9 +1415,32 @@ circuit_build_times_network_is_live(circuit_build_times_t *cbt)
 }
 
 /**
- * Called to indicate that we completed a circuit. Because this circuit
+ * Non-destructively scale all of our circuit success, timeout, and close
+ * counts down by a factor of two. Scaling in this way preserves the
+ * ratios between succeeded vs timed out vs closed circuits, so that
+ * our statistics don't change when we scale.
+ *
+ * This is used only in the rare event that we build more than
+ * INT32_MAX circuits.  Since the num_circ_* variables are
+ * uint32_t, we won't even be close to overflowing them.
+ */
+void
+circuit_build_times_scale_circ_counts(circuit_build_times_t *cbt)
+{
+  cbt->num_circ_succeeded /= 2;
+  cbt->num_circ_timeouts /= 2;
+  cbt->num_circ_closed /= 2;
+}
+
+/**
+ * Called to indicate that we "completed" a circuit. Because this circuit
  * succeeded, it doesn't count as a timeout-after-the-first-hop.
  *
+ * (For the purposes of the cbt code, we consider a circuit "completed" if
+ * it has 3 hops, regardless of its final hop count. We do this because
+ * we're trying to answer the question, "how long should a circuit take to
+ * reach the 3-hop count".)
+ *
  * This is used by circuit_build_times_network_check_changed() to determine
  * if we had too many recent timeouts and need to reset our learned timeout
  * to something higher.
@@ -1300,6 +1448,14 @@ circuit_build_times_network_is_live(circuit_build_times_t *cbt)
 void
 circuit_build_times_network_circ_success(circuit_build_times_t *cbt)
 {
+  // Count circuit success
+  cbt->num_circ_succeeded++;
+
+  // If we're going to wrap int32, scale everything
+  if (cbt->num_circ_succeeded >= INT32_MAX) {
+    circuit_build_times_scale_circ_counts(cbt);
+  }
+
   /* Check for NULLness because we might not be using adaptive timeouts */
   if (cbt->liveness.timeouts_after_firsthop &&
       cbt->liveness.num_recent_circs > 0) {
@@ -1322,6 +1478,14 @@ static void
 circuit_build_times_network_timeout(circuit_build_times_t *cbt,
                                     int did_onehop)
 {
+  // Count circuit timeout
+  cbt->num_circ_timeouts++;
+
+  // If we're going to wrap int32, scale everything
+  if (cbt->num_circ_timeouts >= INT32_MAX) {
+    circuit_build_times_scale_circ_counts(cbt);
+  }
+
   /* Check for NULLness because we might not be using adaptive timeouts */
   if (cbt->liveness.timeouts_after_firsthop &&
       cbt->liveness.num_recent_circs > 0) {
@@ -1347,6 +1511,15 @@ circuit_build_times_network_close(circuit_build_times_t *cbt,
                                     int did_onehop, time_t start_time)
 {
   time_t now = time(NULL);
+
+  // Count circuit close
+  cbt->num_circ_closed++;
+
+  // If we're going to wrap int32, scale everything
+  if (cbt->num_circ_closed >= INT32_MAX) {
+    circuit_build_times_scale_circ_counts(cbt);
+  }
+
   /*
    * Check if this is a timeout that was for a circuit that spent its
    * entire existence during a time where we have had no network activity.
@@ -1701,6 +1874,8 @@ cbt_control_event_buildtimeout_set(const circuit_build_times_t *cbt,
 {
   char *args = NULL;
   double qnt;
+  double timeout_rate = 0.0;
+  double close_rate = 0.0;
 
   switch (type) {
     case BUILDTIMEOUT_SET_EVENT_RESET:
@@ -1715,15 +1890,29 @@ cbt_control_event_buildtimeout_set(const circuit_build_times_t *cbt,
       break;
   }
 
+  /* The timeout rate is the ratio of the timeout count over
+   * the total number of circuits attempted. The total number of
+   * circuits is (timeouts+succeeded+closed), since a circuit can
+   * either timeout, close, or succeed. We cast the denominator
+   * to promote it to double before the addition, to avoid int32
+   * overflow. */
+  const double total_circuits =
+    ((double)cbt->num_circ_timeouts) + cbt->num_circ_succeeded
+       + cbt->num_circ_closed;
+  if (total_circuits >= 1.0) {
+    timeout_rate = cbt->num_circ_timeouts / total_circuits;
+    close_rate = cbt->num_circ_closed / total_circuits;
+  }
+
   tor_asprintf(&args, "TOTAL_TIMES=%lu "
                "TIMEOUT_MS=%lu XM=%lu ALPHA=%f CUTOFF_QUANTILE=%f "
                "TIMEOUT_RATE=%f CLOSE_MS=%lu CLOSE_RATE=%f",
                (unsigned long)cbt->total_build_times,
                (unsigned long)cbt->timeout_ms,
                (unsigned long)cbt->Xm, cbt->alpha, qnt,
-               circuit_build_times_timeout_rate(cbt),
+               timeout_rate,
                (unsigned long)cbt->close_ms,
-               circuit_build_times_close_rate(cbt));
+               close_rate);
 
   control_event_buildtimeout_set(type, args);
 

+ 12 - 0
src/or/circuitstats.h

@@ -34,6 +34,7 @@ void circuit_build_times_set_timeout(circuit_build_times_t *cbt);
 int circuit_build_times_add_time(circuit_build_times_t *cbt,
                                  build_time_t time);
 int circuit_build_times_needs_circuits(const circuit_build_times_t *cbt);
+void circuit_build_times_handle_completed_hop(origin_circuit_t *circ);
 
 int circuit_build_times_needs_circuits_now(const circuit_build_times_t *cbt);
 void circuit_build_times_init(circuit_build_times_t *cbt);
@@ -44,6 +45,7 @@ double circuit_build_times_timeout_rate(const circuit_build_times_t *cbt);
 double circuit_build_times_close_rate(const circuit_build_times_t *cbt);
 
 void circuit_build_times_update_last_circ(circuit_build_times_t *cbt);
+void circuit_build_times_mark_circ_as_measurement_only(origin_circuit_t *circ);
 
 #ifdef CIRCUITSTATS_PRIVATE
 STATIC double circuit_build_times_calculate_timeout(circuit_build_times_t *cbt,
@@ -94,6 +96,16 @@ struct circuit_build_times_s {
   double timeout_ms;
   /** How long we wait before actually closing the circuit. */
   double close_ms;
+  /** Total succeeded counts. Old measurements may be scaled downward if
+   * we've seen a lot of circuits. */
+  uint32_t num_circ_succeeded;
+  /** Total timeout counts.  Old measurements may be scaled downward if
+   * we've seen a lot of circuits. */
+  uint32_t num_circ_timeouts;
+  /** Total closed counts.  Old measurements may be scaled downward if
+   * we've seen a lot of circuits.*/
+  uint32_t num_circ_closed;
+
 };
 #endif /* defined(CIRCUITSTATS_PRIVATE) */
 

+ 27 - 32
src/or/circuituse.c

@@ -412,8 +412,17 @@ circuit_conforms_to_options(const origin_circuit_t *circ,
 }
 #endif /* 0 */
 
-/** Close all circuits that start at us, aren't open, and were born
+/**
+ * Close all circuits that start at us, aren't open, and were born
  * at least CircuitBuildTimeout seconds ago.
+ *
+ * TODO: This function is now partially redundant to
+ * circuit_build_times_handle_completed_hop(), but that function only
+ * covers circuits up to and including 3 hops that are still actually
+ * completing hops. However, circuit_expire_building() also handles longer
+ * circuits, as well as circuits that are completely stalled.
+ * In the future (after prop247/other path selection revamping), we probably
+ * want to eliminate this rats nest in favor of a simpler approach.
  */
 void
 circuit_expire_building(void)
@@ -435,21 +444,7 @@ circuit_expire_building(void)
    * we want to be more lenient with timeouts, in case the
    * user has relocated and/or changed network connections.
    * See bug #3443. */
-  SMARTLIST_FOREACH_BEGIN(circuit_get_global_list(), circuit_t *, next_circ) {
-    if (!CIRCUIT_IS_ORIGIN(next_circ) || /* didn't originate here */
-        next_circ->marked_for_close) { /* don't mess with marked circs */
-      continue;
-    }
-
-    if (TO_ORIGIN_CIRCUIT(next_circ)->has_opened &&
-        next_circ->state == CIRCUIT_STATE_OPEN &&
-        TO_ORIGIN_CIRCUIT(next_circ)->build_state &&
-        TO_ORIGIN_CIRCUIT(next_circ)->build_state->desired_path_len
-          == DEFAULT_ROUTE_LEN) {
-      any_opened_circs = 1;
-      break;
-    }
-  } SMARTLIST_FOREACH_END(next_circ);
+  any_opened_circs = circuit_any_opened_circuits();
 
 #define SET_CUTOFF(target, msec) do {                       \
     long ms = tor_lround(msec);                             \
@@ -493,6 +488,10 @@ circuit_expire_building(void)
   SET_CUTOFF(general_cutoff, get_circuit_build_timeout_ms());
   SET_CUTOFF(begindir_cutoff, get_circuit_build_timeout_ms());
 
+  // TODO: We should probably use route_len_for_purpose() here instead,
+  // except that does not count the extra round trip for things like server
+  // intros and rends.
+
   /* > 3hop circs seem to have a 1.0 second delay on their cannibalized
    * 4th hop. */
   SET_CUTOFF(fourhop_cutoff, get_circuit_build_timeout_ms() * (10/6.0) + 1000);
@@ -585,7 +584,8 @@ circuit_expire_building(void)
                    TO_ORIGIN_CIRCUIT(victim)->build_state->desired_path_len :
                    -1,
                  circuit_state_to_string(victim->state),
-                 channel_state_to_string(victim->n_chan->state));
+                 victim->n_chan ?
+                    channel_state_to_string(victim->n_chan->state) : "none");
 
           /* We count the timeout here for CBT, because technically this
            * was a timeout, and the timeout value needs to reset if we
@@ -610,7 +610,8 @@ circuit_expire_building(void)
                    TO_ORIGIN_CIRCUIT(victim)->build_state->desired_path_len :
                    -1,
                  circuit_state_to_string(victim->state),
-                 channel_state_to_string(victim->n_chan->state),
+                 victim->n_chan ?
+                    channel_state_to_string(victim->n_chan->state) : "none",
                  (long)build_close_ms);
       }
     }
@@ -694,23 +695,17 @@ circuit_expire_building(void)
 
       if (circuit_timeout_want_to_count_circ(TO_ORIGIN_CIRCUIT(victim)) &&
           circuit_build_times_enough_to_compute(get_circuit_build_times())) {
+
+        log_info(LD_CIRC,
+                   "Deciding to count the timeout for circuit "U64_FORMAT"\n",
+                   U64_PRINTF_ARG(
+                       TO_ORIGIN_CIRCUIT(victim)->global_identifier));
+
         /* Circuits are allowed to last longer for measurement.
          * Switch their purpose and wait. */
         if (victim->purpose != CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT) {
-          control_event_circuit_status(TO_ORIGIN_CIRCUIT(victim),
-                                       CIRC_EVENT_FAILED,
-                                       END_CIRC_REASON_TIMEOUT);
-          circuit_change_purpose(victim, CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT);
-          /* Record this failure to check for too many timeouts
-           * in a row. This function does not record a time value yet
-           * (we do that later); it only counts the fact that we did
-           * have a timeout. We also want to avoid double-counting
-           * already "relaxed" circuits, which are counted above. */
-          if (!TO_ORIGIN_CIRCUIT(victim)->relaxed_timeout) {
-            circuit_build_times_count_timeout(
-                                         get_circuit_build_times_mutable(),
-                                         first_hop_succeeded);
-          }
+          circuit_build_times_mark_circ_as_measurement_only(TO_ORIGIN_CIRCUIT(
+                                                            victim));
           continue;
         }
 

+ 1 - 0
src/test/Makefile.nmake

@@ -18,6 +18,7 @@ TEST_OBJECTS = test.obj test_addr.obj test_channel.obj test_channeltls.obj \
         test_config.obj test_connection.obj \
 	test_cell_formats.obj test_relay.obj test_replay.obj \
 	test_channelpadding.obj \
+	test_circuitstats.obj \
 	test_scheduler.obj test_introduce.obj test_hs.obj tinytest.obj
 
 tinytest.obj: ..\ext\tinytest.c

+ 1 - 0
src/test/include.am

@@ -98,6 +98,7 @@ src_test_test_SOURCES = \
 	src/test/test_circuitmux.c \
 	src/test/test_circuitbuild.c \
 	src/test/test_circuituse.c \
+	src/test/test_circuitstats.c \
 	src/test/test_compat_libevent.c \
 	src/test/test_config.c \
 	src/test/test_connection.c \

+ 1 - 0
src/test/test.c

@@ -1178,6 +1178,7 @@ struct testgroup_t testgroups[] = {
   { "circuitlist/", circuitlist_tests },
   { "circuitmux/", circuitmux_tests },
   { "circuituse/", circuituse_tests },
+  { "circuitstats/", circuitstats_tests },
   { "compat/libevent/", compat_libevent_tests },
   { "config/", config_tests },
   { "connection/", connection_tests },

+ 1 - 0
src/test/test.h

@@ -188,6 +188,7 @@ extern struct testcase_t circuitbuild_tests[];
 extern struct testcase_t circuitlist_tests[];
 extern struct testcase_t circuitmux_tests[];
 extern struct testcase_t circuituse_tests[];
+extern struct testcase_t circuitstats_tests[];
 extern struct testcase_t compat_libevent_tests[];
 extern struct testcase_t config_tests[];
 extern struct testcase_t connection_tests[];

+ 199 - 0
src/test/test_circuitstats.c

@@ -0,0 +1,199 @@
+/* Copyright (c) 2017, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#define CIRCUITBUILD_PRIVATE
+#define CIRCUITSTATS_PRIVATE
+#define CHANNEL_PRIVATE_
+
+#include "or.h"
+#include "test.h"
+#include "test_helpers.h"
+#include "log_test_helpers.h"
+#include "config.h"
+#include "circuitlist.h"
+#include "circuitbuild.h"
+#include "circuitstats.h"
+#include "circuituse.h"
+#include "channel.h"
+
+void test_circuitstats_timeout(void *arg);
+void test_circuitstats_hoplen(void *arg);
+origin_circuit_t *subtest_fourhop_circuit(struct timeval, int);
+origin_circuit_t *add_opened_threehop(void);
+origin_circuit_t *build_unopened_fourhop(struct timeval);
+
+void circuit_free(circuit_t *circ);
+int onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice);
+
+static int marked_for_close;
+/* Mock function because we are not trying to test the close circuit that does
+ * an awful lot of checks on the circuit object. */
+static void
+mock_circuit_mark_for_close(circuit_t *circ, int reason, int line,
+                            const char *file)
+{
+  (void) circ;
+  (void) reason;
+  (void) line;
+  (void) file;
+  marked_for_close = 1;
+  return;
+}
+
+origin_circuit_t *
+add_opened_threehop(void)
+{
+  origin_circuit_t *or_circ = origin_circuit_new();
+  extend_info_t fakehop;
+  memset(&fakehop, 0, sizeof(fakehop));
+
+  TO_CIRCUIT(or_circ)->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  or_circ->build_state = tor_malloc_zero(sizeof(cpath_build_state_t));
+  or_circ->build_state->desired_path_len = DEFAULT_ROUTE_LEN;
+
+  onion_append_hop(&or_circ->cpath, &fakehop);
+  onion_append_hop(&or_circ->cpath, &fakehop);
+  onion_append_hop(&or_circ->cpath, &fakehop);
+
+  or_circ->has_opened = 1;
+  TO_CIRCUIT(or_circ)->state = CIRCUIT_STATE_OPEN;
+  TO_CIRCUIT(or_circ)->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  return or_circ;
+}
+
+origin_circuit_t *
+build_unopened_fourhop(struct timeval circ_start_time)
+{
+  origin_circuit_t *or_circ = origin_circuit_new();
+  extend_info_t *fakehop = tor_malloc_zero(sizeof(extend_info_t));
+  memset(fakehop, 0, sizeof(extend_info_t));
+
+  TO_CIRCUIT(or_circ)->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  TO_CIRCUIT(or_circ)->timestamp_began = circ_start_time;
+  TO_CIRCUIT(or_circ)->timestamp_created = circ_start_time;
+
+  or_circ->build_state = tor_malloc_zero(sizeof(cpath_build_state_t));
+  or_circ->build_state->desired_path_len = 4;
+
+  onion_append_hop(&or_circ->cpath, fakehop);
+  onion_append_hop(&or_circ->cpath, fakehop);
+  onion_append_hop(&or_circ->cpath, fakehop);
+  onion_append_hop(&or_circ->cpath, fakehop);
+
+  return or_circ;
+}
+
+origin_circuit_t *
+subtest_fourhop_circuit(struct timeval circ_start_time, int should_timeout)
+{
+  origin_circuit_t *or_circ = build_unopened_fourhop(circ_start_time);
+
+  // Now make them open one at a time and call
+  // circuit_build_times_handle_completed_hop();
+  or_circ->cpath->state = CPATH_STATE_OPEN;
+  circuit_build_times_handle_completed_hop(or_circ);
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ, 0);
+
+  or_circ->cpath->next->state = CPATH_STATE_OPEN;
+  circuit_build_times_handle_completed_hop(or_circ);
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ, 0);
+
+  // Third hop: We should count it now.
+  or_circ->cpath->next->next->state = CPATH_STATE_OPEN;
+  circuit_build_times_handle_completed_hop(or_circ);
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ,
+            !should_timeout); // 1 if counted, 0 otherwise
+
+  // Fourth hop: Don't double count
+  or_circ->cpath->next->next->next->state = CPATH_STATE_OPEN;
+  circuit_build_times_handle_completed_hop(or_circ);
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ,
+            !should_timeout);
+
+ done:
+  return or_circ;
+}
+
+void
+test_circuitstats_hoplen(void *arg)
+{
+  /* Plan:
+   *   0. Test no other opened circs (relaxed timeout)
+   *   1. Check >3 hop circ building w/o timeout
+   *   2. Check >3 hop circs w/ timeouts..
+   */
+  struct timeval circ_start_time;
+  origin_circuit_t *threehop = NULL;
+  origin_circuit_t *fourhop = NULL;
+  (void)arg;
+  MOCK(circuit_mark_for_close_, mock_circuit_mark_for_close);
+
+  circuit_build_times_init(get_circuit_build_times_mutable());
+
+  // Let's set a close_ms to 2X the initial timeout, so we can
+  // test relaxed functionality (which uses the close_ms timeout)
+  get_circuit_build_times_mutable()->close_ms *= 2;
+
+  tor_gettimeofday(&circ_start_time);
+  circ_start_time.tv_sec -= 119; // make us hit "relaxed" cutoff
+
+  // Test 1: Build a fourhop circuit that should get marked
+  // as relaxed and eventually counted by circuit_expire_building
+  // (but not before)
+  fourhop = subtest_fourhop_circuit(circ_start_time, 0);
+  tt_int_op(fourhop->relaxed_timeout, OP_EQ, 0);
+  tt_int_op(marked_for_close, OP_EQ, 0);
+  circuit_expire_building();
+  tt_int_op(marked_for_close, OP_EQ, 0);
+  tt_int_op(fourhop->relaxed_timeout, OP_EQ, 1);
+  TO_CIRCUIT(fourhop)->timestamp_began.tv_sec -= 119;
+  circuit_expire_building();
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ, 1);
+  tt_int_op(marked_for_close, OP_EQ, 1);
+
+  circuit_free(TO_CIRCUIT(fourhop));
+  circuit_build_times_reset(get_circuit_build_times_mutable());
+
+  // Test 2: Add a threehop circuit for non-relaxed timeouts
+  threehop = add_opened_threehop();
+
+  /* This circuit should not timeout */
+  tor_gettimeofday(&circ_start_time);
+  circ_start_time.tv_sec -= 59;
+  fourhop = subtest_fourhop_circuit(circ_start_time, 0);
+  circuit_expire_building();
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ, 1);
+  tt_int_op(TO_CIRCUIT(fourhop)->purpose, OP_NE,
+            CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT);
+
+  circuit_free((circuit_t *)fourhop);
+  circuit_build_times_reset(get_circuit_build_times_mutable());
+
+  /* Test 3: This circuit should now time out and get marked as a
+   * measurement circuit, but still get counted (and counted only once)
+   */
+  circ_start_time.tv_sec -= 2;
+  fourhop = subtest_fourhop_circuit(circ_start_time, 0);
+  tt_int_op(TO_CIRCUIT(fourhop)->purpose, OP_EQ,
+            CIRCUIT_PURPOSE_C_MEASURE_TIMEOUT);
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ, 1);
+  circuit_expire_building();
+  tt_int_op(get_circuit_build_times()->total_build_times, OP_EQ, 1);
+
+ done:
+  UNMOCK(circuit_mark_for_close_);
+  circuit_free(TO_CIRCUIT(threehop));
+  circuit_free(TO_CIRCUIT(fourhop));
+  circuit_build_times_free_timeouts(get_circuit_build_times_mutable());
+}
+
+#define TEST_CIRCUITSTATS(name, flags) \
+    { #name, test_##name, (flags), NULL, NULL }
+
+struct testcase_t circuitstats_tests[] = {
+  TEST_CIRCUITSTATS(circuitstats_hoplen, TT_FORK),
+  END_OF_TESTCASES
+};
+