Browse Source

Merge branch 'dirreq-timing'

Nick Mathewson 16 years ago
parent
commit
022d251cb7
10 changed files with 435 additions and 42 deletions
  1. 8 5
      ChangeLog
  2. 4 4
      configure.in
  3. 24 10
      src/or/config.c
  4. 7 0
      src/or/connection.c
  5. 10 0
      src/or/connection_edge.c
  6. 23 2
      src/or/directory.c
  7. 280 17
      src/or/geoip.c
  8. 49 3
      src/or/or.h
  9. 29 0
      src/or/relay.c
  10. 1 1
      src/or/router.c

+ 8 - 5
ChangeLog

@@ -15,11 +15,14 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
     - The memarea code now uses a sentinel value at the end of each area
     - The memarea code now uses a sentinel value at the end of each area
       to make sure nothing writes beyond the end of an area.  This might
       to make sure nothing writes beyond the end of an area.  This might
       help debug some conceivable causes of bug 930.
       help debug some conceivable causes of bug 930.
-    - Directories that are configured with the --enable-geoip-stats flag
-      now write their GeoIP stats to disk exactly every 24 hours.
-      Estimated shares of v2 and v3 requests are determined as averages,
-      not at the end of a measurement period. Also, unresolved requests
-      are listed with country code '??'.
+    - Directories that are configured with the --enable-dirreq-stats flag
+      and have "DirReqStatistics 1" set write directory request stats to
+      disk every 24 hours. As compared to the --enable-geoip-stats flag
+      in 0.2.1.x, there are a few improvements: 1) stats are written to
+      disk exactly every 24 hours; 2) estimated shares of v2 and v3
+      requests are determined as mean values, not at the end of a
+      measurement period; 3) unresolved requests are listed with country
+      code '??'; 4) directories also measure download times.
     - Exit nodes can write statistics on the number of exit streams and
     - Exit nodes can write statistics on the number of exit streams and
       transferred bytes per port to disk every 24 hours.  To enable this,
       transferred bytes per port to disk every 24 hours.  To enable this,
       run configure with the --enable-exit-stats option, and set
       run configure with the --enable-exit-stats option, and set

+ 4 - 4
configure.in

@@ -92,11 +92,11 @@ if test "$enable_exit_stats" = "yes"; then
   AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits])
   AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits])
 fi
 fi
 
 
-AC_ARG_ENABLE(geoip-stats,
-     AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
+AC_ARG_ENABLE(dirreq-stats,
+     AS_HELP_STRING(--enable-dirreq-stats, enable code for directories to collect per-country statistics))
 
 
-if test "$enable_geoip_stats" = "yes"; then
-  AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
+if test "$enable_dirreq_stats" = "yes"; then
+  AC_DEFINE(ENABLE_DIRREQ_STATS, 1, [Defined if we try to collect per-country statistics])
 fi
 fi
 
 
 AC_ARG_ENABLE(buffer-stats,
 AC_ARG_ENABLE(buffer-stats,

+ 24 - 10
src/or/config.c

@@ -187,12 +187,13 @@ static config_var_t _option_vars[] = {
   V(DirPort,                     UINT,     "0"),
   V(DirPort,                     UINT,     "0"),
   V(DirPortFrontPage,            FILENAME, NULL),
   V(DirPortFrontPage,            FILENAME, NULL),
   OBSOLETE("DirPostPeriod"),
   OBSOLETE("DirPostPeriod"),
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   OBSOLETE("DirRecordUsageByCountry"),
   OBSOLETE("DirRecordUsageByCountry"),
   OBSOLETE("DirRecordUsageGranularity"),
   OBSOLETE("DirRecordUsageGranularity"),
   OBSOLETE("DirRecordUsageRetainIPs"),
   OBSOLETE("DirRecordUsageRetainIPs"),
   OBSOLETE("DirRecordUsageSaveInterval"),
   OBSOLETE("DirRecordUsageSaveInterval"),
 #endif
 #endif
+  V(DirReqStatistics,            BOOL,     "0"),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSPort,                     UINT,     "0"),
   V(DNSListenAddress,            LINELIST, NULL),
   V(DNSListenAddress,            LINELIST, NULL),
@@ -1376,17 +1377,25 @@ options_act(or_options_t *old_options)
     geoip_load_file(actual_fname, options);
     geoip_load_file(actual_fname, options);
     tor_free(actual_fname);
     tor_free(actual_fname);
   }
   }
-#ifdef ENABLE_GEOIP_STATS
-  /* Check if GeoIP database could be loaded. */
-  if (!geoip_is_loaded()) {
-    log_warn(LD_CONFIG, "Configured to measure GeoIP statistics, but no "
-                        "GeoIP database found!");
-    return -1;
+
+#ifdef ENABLE_DIRREQ_STATS
+  if (options->DirReqStatistics) {
+    /* Check if GeoIP database could be loaded. */
+    if (!geoip_is_loaded()) {
+      log_warn(LD_CONFIG, "Configured to measure directory request "
+               "statistics, but no GeoIP database found!");
+      return -1;
+    }
+    log_notice(LD_CONFIG, "Configured to count directory requests by "
+               "country and write aggregate statistics to disk. Check the "
+               "dirreq-stats file in your data directory that will first "
+               "be written in 24 hours from now.");
   }
   }
-  log_notice(LD_CONFIG, "Configured to measure usage by country and "
-    "write aggregate statistics to disk. Check the geoip-stats file "
-    "in your data directory once I've been running for 24 hours.");
+#else
+  log_warn(LD_CONFIG, "DirReqStatistics enabled, but Tor was built "
+           "without support for directory request statistics.");
 #endif
 #endif
+
 #ifdef ENABLE_EXIT_STATS
 #ifdef ENABLE_EXIT_STATS
   if (options->ExitPortStatistics)
   if (options->ExitPortStatistics)
     log_notice(LD_CONFIG, "Configured to measure exit port statistics. "
     log_notice(LD_CONFIG, "Configured to measure exit port statistics. "
@@ -1417,6 +1426,11 @@ options_act(or_options_t *old_options)
       log_warn(LD_CONFIG, "Bridges cannot be configured to measure "
       log_warn(LD_CONFIG, "Bridges cannot be configured to measure "
                "additional GeoIP statistics as entry guards.");
                "additional GeoIP statistics as entry guards.");
       return -1;
       return -1;
+    } else if (!geoip_is_loaded()) {
+      /* Check if GeoIP database could be loaded. */
+      log_warn(LD_CONFIG, "Configured to measure entry node statistics, "
+               "but no GeoIP database found!");
+      return -1;
     } else
     } else
       log_notice(LD_CONFIG, "Configured to measure entry node "
       log_notice(LD_CONFIG, "Configured to measure entry node "
                  "statistics. Look for the entry-stats file that will "
                  "statistics. Look for the entry-stats file that will "

+ 7 - 0
src/or/connection.c

@@ -2302,6 +2302,13 @@ connection_handle_write(connection_t *conn, int force)
     /* else open, or closing */
     /* else open, or closing */
     result = flush_buf_tls(or_conn->tls, conn->outbuf,
     result = flush_buf_tls(or_conn->tls, conn->outbuf,
                            max_to_write, &conn->outbuf_flushlen);
                            max_to_write, &conn->outbuf_flushlen);
+#ifdef ENABLE_DIRREQ_STATS
+    /* If we just flushed the last bytes, check if this tunneled dir
+     * request is done. */
+    if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id)
+      geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED,
+                                DIRREQ_OR_CONN_BUFFER_FLUSHED);
+#endif
     switch (result) {
     switch (result) {
       CASE_TOR_TLS_ERROR_ANY:
       CASE_TOR_TLS_ERROR_ANY:
       case TOR_TLS_CLOSE:
       case TOR_TLS_CLOSE:

+ 10 - 0
src/or/connection_edge.c

@@ -2551,6 +2551,11 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ)
 
 
   log_debug(LD_EXIT,"Creating new exit connection.");
   log_debug(LD_EXIT,"Creating new exit connection.");
   n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET);
   n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET);
+#ifdef ENABLE_DIRREQ_STATS
+  /* Remember the tunneled request ID in the new edge connection, so that
+   * we can measure download times. */
+  TO_CONN(n_stream)->dirreq_id = circ->dirreq_id;
+#endif
   n_stream->_base.purpose = EXIT_PURPOSE_CONNECT;
   n_stream->_base.purpose = EXIT_PURPOSE_CONNECT;
 
 
   n_stream->stream_id = rh.stream_id;
   n_stream->stream_id = rh.stream_id;
@@ -2787,6 +2792,11 @@ connection_exit_connect_dir(edge_connection_t *exitconn)
   dirconn->_base.purpose = DIR_PURPOSE_SERVER;
   dirconn->_base.purpose = DIR_PURPOSE_SERVER;
   dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT;
   dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT;
 
 
+#ifdef ENABLE_DIRREQ_STATS
+  /* Note that the new dir conn belongs to the same tunneled request as
+   * the edge conn, so that we can measure download times. */
+  TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id;
+#endif
   connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn));
   connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn));
 
 
   if (connection_add(TO_CONN(exitconn))<0) {
   if (connection_add(TO_CONN(exitconn))<0) {

+ 23 - 2
src/or/directory.c

@@ -2562,12 +2562,21 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
       goto done;
       goto done;
     }
     }
 
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     {
     {
       struct in_addr in;
       struct in_addr in;
-      if (tor_inet_aton((TO_CONN(conn))->address, &in))
+      if (tor_inet_aton((TO_CONN(conn))->address, &in)) {
         geoip_note_client_seen(act, ntohl(in.s_addr), time(NULL));
         geoip_note_client_seen(act, ntohl(in.s_addr), time(NULL));
         geoip_note_ns_response(act, GEOIP_SUCCESS);
         geoip_note_ns_response(act, GEOIP_SUCCESS);
+        /* Note that a request for a network status has started, so that we
+         * can measure the download time later on. */
+        if (TO_CONN(conn)->dirreq_id)
+          geoip_start_dirreq(TO_CONN(conn)->dirreq_id, dlen, act,
+                             DIRREQ_TUNNELED);
+        else
+          geoip_start_dirreq(TO_CONN(conn)->global_identifier, dlen, act,
+                             DIRREQ_DIRECT);
+      }
     }
     }
 #endif
 #endif
 
 
@@ -3201,6 +3210,18 @@ connection_dir_finished_flushing(dir_connection_t *conn)
   tor_assert(conn);
   tor_assert(conn);
   tor_assert(conn->_base.type == CONN_TYPE_DIR);
   tor_assert(conn->_base.type == CONN_TYPE_DIR);
 
 
+#ifdef ENABLE_DIRREQ_STATS
+  /* Note that we have finished writing the directory response. For direct
+   * connections this means we're done; for tunneled connections it's only
+   * an intermediate step. */
+  if (TO_CONN(conn)->dirreq_id)
+    geoip_change_dirreq_state(TO_CONN(conn)->dirreq_id, DIRREQ_TUNNELED,
+                              DIRREQ_FLUSHING_DIR_CONN_FINISHED);
+  else
+    geoip_change_dirreq_state(TO_CONN(conn)->global_identifier,
+                              DIRREQ_DIRECT,
+                              DIRREQ_FLUSHING_DIR_CONN_FINISHED);
+#endif
   switch (conn->_base.state) {
   switch (conn->_base.state) {
     case DIR_CONN_STATE_CLIENT_SENDING:
     case DIR_CONN_STATE_CLIENT_SENDING:
       log_debug(LD_DIR,"client finished sending command.");
       log_debug(LD_DIR,"client finished sending command.");

+ 280 - 17
src/or/geoip.c

@@ -347,7 +347,7 @@ geoip_determine_shares(time_t now)
   last_time_determined_shares = now;
   last_time_determined_shares = now;
 }
 }
 
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
  * have been sent to us since the last call of this function up to time
  * have been sent to us since the last call of this function up to time
  * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
  * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
@@ -390,10 +390,11 @@ geoip_note_client_seen(geoip_client_action_t action,
     if (client_history_starts > now)
     if (client_history_starts > now)
       return;
       return;
   } else {
   } else {
-#ifndef ENABLE_GEOIP_STATS
+#ifndef ENABLE_DIRREQ_STATS
     return;
     return;
 #else
 #else
-    if (options->BridgeRelay || options->BridgeAuthoritativeDir)
+    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
+        !options->DirReqStatistics)
       return;
       return;
 #endif
 #endif
   }
   }
@@ -494,7 +495,7 @@ geoip_remove_old_clients(time_t cutoff)
     client_history_starts = cutoff;
     client_history_starts = cutoff;
 }
 }
 
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
 /** How many responses are we giving to clients requesting v2 network
 /** How many responses are we giving to clients requesting v2 network
  * statuses? */
  * statuses? */
 static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
 static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
@@ -511,8 +512,10 @@ void
 geoip_note_ns_response(geoip_client_action_t action,
 geoip_note_ns_response(geoip_client_action_t action,
                        geoip_ns_response_t response)
                        geoip_ns_response_t response)
 {
 {
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   static int arrays_initialized = 0;
   static int arrays_initialized = 0;
+  if (!get_options()->DirReqStatistics)
+    return;
   if (!arrays_initialized) {
   if (!arrays_initialized) {
     memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
     memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
     memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
     memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
@@ -570,6 +573,235 @@ _c_hist_compare(const void **_a, const void **_b)
     return strcmp(a->country, b->country);
     return strcmp(a->country, b->country);
 }
 }
 
 
+/** When there are incomplete directory requests at the end of a 24-hour
+ * period, consider those requests running for longer than this timeout as
+ * failed, the others as still running. */
+#define DIRREQ_TIMEOUT (10*60)
+
+/** Entry in a map from either conn->global_identifier for direct requests
+ * or a unique circuit identifier for tunneled requests to request time,
+ * response size, and completion time of a network status request. Used to
+ * measure download times of requests to derive average client
+ * bandwidths. */
+typedef struct dirreq_map_entry_t {
+  HT_ENTRY(dirreq_map_entry_t) node;
+  /** Unique identifier for this network status request; this is either the
+   * conn->global_identifier of the dir conn (direct request) or a new
+   * locally unique identifier of a circuit (tunneled request). This ID is
+   * only unique among other direct or tunneled requests, respectively. */
+  uint64_t dirreq_id;
+  unsigned int state:3; /**< State of this directory request. */
+  unsigned int type:1; /**< Is this a direct or a tunneled request? */
+  unsigned int completed:1; /**< Is this request complete? */
+  unsigned int action:2; /**< Is this a v2 or v3 request? */
+  /** When did we receive the request and start sending the response? */
+  struct timeval request_time;
+  size_t response_size; /**< What is the size of the response in bytes? */
+  struct timeval completion_time; /**< When did the request succeed? */
+} dirreq_map_entry_t;
+
+/** Map of all directory requests asking for v2 or v3 network statuses in
+ * the current dirreq-stats interval. Values are of type
+ * *<b>dirreq_map_entry_t</b>. */
+static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map =
+     HT_INITIALIZER();
+
+static int
+dirreq_map_ent_eq(const dirreq_map_entry_t *a,
+                  const dirreq_map_entry_t *b)
+{
+  return a->dirreq_id == b->dirreq_id && a->type == b->type;
+}
+
+static unsigned
+dirreq_map_ent_hash(const dirreq_map_entry_t *entry)
+{
+  unsigned u = (unsigned) entry->dirreq_id;
+  u += entry->type << 20;
+  return u;
+}
+
+HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
+             dirreq_map_ent_eq);
+HT_GENERATE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
+            dirreq_map_ent_eq, 0.6, malloc, realloc, free);
+
+/** Helper: Put <b>entry</b> into the map of directory requests using
+ * <b>type</b> and <b>dirreq_id</b> as key parts. If there is already a
+ * different entry for that key, print out a BUG warning and return. */
+static void
+_dirreq_map_put(dirreq_map_entry_t *entry, dirreq_type_t type,
+               uint64_t dirreq_id)
+{
+  dirreq_map_entry_t *old_ent;
+  tor_assert(entry->type == type);
+  tor_assert(entry->dirreq_id == dirreq_id);
+
+  /* XXXX022 once we're sure the bug case never happens, we can switch
+   * to HT_INSERT */
+  old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry);
+  if (old_ent && old_ent != entry) {
+    log_warn(LD_BUG, "Error when putting directory request into local "
+             "map. There was already an entry for the same identifier.");
+    return;
+  }
+}
+
+/** Helper: Look up and return an entry in the map of directory requests
+ * using <b>type</b> and <b>dirreq_id</b> as key parts. If there is no
+ * such entry, return NULL. */
+static dirreq_map_entry_t *
+_dirreq_map_get(dirreq_type_t type, uint64_t dirreq_id)
+{
+  dirreq_map_entry_t lookup;
+  lookup.type = type;
+  lookup.dirreq_id = dirreq_id;
+  return HT_FIND(dirreqmap, &dirreq_map, &lookup);
+}
+
+/** Note that a directory request for a network status (either direct or
+ * tunneled, see <b>type</b>) with unique ID <b>dirreq_id</b>, response
+ * size <b>response_size</b>, and action <b>action</b> (either v2 or v3)
+ * has started. */
+void
+geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
+                   geoip_client_action_t action, dirreq_type_t type)
+{
+  dirreq_map_entry_t *ent;
+  if (!get_options()->DirReqStatistics)
+    return;
+  ent = tor_malloc_zero(sizeof(dirreq_map_entry_t));
+  ent->dirreq_id = dirreq_id;
+  tor_gettimeofday(&ent->request_time);
+  ent->response_size = response_size;
+  ent->action = action;
+  ent->type = type;
+  _dirreq_map_put(ent, type, dirreq_id);
+}
+
+/** Change the state of the either direct or tunneled (see <b>type</b>)
+ * directory request with <b>dirreq_id</b> to <b>new_state</b> and
+ * possibly mark it as completed. If no entry can be found for the given
+ * key parts (e.g., if this is a directory request that we are not
+ * measuring, or one that was started in the previous measurement period),
+ * or if the state cannot be advanced to <b>new_state</b>, do nothing. */
+void
+geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
+                          dirreq_state_t new_state)
+{
+  dirreq_map_entry_t *ent;
+  if (!get_options()->DirReqStatistics)
+    return;
+  ent = _dirreq_map_get(type, dirreq_id);
+  if (!ent)
+    return;
+  if (new_state == DIRREQ_IS_FOR_NETWORK_STATUS)
+    return;
+  if (new_state - 1 != ent->state)
+    return;
+  ent->state = new_state;
+  if ((type == DIRREQ_DIRECT &&
+         new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED) ||
+      (type == DIRREQ_TUNNELED &&
+         new_state == DIRREQ_OR_CONN_BUFFER_FLUSHED)) {
+    tor_gettimeofday(&ent->completion_time);
+    ent->completed = 1;
+  }
+}
+
+#ifdef ENABLE_DIRREQ_STATS
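+/** Return a newly allocated comma-separated string containing statistics
+ * on network status downloads of the given <b>action</b> and <b>type</b>.
+ * The string contains the number of completed requests, timeouts, and
+ * still running requests, each rounded to the next multiple of
+ * DIR_REQ_GRANULARITY. If at least MIN_DIR_REQ_RESPONSES requests were
+ * completed, it also contains the observed download rates in bytes per
+ * second by deciles and quartiles. Entries that are counted here are
+ * removed from the map of directory requests. Return NULL if
+ * <b>action</b> is not a v2 or v3 network status request or if the
+ * string cannot be formatted. */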
+static char *
+geoip_get_dirreq_history(geoip_client_action_t action,
+                           dirreq_type_t type)
+{
+  char *result = NULL;
+  smartlist_t *dirreq_times = NULL;
+  uint32_t complete = 0, timeouts = 0, running = 0;
+  int i = 0, bufsize = 1024, written;
+  dirreq_map_entry_t **ptr, **next, *ent;
+  struct timeval now;
+
+  tor_gettimeofday(&now);
+  if (action != GEOIP_CLIENT_NETWORKSTATUS &&
+      action != GEOIP_CLIENT_NETWORKSTATUS_V2)
+    return NULL;
+  dirreq_times = smartlist_create();
+  for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) {
+    ent = *ptr;
+    if (ent->action != action || ent->type != type) {
+      next = HT_NEXT(dirreqmap, &dirreq_map, ptr);
+      continue;
+    } else {
+      if (ent->completed) {
+        uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t));
+        uint32_t time_diff = (uint32_t) tv_udiff(&ent->request_time,
+                                                 &ent->completion_time);
+        if (time_diff == 0)
+          time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
+                          * anyway by law of nature or something.. */
+        *bytes_per_second = 1000000 * ent->response_size / time_diff;
+        smartlist_add(dirreq_times, bytes_per_second);
+        complete++;
+      } else {
+        if (tv_udiff(&ent->request_time, &now) / 1000000 > DIRREQ_TIMEOUT)
+          timeouts++;
+        else
+          running++;
+      }
+      next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
+      tor_free(ent);
+    }
+  }
+#define DIR_REQ_GRANULARITY 4
+  complete = round_uint32_to_next_multiple_of(complete,
+                                              DIR_REQ_GRANULARITY);
+  timeouts = round_uint32_to_next_multiple_of(timeouts,
+                                              DIR_REQ_GRANULARITY);
+  running = round_uint32_to_next_multiple_of(running,
+                                             DIR_REQ_GRANULARITY);
+  result = tor_malloc_zero(bufsize);
+  written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
+                         "running=%u", complete, timeouts, running);
+  if (written < 0)
+    return NULL;
+#define MIN_DIR_REQ_RESPONSES 16
+  if (complete >= MIN_DIR_REQ_RESPONSES) {
+    uint32_t *dltimes = tor_malloc(sizeof(uint32_t) * complete);
+    SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, {
+      dltimes[i++] = *dlt;
+      tor_free(dlt);
+    });
+    median_uint32(dltimes, complete); /* sort */
+    written = tor_snprintf(result + written, bufsize - written,
+                           ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
+                           "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
+                           dltimes[0],
+                           dltimes[1*complete/10-1],
+                           dltimes[2*complete/10-1],
+                           dltimes[1*complete/4-1],
+                           dltimes[3*complete/10-1],
+                           dltimes[4*complete/10-1],
+                           dltimes[5*complete/10-1],
+                           dltimes[6*complete/10-1],
+                           dltimes[7*complete/10-1],
+                           dltimes[3*complete/4-1],
+                           dltimes[8*complete/10-1],
+                           dltimes[9*complete/10-1],
+                           dltimes[complete-1]);
+    tor_free(dltimes);
+  }
+  if (written < 0)
+    result = NULL;
+  smartlist_free(dirreq_times);
+  return result;
+}
+#endif
+
 /** How long do we have to have observed per-country request history before we
 /** How long do we have to have observed per-country request history before we
  * are willing to talk about it? */
  * are willing to talk about it? */
 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
@@ -584,7 +816,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
 {
 {
   char *result = NULL;
   char *result = NULL;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
 #endif
 #endif
   if (!geoip_is_loaded())
   if (!geoip_is_loaded())
@@ -599,7 +831,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned total = 0;
     unsigned total = 0;
     unsigned granularity = IP_GRANULARITY;
     unsigned granularity = IP_GRANULARITY;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     granularity = DIR_RECORD_USAGE_GRANULARITY;
     granularity = DIR_RECORD_USAGE_GRANULARITY;
 #endif
 #endif
     HT_FOREACH(ent, clientmap, &client_history) {
     HT_FOREACH(ent, clientmap, &client_history) {
@@ -667,7 +899,7 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   char *result;
   char *result;
   unsigned granularity = IP_GRANULARITY;
   unsigned granularity = IP_GRANULARITY;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   granularity = DIR_RECORD_USAGE_GRANULARITY;
   granularity = DIR_RECORD_USAGE_GRANULARITY;
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
 #endif
 #endif
@@ -712,14 +944,14 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   return result;
   return result;
 }
 }
 
 
-/** Store all our geoip statistics into $DATADIR/geoip-stats. */
+/** Store all our geoip statistics into $DATADIR/dirreq-stats. */
 static void
 static void
 dump_geoip_stats(void)
 dump_geoip_stats(void)
 {
 {
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   time_t now = time(NULL);
   time_t now = time(NULL);
   time_t request_start;
   time_t request_start;
-  char *filename = get_datadir_fname("geoip-stats");
+  char *filename = get_datadir_fname("dirreq-stats");
   char *data_v2 = NULL, *data_v3 = NULL;
   char *data_v2 = NULL, *data_v3 = NULL;
   char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
   char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
   open_file_t *open_file = NULL;
   open_file_t *open_file = NULL;
@@ -727,6 +959,9 @@ dump_geoip_stats(void)
   FILE *out;
   FILE *out;
   int i;
   int i;
 
 
+  if (!get_options()->DirReqStatistics)
+    goto done;
+
   data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
   data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
   data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   format_iso_time(since, geoip_get_history_start());
   format_iso_time(since, geoip_get_history_start());
@@ -785,6 +1020,23 @@ dump_geoip_stats(void)
       goto done;
       goto done;
   }
   }
 
 
+  data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
+                                       DIRREQ_DIRECT);
+  data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
+                                       DIRREQ_DIRECT);
+  if (fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n",
+              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
+    goto done;
+  tor_free(data_v2);
+  tor_free(data_v3);
+  data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
+                                       DIRREQ_TUNNELED);
+  data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
+                                       DIRREQ_TUNNELED);
+  if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n",
+              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
+    goto done;
+
   finish_writing_to_file(open_file);
   finish_writing_to_file(open_file);
   open_file = NULL;
   open_file = NULL;
  done:
  done:
@@ -873,13 +1125,24 @@ clear_geoip_db(void)
 void
 void
 geoip_free_all(void)
 geoip_free_all(void)
 {
 {
-  clientmap_entry_t **ent, **next, *this;
-  for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
-    this = *ent;
-    next = HT_NEXT_RMV(clientmap, &client_history, ent);
-    tor_free(this);
+  {
+    clientmap_entry_t **ent, **next, *this;
+    for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
+      this = *ent;
+      next = HT_NEXT_RMV(clientmap, &client_history, ent);
+      tor_free(this);
+    }
+    HT_CLEAR(clientmap, &client_history);
+  }
+  {
+    dirreq_map_entry_t **ent, **next, *this;
+    for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
+      this = *ent;
+      next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
+      tor_free(this);
+    }
+    HT_CLEAR(dirreqmap, &dirreq_map);
   }
   }
-  HT_CLEAR(clientmap, &client_history);
 
 
   clear_geoip_db();
   clear_geoip_db();
 }
 }

+ 49 - 3
src/or/or.h

@@ -20,8 +20,8 @@
 #ifndef INSTRUMENT_DOWNLOADS
 #ifndef INSTRUMENT_DOWNLOADS
 #define INSTRUMENT_DOWNLOADS 1
 #define INSTRUMENT_DOWNLOADS 1
 #endif
 #endif
-#ifndef ENABLE_GEOIP_STATS
-#define ENABLE_GEOIP_STATS 1
+#ifndef ENABLE_DIRREQ_STATS
+#define ENABLE_DIRREQ_STATS 1
 #endif
 #endif
 #ifndef ENABLE_BUFFER_STATS
 #ifndef ENABLE_BUFFER_STATS
 #define ENABLE_BUFFER_STATS 1
 #define ENABLE_BUFFER_STATS 1
@@ -970,6 +970,10 @@ typedef struct connection_t {
    * to the evdns_server_port is uses to listen to and answer connections. */
    * to the evdns_server_port is uses to listen to and answer connections. */
   struct evdns_server_port *dns_server_port;
   struct evdns_server_port *dns_server_port;
 
 
+#ifdef ENABLE_DIRREQ_STATS
+  /** Unique ID for measuring tunneled network status requests. */
+  uint64_t dirreq_id;
+#endif
 } connection_t;
 } connection_t;
 
 
 /** Stores flags and information related to the portion of a v2 Tor OR
 /** Stores flags and information related to the portion of a v2 Tor OR
@@ -1956,6 +1960,10 @@ typedef struct circuit_t {
    * linked to an OR connection. */
    * linked to an OR connection. */
   struct circuit_t *prev_active_on_n_conn;
   struct circuit_t *prev_active_on_n_conn;
   struct circuit_t *next; /**< Next circuit in linked list of all circuits. */
   struct circuit_t *next; /**< Next circuit in linked list of all circuits. */
+#ifdef ENABLE_DIRREQ_STATS
+  /** Unique ID for measuring tunneled network status requests. */
+  uint64_t dirreq_id;
+#endif
 } circuit_t;
 } circuit_t;
 
 
 /** Largest number of relay_early cells that we can send on a given
 /** Largest number of relay_early cells that we can send on a given
@@ -2492,6 +2500,10 @@ typedef struct {
    * exit allows it, we use it. */
    * exit allows it, we use it. */
   int AllowSingleHopCircuits;
   int AllowSingleHopCircuits;
 
 
+  /** If true, the user wants us to collect statistics on clients
+   * requesting network statuses from us as directory. */
+  int DirReqStatistics;
+
   /** If true, the user wants us to collect statistics on port usage. */
   /** If true, the user wants us to collect statistics on port usage. */
   int ExitPortStatistics;
   int ExitPortStatistics;
 
 
@@ -2556,7 +2568,7 @@ typedef struct {
   int BridgeRecordUsageByCountry;
   int BridgeRecordUsageByCountry;
 
 
 #if 0
 #if 0
-  /** If true, and Tor is built with GEOIP_STATS support, and we're a
+  /** If true, and Tor is built with DIRREQ_STATS support, and we're a
    * directory, record how many directory requests we get from each country. */
    * directory, record how many directory requests we get from each country. */
   int DirRecordUsageByCountry;
   int DirRecordUsageByCountry;
   /** Round all GeoIP results to the next multiple of this value, to avoid
   /** Round all GeoIP results to the next multiple of this value, to avoid
@@ -3672,6 +3684,40 @@ int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
                          const char *question, char **answer);
 void geoip_free_all(void);
 void geoip_free_all(void);
 
 
+/** Directory requests that we are measuring can be either direct or
+ * tunneled. */
+typedef enum {
+  DIRREQ_DIRECT = 0,   /**< Completes once the dir conn is flushed. */
+  DIRREQ_TUNNELED = 1  /**< Travels over a circuit and its OR conn. */
+} dirreq_type_t;
+
+/** Possible states for either direct or tunneled directory requests that
+ * are relevant for determining network status download times. */
+typedef enum {
+  /** Found that the client requests a network status; applies to both
+   * direct and tunneled requests; initial state of a request that we are
+   * measuring. */
+  DIRREQ_IS_FOR_NETWORK_STATUS = 0,
+  /** Finished writing a network status to the directory connection;
+   * applies to both direct and tunneled requests; completes a direct
+   * request. */
+  DIRREQ_FLUSHING_DIR_CONN_FINISHED = 1,
+  /** Sent an END cell on the circuit that initiated a tunneled request. */
+  DIRREQ_END_CELL_SENT = 2,
+  /** Flushed last cell from queue of the circuit that initiated a
+   * tunneled request to the outbuf of the OR connection. */
+  DIRREQ_CIRC_QUEUE_FLUSHED = 3,
+  /** Flushed last byte from buffer of the OR connection belonging to the
+   * circuit that initiated a tunneled request; completes a tunneled
+   * request. */
+  DIRREQ_OR_CONN_BUFFER_FLUSHED = 4
+} dirreq_state_t;
+
+void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
+                        geoip_client_action_t action, dirreq_type_t type);
+void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
+                               dirreq_state_t new_state);
+
 /********************************* hibernate.c **********************/
 /********************************* hibernate.c **********************/
 
 
 int accounting_parse_options(or_options_t *options, int validate_only);
 int accounting_parse_options(or_options_t *options, int validate_only);

+ 29 - 0
src/or/relay.c

@@ -532,6 +532,14 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
 
 
+#ifdef ENABLE_DIRREQ_STATS
+  /* If we are sending an END cell and this circuit is used for a tunneled
+   * directory request, advance its state. */
+  if (relay_command == RELAY_COMMAND_END && circ->dirreq_id)
+    geoip_change_dirreq_state(circ->dirreq_id, DIRREQ_TUNNELED,
+                              DIRREQ_END_CELL_SENT);
+#endif
+
   if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) {
   if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) {
     /* if we're using relaybandwidthrate, this conn wants priority */
     /* if we're using relaybandwidthrate, this conn wants priority */
     circ->n_conn->client_used = approx_time();
     circ->n_conn->client_used = approx_time();
@@ -1032,6 +1040,18 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
                "Begin cell for known stream. Dropping.");
                "Begin cell for known stream. Dropping.");
         return 0;
         return 0;
       }
       }
+#ifdef ENABLE_DIRREQ_STATS
+      if (rh.command == RELAY_COMMAND_BEGIN_DIR) {
+        /* Assign this circuit and its app-ward OR connection a unique ID,
+         * so that we can measure download times. The local edge and dir
+         * connection will be assigned the same ID when they are created
+         * and linked. */
+        static uint64_t next_id = 0;
+        circ->dirreq_id = ++next_id;
+        TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->dirreq_id = circ->dirreq_id;
+      }
+#endif
+
       return connection_exit_begin_conn(cell, circ);
       return connection_exit_begin_conn(cell, circ);
     case RELAY_COMMAND_DATA:
     case RELAY_COMMAND_DATA:
       ++stats_n_data_cells_received;
       ++stats_n_data_cells_received;
@@ -1821,6 +1841,15 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max,
       orcirc->processed_cells++;
       orcirc->processed_cells++;
     }
     }
 #endif
 #endif
+#ifdef ENABLE_DIRREQ_STATS
+    /* If we just flushed our queue and this circuit is used for a
+     * tunneled directory request, possibly advance its state. */
+    if (queue->n == 0 && TO_CONN(conn)->dirreq_id)
+      geoip_change_dirreq_state(TO_CONN(conn)->dirreq_id,
+                                DIRREQ_TUNNELED,
+                                DIRREQ_CIRC_QUEUE_FLUSHED);
+#endif
+
     connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn));
     connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn));
 
 
     packed_cell_free(cell);
     packed_cell_free(cell);

+ 1 - 1
src/or/router.c

@@ -1916,7 +1916,7 @@ extrainfo_get_client_geoip_summary(time_t now)
 {
 {
   static time_t last_purged_at = 0;
   static time_t last_purged_at = 0;
   int geoip_purge_interval = 48*60*60;
   int geoip_purge_interval = 48*60*60;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS;
   geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS;
 #endif
 #endif
 #ifdef ENABLE_ENTRY_STATS
 #ifdef ENABLE_ENTRY_STATS