Procházet zdrojové kódy

Merge branch 'dirreq-timing'

Nick Mathewson před 15 roky
rodič
revize
022d251cb7
10 změnil soubory, kde provedl 435 přidání a 42 odebrání
  1. 8 5
      ChangeLog
  2. 4 4
      configure.in
  3. 24 10
      src/or/config.c
  4. 7 0
      src/or/connection.c
  5. 10 0
      src/or/connection_edge.c
  6. 23 2
      src/or/directory.c
  7. 280 17
      src/or/geoip.c
  8. 49 3
      src/or/or.h
  9. 29 0
      src/or/relay.c
  10. 1 1
      src/or/router.c

+ 8 - 5
ChangeLog

@@ -15,11 +15,14 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
     - The memarea code now uses a sentinel value at the end of each area
       to make sure nothing writes beyond the end of an area.  This might
       help debug some conceivable causes of bug 930.
-    - Directories that are configured with the --enable-geoip-stats flag
-      now write their GeoIP stats to disk exactly every 24 hours.
-      Estimated shares of v2 and v3 requests are determined as averages,
-      not at the end of a measurement period. Also, unresolved requests
-      are listed with country code '??'.
+    - Directories that are configured with the --enable-dirreq-stats flag
+      and have "DirReqStatistics 1" set write directory request stats to
+      disk every 24 hours. As compared to the --enable-geoip-stats flag
+      in 0.2.1.x, there are a few improvements: 1) stats are written to
+      disk exactly every 24 hours; 2) estimated shares of v2 and v3
+      requests are determined as mean values, not at the end of a
+      measurement period; 3) unresolved requests are listed with country
+      code '??'; 4) directories also measure download times.
     - Exit nodes can write statistics on the number of exit streams and
       transferred bytes per port to disk every 24 hours.  To enable this,
       run configure with the --enable-exit-stats option, and set

+ 4 - 4
configure.in

@@ -92,11 +92,11 @@ if test "$enable_exit_stats" = "yes"; then
   AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits])
 fi
 
-AC_ARG_ENABLE(geoip-stats,
-     AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
+AC_ARG_ENABLE(dirreq-stats,
+     AS_HELP_STRING(--enable-dirreq-stats, enable code for directories to collect per-country statistics))
 
-if test "$enable_geoip_stats" = "yes"; then
-  AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
+if test "$enable_dirreq_stats" = "yes"; then
+  AC_DEFINE(ENABLE_DIRREQ_STATS, 1, [Defined if we try to collect per-country statistics])
 fi
 
 AC_ARG_ENABLE(buffer-stats,

+ 24 - 10
src/or/config.c

@@ -187,12 +187,13 @@ static config_var_t _option_vars[] = {
   V(DirPort,                     UINT,     "0"),
   V(DirPortFrontPage,            FILENAME, NULL),
   OBSOLETE("DirPostPeriod"),
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   OBSOLETE("DirRecordUsageByCountry"),
   OBSOLETE("DirRecordUsageGranularity"),
   OBSOLETE("DirRecordUsageRetainIPs"),
   OBSOLETE("DirRecordUsageSaveInterval"),
 #endif
+  V(DirReqStatistics,            BOOL,     "0"),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSListenAddress,            LINELIST, NULL),
@@ -1376,17 +1377,25 @@ options_act(or_options_t *old_options)
     geoip_load_file(actual_fname, options);
     tor_free(actual_fname);
   }
-#ifdef ENABLE_GEOIP_STATS
-  /* Check if GeoIP database could be loaded. */
-  if (!geoip_is_loaded()) {
-    log_warn(LD_CONFIG, "Configured to measure GeoIP statistics, but no "
-                        "GeoIP database found!");
-    return -1;
+
+#ifdef ENABLE_DIRREQ_STATS
+  if (options->DirReqStatistics) {
+    /* Check if GeoIP database could be loaded. */
+    if (!geoip_is_loaded()) {
+      log_warn(LD_CONFIG, "Configured to measure directory request "
+               "statistics, but no GeoIP database found!");
+      return -1;
+    }
+    log_notice(LD_CONFIG, "Configured to count directory requests by "
+               "country and write aggregate statistics to disk. Check the "
+               "dirreq-stats file in your data directory that will first "
+               "be written in 24 hours from now.");
   }
-  log_notice(LD_CONFIG, "Configured to measure usage by country and "
-    "write aggregate statistics to disk. Check the geoip-stats file "
-    "in your data directory once I've been running for 24 hours.");
+#else
+  log_warn(LD_CONFIG, "DirReqStatistics enabled, but Tor was built "
+           "without support for directory request statistics.");
 #endif
+
 #ifdef ENABLE_EXIT_STATS
   if (options->ExitPortStatistics)
     log_notice(LD_CONFIG, "Configured to measure exit port statistics. "
@@ -1417,6 +1426,11 @@ options_act(or_options_t *old_options)
       log_warn(LD_CONFIG, "Bridges cannot be configured to measure "
                "additional GeoIP statistics as entry guards.");
       return -1;
+    } else if (!geoip_is_loaded()) {
+      /* Check if GeoIP database could be loaded. */
+      log_warn(LD_CONFIG, "Configured to measure entry node statistics, "
+               "but no GeoIP database found!");
+      return -1;
     } else
       log_notice(LD_CONFIG, "Configured to measure entry node "
                  "statistics. Look for the entry-stats file that will "

+ 7 - 0
src/or/connection.c

@@ -2302,6 +2302,13 @@ connection_handle_write(connection_t *conn, int force)
     /* else open, or closing */
     result = flush_buf_tls(or_conn->tls, conn->outbuf,
                            max_to_write, &conn->outbuf_flushlen);
+#ifdef ENABLE_DIRREQ_STATS
+    /* If we just flushed the last bytes, check if this tunneled dir
+     * request is done. */
+    if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id)
+      geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED,
+                                DIRREQ_OR_CONN_BUFFER_FLUSHED);
+#endif
     switch (result) {
       CASE_TOR_TLS_ERROR_ANY:
       case TOR_TLS_CLOSE:

+ 10 - 0
src/or/connection_edge.c

@@ -2551,6 +2551,11 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ)
 
   log_debug(LD_EXIT,"Creating new exit connection.");
   n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET);
+#ifdef ENABLE_DIRREQ_STATS
+  /* Remember the tunneled request ID in the new edge connection, so that
+   * we can measure download times. */
+  TO_CONN(n_stream)->dirreq_id = circ->dirreq_id;
+#endif
   n_stream->_base.purpose = EXIT_PURPOSE_CONNECT;
 
   n_stream->stream_id = rh.stream_id;
@@ -2787,6 +2792,11 @@ connection_exit_connect_dir(edge_connection_t *exitconn)
   dirconn->_base.purpose = DIR_PURPOSE_SERVER;
   dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT;
 
+#ifdef ENABLE_DIRREQ_STATS
+  /* Note that the new dir conn belongs to the same tunneled request as
+   * the edge conn, so that we can measure download times. */
+  TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id;
+#endif
   connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn));
 
   if (connection_add(TO_CONN(exitconn))<0) {

+ 23 - 2
src/or/directory.c

@@ -2562,12 +2562,21 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
       goto done;
     }
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     {
       struct in_addr in;
-      if (tor_inet_aton((TO_CONN(conn))->address, &in))
+      if (tor_inet_aton((TO_CONN(conn))->address, &in)) {
         geoip_note_client_seen(act, ntohl(in.s_addr), time(NULL));
         geoip_note_ns_response(act, GEOIP_SUCCESS);
+        /* Note that a request for a network status has started, so that we
+         * can measure the download time later on. */
+        if (TO_CONN(conn)->dirreq_id)
+          geoip_start_dirreq(TO_CONN(conn)->dirreq_id, dlen, act,
+                             DIRREQ_TUNNELED);
+        else
+          geoip_start_dirreq(TO_CONN(conn)->global_identifier, dlen, act,
+                             DIRREQ_DIRECT);
+      }
     }
 #endif
 
@@ -3201,6 +3210,18 @@ connection_dir_finished_flushing(dir_connection_t *conn)
   tor_assert(conn);
   tor_assert(conn->_base.type == CONN_TYPE_DIR);
 
+#ifdef ENABLE_DIRREQ_STATS
+  /* Note that we have finished writing the directory response. For direct
+   * connections this means we're done, for tunneled connections it's only
+   * an intermediate step. */
+  if (TO_CONN(conn)->dirreq_id)
+    geoip_change_dirreq_state(TO_CONN(conn)->dirreq_id, DIRREQ_TUNNELED,
+                              DIRREQ_FLUSHING_DIR_CONN_FINISHED);
+  else
+    geoip_change_dirreq_state(TO_CONN(conn)->global_identifier,
+                              DIRREQ_DIRECT,
+                              DIRREQ_FLUSHING_DIR_CONN_FINISHED);
+#endif
   switch (conn->_base.state) {
     case DIR_CONN_STATE_CLIENT_SENDING:
       log_debug(LD_DIR,"client finished sending command.");

+ 280 - 17
src/or/geoip.c

@@ -347,7 +347,7 @@ geoip_determine_shares(time_t now)
   last_time_determined_shares = now;
 }
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
  * have been sent to us since the last call of this function up to time
  * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
@@ -390,10 +390,11 @@ geoip_note_client_seen(geoip_client_action_t action,
     if (client_history_starts > now)
       return;
   } else {
-#ifndef ENABLE_GEOIP_STATS
+#ifndef ENABLE_DIRREQ_STATS
     return;
 #else
-    if (options->BridgeRelay || options->BridgeAuthoritativeDir)
+    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
+        !options->DirReqStatistics)
       return;
 #endif
   }
@@ -494,7 +495,7 @@ geoip_remove_old_clients(time_t cutoff)
     client_history_starts = cutoff;
 }
 
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
 /** How many responses are we giving to clients requesting v2 network
  * statuses? */
 static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
@@ -511,8 +512,10 @@ void
 geoip_note_ns_response(geoip_client_action_t action,
                        geoip_ns_response_t response)
 {
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   static int arrays_initialized = 0;
+  if (!get_options()->DirReqStatistics)
+    return;
   if (!arrays_initialized) {
     memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
     memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
@@ -570,6 +573,235 @@ _c_hist_compare(const void **_a, const void **_b)
     return strcmp(a->country, b->country);
 }
 
+/** When there are incomplete directory requests at the end of a 24-hour
+ * period, consider those requests running for longer than this timeout as
+ * failed, the others as still running. */
+#define DIRREQ_TIMEOUT (10*60)
+
+/** Entry in a map from either conn->global_identifier for direct requests
+ * or a unique circuit identifier for tunneled requests to request time,
+ * response size, and completion time of a network status request. Used to
+ * measure download times of requests to derive average client
+ * bandwidths. Entries are allocated in geoip_start_dirreq() and are freed
+ * when geoip_get_dirreq_history() or geoip_free_all() drains the map. */
+typedef struct dirreq_map_entry_t {
+  HT_ENTRY(dirreq_map_entry_t) node; /**< Link field used by dirreqmap. */
+  /** Unique identifier for this network status request; this is either the
+   * conn->global_identifier of the dir conn (direct request) or a new
+   * locally unique identifier of a circuit (tunneled request). This ID is
+   * only unique among other direct or tunneled requests, respectively,
+   * which is why the map key is the pair (type, dirreq_id). */
+  uint64_t dirreq_id;
+  unsigned int state:3; /**< Current dirreq_state_t value (0 through 4). */
+  unsigned int type:1; /**< dirreq_type_t: direct (0) or tunneled (1). */
+  unsigned int completed:1; /**< Set once the final state was reached. */
+  unsigned int action:2; /**< Is this a v2 or v3 request?
+                          * NOTE(review): assumes the geoip_client_action_t
+                          * values stored here fit in 2 bits -- confirm. */
+  /** When did we receive the request and start sending the response?
+   * Filled in by geoip_start_dirreq(). */
+  struct timeval request_time;
+  size_t response_size; /**< What is the size of the response in bytes? */
+  struct timeval completion_time; /**< When did the request succeed?
+                                   * Only set when completed is 1. */
+} dirreq_map_entry_t;
+
+/** Map of all directory requests asking for v2 or v3 network statuses in
+ * the current dirreq-stats measurement interval. Elements are of type
+ * <b>dirreq_map_entry_t</b>, keyed on their (type, dirreq_id) pair. */
+static HT_HEAD(dirreqmap, dirreq_map_entry_t) dirreq_map =
+     HT_INITIALIZER();
+
+/** Hash-table helper: return true iff <b>a</b> and <b>b</b> carry the same
+ * key, i.e. the same request type and the same request identifier. */
+static int
+dirreq_map_ent_eq(const dirreq_map_entry_t *a,
+                  const dirreq_map_entry_t *b)
+{
+  if (a->dirreq_id != b->dirreq_id)
+    return 0;
+  return a->type == b->type;
+}
+
+/** Hash-table helper: derive a hash for <b>entry</b> from its key parts.
+ * The low bits of the request identifier are offset by the request type
+ * shifted left by 20 bits, so that direct and tunneled requests sharing a
+ * numeric ID get different hash values. */
+static unsigned
+dirreq_map_ent_hash(const dirreq_map_entry_t *entry)
+{
+  const unsigned id_bits = (unsigned) entry->dirreq_id;
+  return id_bits + (entry->type << 20);
+}
+
+HT_PROTOTYPE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
+             dirreq_map_ent_eq);
+HT_GENERATE(dirreqmap, dirreq_map_entry_t, node, dirreq_map_ent_hash,
+            dirreq_map_ent_eq, 0.6, malloc, realloc, free);
+
+/** Helper: Put <b>entry</b> into the map of directory requests using
+ * <b>type</b> and <b>dirreq_id</b> as key parts. Both must equal the
+ * corresponding fields of <b>entry</b>; this is asserted. If the map
+ * already held a different entry for that key, <b>entry</b> takes its
+ * place, a BUG warning is logged, and the displaced entry is freed. */
+static void
+_dirreq_map_put(dirreq_map_entry_t *entry, dirreq_type_t type,
+               uint64_t dirreq_id)
+{
+  dirreq_map_entry_t *old_ent;
+  tor_assert(entry->type == type);
+  tor_assert(entry->dirreq_id == dirreq_id);
+
+  /* XXXX022 once we're sure the bug case never happens, we can switch
+   * to HT_INSERT */
+  old_ent = HT_REPLACE(dirreqmap, &dirreq_map, entry);
+  if (old_ent && old_ent != entry) {
+    log_warn(LD_BUG, "Error when putting directory request into local "
+             "map. There was already an entry for the same identifier.");
+    /* The displaced entry is no longer linked into the map, and entries
+     * are only ever reached through the map, so nothing else could free
+     * it later. Release it here instead of leaking it. */
+    tor_free(old_ent);
+  }
+}
+
+/** Helper: Return the entry in the map of directory requests whose key is
+ * the pair (<b>type</b>, <b>dirreq_id</b>), or NULL if the map holds no
+ * such entry. */
+static dirreq_map_entry_t *
+_dirreq_map_get(dirreq_type_t type, uint64_t dirreq_id)
+{
+  /* Only the two key fields of the search key are filled in; the hash and
+   * equality helpers of this map read nothing else. */
+  dirreq_map_entry_t key;
+  key.dirreq_id = dirreq_id;
+  key.type = type;
+  return HT_FIND(dirreqmap, &dirreq_map, &key);
+}
+
+/** Start measuring a directory request for a network status. The request
+ * is either direct or tunneled (see <b>type</b>), is identified by
+ * <b>dirreq_id</b>, asks for the v2 or v3 status given by <b>action</b>,
+ * and will be answered with <b>response_size</b> bytes. The current time
+ * is recorded as the request time. Does nothing unless the
+ * DirReqStatistics option is set. */
+void
+geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
+                   geoip_client_action_t action, dirreq_type_t type)
+{
+  dirreq_map_entry_t *req;
+  if (!get_options()->DirReqStatistics)
+    return;
+  /* Zero-filled allocation: the entry starts out in state
+   * DIRREQ_IS_FOR_NETWORK_STATUS (0) and not completed. */
+  req = tor_malloc_zero(sizeof(*req));
+  req->type = type;
+  req->dirreq_id = dirreq_id;
+  req->action = action;
+  req->response_size = response_size;
+  tor_gettimeofday(&req->request_time);
+  _dirreq_map_put(req, type, dirreq_id);
+}
+
+/** Advance the directory request identified by <b>type</b> (direct or
+ * tunneled) and <b>dirreq_id</b> to <b>new_state</b>. States can only be
+ * advanced one step at a time. When a direct request reaches
+ * DIRREQ_FLUSHING_DIR_CONN_FINISHED, or a tunneled request reaches
+ * DIRREQ_OR_CONN_BUFFER_FLUSHED, record the completion time and mark the
+ * request as completed. Do nothing if DirReqStatistics is off, if no entry
+ * exists for the key (e.g., a request we are not measuring, or one started
+ * in the previous measurement period), or if <b>new_state</b> is not the
+ * direct successor of the entry's current state. */
+void
+geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
+                          dirreq_state_t new_state)
+{
+  dirreq_map_entry_t *req;
+  int direct_done, tunneled_done;
+
+  if (!get_options()->DirReqStatistics)
+    return;
+  req = _dirreq_map_get(type, dirreq_id);
+  /* The initial state is never a valid target, and every other target
+   * must be exactly one step past the current state. */
+  if (!req || new_state == DIRREQ_IS_FOR_NETWORK_STATUS ||
+      new_state - 1 != req->state)
+    return;
+  req->state = new_state;
+
+  direct_done = (type == DIRREQ_DIRECT &&
+                 new_state == DIRREQ_FLUSHING_DIR_CONN_FINISHED);
+  tunneled_done = (type == DIRREQ_TUNNELED &&
+                   new_state == DIRREQ_OR_CONN_BUFFER_FLUSHED);
+  if (!direct_done && !tunneled_done)
+    return;
+  tor_gettimeofday(&req->completion_time);
+  req->completed = 1;
+}
+
+#ifdef ENABLE_DIRREQ_STATS
+/** Return a newly allocated comma-separated string containing statistics
+ * on network status downloads of the given <b>action</b> (v2 or v3) and
+ * <b>type</b> (direct or tunneled). The string contains the number of
+ * completed requests, timeouts, and still running requests, each rounded
+ * up to a multiple of DIR_REQ_GRANULARITY. If the rounded number of
+ * completed requests is at least MIN_DIR_REQ_RESPONSES, it also contains
+ * the observed download rates, in bytes per second, by deciles and
+ * quartiles.
+ *
+ * All entries matching <b>action</b> and <b>type</b> are removed from the
+ * request map and freed as a side effect. The caller owns the returned
+ * string. Return NULL if <b>action</b> is not a network status action or
+ * if formatting the result fails. */
+static char *
+geoip_get_dirreq_history(geoip_client_action_t action,
+                           dirreq_type_t type)
+{
+  char *result = NULL;
+  smartlist_t *dirreq_times = NULL;
+  /* <b>complete</b> stays the exact number of collected samples; only
+   * <b>complete_rounded</b> is obfuscated for reporting. */
+  uint32_t complete = 0, timeouts = 0, running = 0;
+  uint32_t complete_rounded;
+  int i = 0, bufsize = 1024, written;
+  dirreq_map_entry_t **ptr, **next, *ent;
+  struct timeval now;
+
+  tor_gettimeofday(&now);
+  if (action != GEOIP_CLIENT_NETWORKSTATUS &&
+      action != GEOIP_CLIENT_NETWORKSTATUS_V2)
+    return NULL;
+  dirreq_times = smartlist_create();
+  for (ptr = HT_START(dirreqmap, &dirreq_map); ptr; ptr = next) {
+    ent = *ptr;
+    if (ent->action != action || ent->type != type) {
+      next = HT_NEXT(dirreqmap, &dirreq_map, ptr);
+      continue;
+    } else {
+      if (ent->completed) {
+        uint32_t *bytes_per_second = tor_malloc_zero(sizeof(uint32_t));
+        uint32_t time_diff = (uint32_t) tv_udiff(&ent->request_time,
+                                                 &ent->completion_time);
+        if (time_diff == 0)
+          time_diff = 1; /* Avoid DIV/0; "instant" answers are impossible
+                          * anyway by law of nature or something.. */
+        /* Multiply in 64 bits: with a 32-bit size_t the product would
+         * wrap for any response larger than about 4 KB. */
+        *bytes_per_second = (uint32_t)
+          (1000000 * (uint64_t) ent->response_size / time_diff);
+        smartlist_add(dirreq_times, bytes_per_second);
+        complete++;
+      } else {
+        if (tv_udiff(&ent->request_time, &now) / 1000000 > DIRREQ_TIMEOUT)
+          timeouts++;
+        else
+          running++;
+      }
+      next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ptr);
+      tor_free(ent);
+    }
+  }
+#define DIR_REQ_GRANULARITY 4
+  complete_rounded = round_uint32_to_next_multiple_of(complete,
+                                              DIR_REQ_GRANULARITY);
+  timeouts = round_uint32_to_next_multiple_of(timeouts,
+                                              DIR_REQ_GRANULARITY);
+  running = round_uint32_to_next_multiple_of(running,
+                                             DIR_REQ_GRANULARITY);
+  result = tor_malloc_zero(bufsize);
+  written = tor_snprintf(result, bufsize, "complete=%u,timeout=%u,"
+                         "running=%u", complete_rounded, timeouts, running);
+#define MIN_DIR_REQ_RESPONSES 16
+  if (written >= 0 && complete_rounded >= MIN_DIR_REQ_RESPONSES) {
+    /* Size and index the array by the exact sample count. Using the
+     * rounded-up count would read slots that were never filled in. The
+     * threshold above guarantees complete >= 13, so none of the index
+     * expressions below can underflow. */
+    uint32_t *dltimes = tor_malloc(sizeof(uint32_t) * complete);
+    SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, {
+      dltimes[i++] = *dlt;
+    });
+    median_uint32(dltimes, complete); /* sort */
+    written = tor_snprintf(result + written, bufsize - written,
+                           ",min=%u,d1=%u,d2=%u,q1=%u,d3=%u,d4=%u,md=%u,"
+                           "d6=%u,d7=%u,q3=%u,d8=%u,d9=%u,max=%u",
+                           dltimes[0],
+                           dltimes[1*complete/10-1],
+                           dltimes[2*complete/10-1],
+                           dltimes[1*complete/4-1],
+                           dltimes[3*complete/10-1],
+                           dltimes[4*complete/10-1],
+                           dltimes[5*complete/10-1],
+                           dltimes[6*complete/10-1],
+                           dltimes[7*complete/10-1],
+                           dltimes[3*complete/4-1],
+                           dltimes[8*complete/10-1],
+                           dltimes[9*complete/10-1],
+                           dltimes[complete-1]);
+    tor_free(dltimes);
+  }
+  /* Single cleanup path: on a formatting error drop the buffer instead of
+   * leaking it, and always release the per-request samples, which the
+   * smartlist itself does not own. */
+  if (written < 0)
+    tor_free(result);
+  SMARTLIST_FOREACH(dirreq_times, uint32_t *, dlt, tor_free(dlt));
+  smartlist_free(dirreq_times);
+  return result;
+}
+#endif
+
 /** How long do we have to have observed per-country request history before we
  * are willing to talk about it? */
 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
@@ -584,7 +816,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
 {
   char *result = NULL;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
 #endif
   if (!geoip_is_loaded())
@@ -599,7 +831,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned total = 0;
     unsigned granularity = IP_GRANULARITY;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
     granularity = DIR_RECORD_USAGE_GRANULARITY;
 #endif
     HT_FOREACH(ent, clientmap, &client_history) {
@@ -667,7 +899,7 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   char *result;
   unsigned granularity = IP_GRANULARITY;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   granularity = DIR_RECORD_USAGE_GRANULARITY;
   min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
 #endif
@@ -712,14 +944,14 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   return result;
 }
 
-/** Store all our geoip statistics into $DATADIR/geoip-stats. */
+/** Store all our geoip statistics into $DATADIR/dirreq-stats. */
 static void
 dump_geoip_stats(void)
 {
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   time_t now = time(NULL);
   time_t request_start;
-  char *filename = get_datadir_fname("geoip-stats");
+  char *filename = get_datadir_fname("dirreq-stats");
   char *data_v2 = NULL, *data_v3 = NULL;
   char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
   open_file_t *open_file = NULL;
@@ -727,6 +959,9 @@ dump_geoip_stats(void)
   FILE *out;
   int i;
 
+  if (!get_options()->DirReqStatistics)
+    goto done;
+
   data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
   data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   format_iso_time(since, geoip_get_history_start());
@@ -785,6 +1020,23 @@ dump_geoip_stats(void)
       goto done;
   }
 
+  data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
+                                       DIRREQ_DIRECT);
+  data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
+                                       DIRREQ_DIRECT);
+  if (fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n",
+              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
+    goto done;
+  tor_free(data_v2);
+  tor_free(data_v3);
+  data_v2 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS_V2,
+                                       DIRREQ_TUNNELED);
+  data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
+                                       DIRREQ_TUNNELED);
+  if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n",
+              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
+    goto done;
+
   finish_writing_to_file(open_file);
   open_file = NULL;
  done:
@@ -873,13 +1125,24 @@ clear_geoip_db(void)
 void
 geoip_free_all(void)
 {
-  clientmap_entry_t **ent, **next, *this;
-  for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
-    this = *ent;
-    next = HT_NEXT_RMV(clientmap, &client_history, ent);
-    tor_free(this);
+  {
+    clientmap_entry_t **ent, **next, *this;
+    for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
+      this = *ent;
+      next = HT_NEXT_RMV(clientmap, &client_history, ent);
+      tor_free(this);
+    }
+    HT_CLEAR(clientmap, &client_history);
+  }
+  {
+    dirreq_map_entry_t **ent, **next, *this;
+    for (ent = HT_START(dirreqmap, &dirreq_map); ent != NULL; ent = next) {
+      this = *ent;
+      next = HT_NEXT_RMV(dirreqmap, &dirreq_map, ent);
+      tor_free(this);
+    }
+    HT_CLEAR(dirreqmap, &dirreq_map);
   }
-  HT_CLEAR(clientmap, &client_history);
 
   clear_geoip_db();
 }

+ 49 - 3
src/or/or.h

@@ -20,8 +20,8 @@
 #ifndef INSTRUMENT_DOWNLOADS
 #define INSTRUMENT_DOWNLOADS 1
 #endif
-#ifndef ENABLE_GEOIP_STATS
-#define ENABLE_GEOIP_STATS 1
+#ifndef ENABLE_DIRREQ_STATS
+#define ENABLE_DIRREQ_STATS 1
 #endif
 #ifndef ENABLE_BUFFER_STATS
 #define ENABLE_BUFFER_STATS 1
@@ -970,6 +970,10 @@ typedef struct connection_t {
    * to the evdns_server_port is uses to listen to and answer connections. */
   struct evdns_server_port *dns_server_port;
 
+#ifdef ENABLE_DIRREQ_STATS
+  /** Unique ID for measuring tunneled network status requests. */
+  uint64_t dirreq_id;
+#endif
 } connection_t;
 
 /** Stores flags and information related to the portion of a v2 Tor OR
@@ -1956,6 +1960,10 @@ typedef struct circuit_t {
    * linked to an OR connection. */
   struct circuit_t *prev_active_on_n_conn;
   struct circuit_t *next; /**< Next circuit in linked list of all circuits. */
+#ifdef ENABLE_DIRREQ_STATS
+  /** Unique ID for measuring tunneled network status requests. */
+  uint64_t dirreq_id;
+#endif
 } circuit_t;
 
 /** Largest number of relay_early cells that we can send on a given
@@ -2492,6 +2500,10 @@ typedef struct {
    * exit allows it, we use it. */
   int AllowSingleHopCircuits;
 
+  /** If true, the user wants us to collect statistics on clients
+   * requesting network statuses from us as directory. */
+  int DirReqStatistics;
+
   /** If true, the user wants us to collect statistics on port usage. */
   int ExitPortStatistics;
 
@@ -2556,7 +2568,7 @@ typedef struct {
   int BridgeRecordUsageByCountry;
 
 #if 0
-  /** If true, and Tor is built with GEOIP_STATS support, and we're a
+  /** If true, and Tor is built with DIRREQ_STATS support, and we're a
    * directory, record how many directory requests we get from each country. */
   int DirRecordUsageByCountry;
   /** Round all GeoIP results to the next multiple of this value, to avoid
@@ -3672,6 +3684,40 @@ int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
 void geoip_free_all(void);
 
+/** Directory requests that we are measuring can be either direct or
+ * tunneled. Direct requests are tracked by the global_identifier of their
+ * directory connection; tunneled requests are tracked by the dirreq_id
+ * that is assigned when the BEGIN_DIR cell arrives. The value is stored in
+ * a 1-bit field of the request map entries in geoip.c, so the two
+ * enumerators must remain 0 and 1. */
+typedef enum {
+  DIRREQ_DIRECT = 0,
+  DIRREQ_TUNNELED = 1,
+} dirreq_type_t;
+
+/** Possible states for either direct or tunneled directory requests that
+ * are relevant for determining network status download times. The numeric
+ * order matters: geoip_change_dirreq_state() only accepts a new state that
+ * is exactly one greater than the current one. */
+typedef enum {
+  /** Found that the client requests a network status; applies to both
+   * direct and tunneled requests; initial state of a request that we are
+   * measuring. This is the state of a freshly started request and is
+   * never accepted as a transition target. */
+  DIRREQ_IS_FOR_NETWORK_STATUS = 0,
+  /** Finished writing a network status to the directory connection;
+   * applies to both direct and tunneled requests; completes a direct
+   * request. Reported from connection_dir_finished_flushing(). */
+  DIRREQ_FLUSHING_DIR_CONN_FINISHED = 1,
+  /** END cell sent to circuit that initiated a tunneled request.
+   * Reported from relay_send_command_from_edge(). */
+  DIRREQ_END_CELL_SENT = 2,
+  /** Flushed last cell from queue of the circuit that initiated a
+    * tunneled request to the outbuf of the OR connection. Reported from
+    * connection_or_flush_from_first_active_circuit(). */
+  DIRREQ_CIRC_QUEUE_FLUSHED = 3,
+  /** Flushed last byte from buffer of the OR connection belonging to the
+    * circuit that initiated a tunneled request; completes a tunneled
+    * request. Reported from connection_handle_write(). */
+  DIRREQ_OR_CONN_BUFFER_FLUSHED = 4
+} dirreq_state_t;
+
+void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
+                        geoip_client_action_t action, dirreq_type_t type);
+void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
+                               dirreq_state_t new_state);
+
 /********************************* hibernate.c **********************/
 
 int accounting_parse_options(or_options_t *options, int validate_only);

+ 29 - 0
src/or/relay.c

@@ -532,6 +532,14 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
 
+#ifdef ENABLE_DIRREQ_STATS
+  /* If we are sending an END cell and this circuit is used for a tunneled
+   * directory request, advance its state. */
+  if (relay_command == RELAY_COMMAND_END && circ->dirreq_id)
+    geoip_change_dirreq_state(circ->dirreq_id, DIRREQ_TUNNELED,
+                              DIRREQ_END_CELL_SENT);
+#endif
+
   if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) {
     /* if we're using relaybandwidthrate, this conn wants priority */
     circ->n_conn->client_used = approx_time();
@@ -1032,6 +1040,18 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
                "Begin cell for known stream. Dropping.");
         return 0;
       }
+#ifdef ENABLE_DIRREQ_STATS
+      if (rh.command == RELAY_COMMAND_BEGIN_DIR) {
+        /* Assign this circuit and its app-ward OR connection a unique ID,
+         * so that we can measure download times. The local edge and dir
+         * connection will be assigned the same ID when they are created
+         * and linked. */
+        static uint64_t next_id = 0;
+        circ->dirreq_id = ++next_id;
+        TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->dirreq_id = circ->dirreq_id;
+      }
+#endif
+
       return connection_exit_begin_conn(cell, circ);
     case RELAY_COMMAND_DATA:
       ++stats_n_data_cells_received;
@@ -1821,6 +1841,15 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max,
       orcirc->processed_cells++;
     }
 #endif
+#ifdef ENABLE_DIRREQ_STATS
+    /* If we just flushed our queue and this circuit is used for a
+     * tunneled directory request, possibly advance its state. */
+    if (queue->n == 0 && TO_CONN(conn)->dirreq_id)
+      geoip_change_dirreq_state(TO_CONN(conn)->dirreq_id,
+                                DIRREQ_TUNNELED,
+                                DIRREQ_CIRC_QUEUE_FLUSHED);
+#endif
+
     connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn));
 
     packed_cell_free(cell);

+ 1 - 1
src/or/router.c

@@ -1916,7 +1916,7 @@ extrainfo_get_client_geoip_summary(time_t now)
 {
   static time_t last_purged_at = 0;
   int geoip_purge_interval = 48*60*60;
-#ifdef ENABLE_GEOIP_STATS
+#ifdef ENABLE_DIRREQ_STATS
   geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS;
 #endif
 #ifdef ENABLE_ENTRY_STATS