Преглед изворни кода

Merge commit 'karsten/proposal-166-impl-master'

Nick Mathewson пре 15 година

+ 18 - 17

@@ -7,23 +7,24 @@ Changes in version - 2009-0?-??
       Code by Christopher Davis.
       Code by Christopher Davis.
   o New options for gathering stats safely:
   o New options for gathering stats safely:
-    - Directories that configure with --enable-dirreq-stats and set
-      "DirReqStatistics 1" write directory request stats to disk every
-      24 hours. As compared to the --enable-geoip-stats flag in 0.2.1.x,
-      there are a few improvements: 1) stats are written to disk exactly
-      every 24 hours; 2) estimated shares of v2 and v3 requests are
-      determined as mean values, not at the end of a measurement period;
-      3) unresolved requests are listed with country code '??';
-      4) directories also measure download times.
-    - Exit nodes that configure with --enable-exit-stats and set
-      "ExitPortStatistics 1" write statistics on the number of exit
-      streams and transferred bytes per port to disk every 24 hours.
-    - Relays that configure with --enable-buffer-stats and set
-      "CellStatistics 1" write statistics to disk every 24 hours on how
-      long cells spend in their circuit queues.
-    - Entry nodes that configure with --enable-entry-stats and set
-      "EntryStatistics 1" write statistics to disk every 24 hours on
-      the rough number and origins of connecting clients.
+    - Directories that set "DirReqStatistics 1" write statistics on
+      directory requests to disk every 24 hours. As compared to the
+      --enable-geoip-stats flag in 0.2.1.x, there are a few improvements:
+      1) stats are written to disk exactly every 24 hours; 2) estimated
+      shares of v2 and v3 requests are determined as mean values, not at
+      the end of a measurement period; 3) unresolved requests are listed
+      with country code '??'; 4) directories also measure download times.
+    - Exit nodes that set "ExitPortStatistics 1" write statistics on the
+      number of exit streams and transferred bytes per port to disk every
+      24 hours.
+    - Relays that set "CellStatistics 1" write statistics on how long
+      cells spend in their circuit queues to disk every 24 hours.
+    - Entry nodes that set "EntryStatistics 1" write statistics on the
+      rough number and origins of connecting clients to disk every 24
+      hours.
+    - Relays that write any of the above statistics to disk and set
+      "ExtraInfoStatistics 1" include the past 24 hours of statistics in
+      their extra-info documents.
   o Minor features:
   o Minor features:
     - New --digests command-line switch to output the digests of the
     - New --digests command-line switch to output the digests of the

+ 0 - 28

@@ -85,34 +85,6 @@ case $host in
-     AS_HELP_STRING(--enable-exit-stats, enable code for exits to collect per-port statistics))
-if test "$enable_exit_stats" = "yes"; then
-  AC_DEFINE(ENABLE_EXIT_STATS, 1, [Defined if we try to collect per-port statistics on exits])
-     AS_HELP_STRING(--enable-dirreq-stats, enable code for directories to collect per-country statistics))
-if test "$enable_dirreq_stats" = "yes"; then
-  AC_DEFINE(ENABLE_DIRREQ_STATS, 1, [Defined if we try to collect per-country statistics])
-     AS_HELP_STRING(--enable-buffer-stats, enable code for relays to collect buffer statistics))
-if test "$enable_buffer_stats" = "yes"; then
-  AC_DEFINE(ENABLE_BUFFER_STATS, 1, [Defined if we try to collect buffer statistics])
-     AS_HELP_STRING(--enable-entry-stats, enable code for entry guards to collect per-country statistics))
-if test "$enable_entry_stats" = "yes"; then
-  AC_DEFINE(ENABLE_ENTRY_STATS, 1, [Defined if we try to collect per-country statistics])
      AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
      AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))

+ 194 - 0

@@ -641,6 +641,200 @@
         "geoip-start" is the time at which we began collecting geoip
         "geoip-start" is the time at which we began collecting geoip
+    "dirreq-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+        [At most once.]
+        YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+        interval of length NSEC seconds (86400 seconds by default).
+        A "dirreq-stats-end" line, as well as any other "dirreq-*" line,
+        is only added when the relay has opened its Dir port and after 24
+        hours of measuring directory requests.
+    "dirreq-v2-ips" CC=N,CC=N,... NL
+        [At most once.]
+    "dirreq-v3-ips" CC=N,CC=N,... NL
+        [At most once.]
+        List of mappings from two-letter country codes to the number of
+        unique IP addresses that have connected from that country to
+        request a v2/v3 network status, rounded up to the nearest multiple
+        of 8. Only those IP addresses are counted that the directory can
+        answer with a 200 OK status code.
+    "dirreq-v2-reqs" CC=N,CC=N,... NL
+        [At most once.]
+    "dirreq-v3-reqs" CC=N,CC=N,... NL
+        [At most once.]
+        List of mappings from two-letter country codes to the number of
+        requests for v2/v3 network statuses from that country, rounded up
+        to the nearest multiple of 8. Only those requests are counted that
+        the directory can answer with a 200 OK status code.
+    "dirreq-v2-share" num% NL
+        [At most once.]
+    "dirreq-v3-share" num% NL
+        [At most once.]
+        The share of v2/v3 network status requests that the directory
+        expects to receive from clients based on its advertised bandwidth
+        compared to the overall network bandwidth capacity. Shares are
+        formatted in percent with two decimal places. Shares are
+        calculated as means over the whole 24-hour interval.
+    "dirreq-v2-resp" status=num,... NL
+        [At most once.]
+    "dirreq-v3-resp" status=num,... NL
+        [At most once.]
+        List of mappings from response statuses to the number of requests
+        for v2/v3 network statuses that were answered with that response
+        status, rounded up to the nearest multiple of 4. Only response
+        statuses with at least 1 response are reported. New response
+        statuses can be added at any time. The current list of response
+        statuses is as follows:
+        "ok": a network status request is answered; this number
+           corresponds to the sum of all requests as reported in
+           "dirreq-v2-reqs" or "dirreq-v3-reqs", respectively, before
+           rounding up.
+        "not-enough-sigs": a version 3 network status is not signed by a
+           sufficient number of requested authorities.
+        "unavailable": a requested network status object is unavailable.
+        "not-found": a requested network status is not found.
+        "not-modified": a network status has not been modified since the
+           If-Modified-Since time that is included in the request.
+        "busy": the directory is busy.
+    "dirreq-v2-direct-dl" key=val,... NL
+        [At most once.]
+    "dirreq-v3-direct-dl" key=val,... NL
+        [At most once.]
+    "dirreq-v2-tunneled-dl" key=val,... NL
+        [At most once.]
+    "dirreq-v3-tunneled-dl" key=val,... NL
+        [At most once.]
+        List of statistics about possible failures in the download process
+        of v2/v3 network statuses. Requests are either "direct"
+        HTTP-encoded requests over the relay's directory port, or
+        "tunneled" requests using a BEGIN_DIR cell over the relay's OR
+        port. The list of possible statistics can change, and statistics
+        can be left out from reporting. The current list of statistics is
+        as follows:
+        Successful downloads and failures:
+        "complete": a client has finished the download successfully.
+        "timeout": a download did not finish within 10 minutes after
+           starting to send the response.
+        "running": a download is still running at the end of the
+           measurement period, less than 10 minutes after starting to
+           send the response.
+        Download times:
+        "min", "max": smallest and largest measured bandwidth in B/s.
+        "d[1-4,6-9]": 1st to 4th and 6th to 9th decile of measured
+           bandwidth in B/s. For a given decile i, i/10 of all downloads
+           had a smaller bandwidth than di, and (10-i)/10 of all downloads
+           had a larger bandwidth than di.
+        "q[1,3]": 1st and 3rd quartile of measured bandwidth in B/s. One
+           fourth of all downloads had a smaller bandwidth than q1, one
+           fourth of all downloads had a larger bandwidth than q3, and the
+           remaining half of all downloads had a bandwidth between q1 and
+           q3.
+        "md": median of measured bandwidth in B/s. Half of the downloads
+           had a smaller bandwidth than md, the other half had a larger
+           bandwidth than md.
+    "entry-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+        [At most once.]
+        YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+        interval of length NSEC seconds (86400 seconds by default).
+        An "entry-stats-end" line, as well as any other "entry-*"
+        line, is first added after the relay has been running for at least
+        24 hours.
+    "entry-ips" CC=N,CC=N,... NL
+        [At most once.]
+        List of mappings from two-letter country codes to the number of
+        unique IP addresses that have connected from that country to the
+        relay and which are not known to be other relays, rounded up to the
+        nearest multiple of 8.
+    "cell-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+        [At most once.]
+        YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+        interval of length NSEC seconds (86400 seconds by default).
+        A "cell-stats-end" line, as well as any other "cell-*" line,
+        is first added after the relay has been running for at least 24
+        hours.
+    "cell-processed-cells" num,...,num NL
+        [At most once.]
+        Mean number of processed cells per circuit, subdivided into
+        deciles of circuits by the number of cells they have processed in
+        descending order from loudest to quietest circuits.
+    "cell-queued-cells" num,...,num NL
+        [At most once.]
+        Mean number of cells contained in queues by circuit decile. These
+        means are calculated by 1) determining the mean number of cells in
+        a single circuit between its creation and its termination and 2)
+        calculating the mean for all circuits in a given decile as
+        determined in "cell-processed-cells". Numbers have a precision of
+        two decimal places.
+    "cell-time-in-queue" num,...,num NL
+        [At most once.]
+        Mean time cells spend in circuit queues in milliseconds. Times are
+        calculated by 1) determining the mean time cells spend in the
+        queue of a single circuit and 2) calculating the mean for all
+        circuits in a given decile as determined in
+        "cell-processed-cells".
+    "cell-circuits-per-decile" num NL
+        [At most once.]
+        Mean number of circuits that are included in any of the deciles,
+        rounded up to the next integer.
+    "exit-stats-end" YYYY-MM-DD HH:MM:SS (NSEC s) NL
+        [At most once.]
+        YYYY-MM-DD HH:MM:SS defines the end of the included measurement
+        interval of length NSEC seconds (86400 seconds by default).
+        An "exit-stats-end" line, as well as any other "exit-*" line, is
+        first added after the relay has been running for at least 24 hours
+        and only if the relay permits exiting (where exiting to a single
+        port and IP address is sufficient).
+    "exit-kibibytes-written" port=N,port=N,... NL
+        [At most once.]
+    "exit-kibibytes-read" port=N,port=N,... NL
+        [At most once.]
+        List of mappings from ports to the number of kibibytes that the
+        relay has written to or read from exit connections to that port,
+        rounded up to the next full kibibyte.
+    "exit-streams-opened" port=N,port=N,... NL
+        [At most once.]
+        List of mappings from ports to the number of opened exit streams
+        to that port, rounded up to the nearest multiple of 4.
     "router-signature" NL Signature NL
     "router-signature" NL Signature NL
         [At end, exactly once.]
         [At end, exactly once.]

+ 2 - 2

@@ -86,7 +86,7 @@ Proposals by number:
 163  Detecting whether a connection comes from a client [OPEN]
 163  Detecting whether a connection comes from a client [OPEN]
 164  Reporting the status of server votes [OPEN]
 164  Reporting the status of server votes [OPEN]
 165  Easy migration for voting authority sets [OPEN]
 165  Easy migration for voting authority sets [OPEN]
-166  Including Network Statistics in Extra-Info Documents [OPEN]
+166  Including Network Statistics in Extra-Info Documents [ACCEPTED]
 Proposals by status:
 Proposals by status:
@@ -114,7 +114,6 @@ Proposals by status:
    163  Detecting whether a connection comes from a client [for 0.2.2]
    163  Detecting whether a connection comes from a client [for 0.2.2]
    164  Reporting the status of server votes [for 0.2.2]
    164  Reporting the status of server votes [for 0.2.2]
    165  Easy migration for voting authority sets
    165  Easy migration for voting authority sets
-   166  Including Network Statistics in Extra-Info Documents [for 0.2.2]
    110  Avoiding infinite length circuits [for 0.2.1.x] [in]
    110  Avoiding infinite length circuits [for 0.2.1.x] [in]
    117  IPv6 exits [for 0.2.1.x]
    117  IPv6 exits [for 0.2.1.x]
@@ -122,6 +121,7 @@ Proposals by status:
    140  Provide diffs between consensuses [for 0.2.2.x]
    140  Provide diffs between consensuses [for 0.2.2.x]
    147  Eliminate the need for v2 directories in generating v3 directories [for 0.2.1.x]
    147  Eliminate the need for v2 directories in generating v3 directories [for 0.2.1.x]
    157  Make certificate downloads specific [for 0.2.1.x]
    157  Make certificate downloads specific [for 0.2.1.x]
+   166  Including Network Statistics in Extra-Info Documents [for 0.2.2]
    000  Index of Tor Proposals
    000  Index of Tor Proposals
    001  The Tor Proposal Process
    001  The Tor Proposal Process

+ 8 - 8

@@ -3,7 +3,7 @@ Title: Including Network Statistics in Extra-Info Documents
 Author: Karsten Loesing
 Author: Karsten Loesing
 Created: 21-Jul-2009
 Created: 21-Jul-2009
 Target: 0.2.2
 Target: 0.2.2
-Status: Open
+Status: Accepted
 Change history:
 Change history:
@@ -298,7 +298,7 @@ Exit statistics:
   The last type of statistics affects exit nodes counting the number of
   The last type of statistics affects exit nodes counting the number of
   bytes written and read and the number of streams opened per port and
   bytes written and read and the number of streams opened per port and
-  per 24 hours. Exit port statistics can be measured from looking of
+  per 24 hours. Exit port statistics can be measured from looking at
   headers of BEGIN and DATA cells. A BEGIN cell contains the exit port
   headers of BEGIN and DATA cells. A BEGIN cell contains the exit port
   that is required for the exit node to open a new exit stream.
   that is required for the exit node to open a new exit stream.
   Subsequent DATA cells coming from the client or being sent back to the
   Subsequent DATA cells coming from the client or being sent back to the
@@ -361,7 +361,7 @@ Implementation notes:
      basically means renaming keywords.
      basically means renaming keywords.
   2. The timing of writing the four *-stats files should be unified, so
   2. The timing of writing the four *-stats files should be unified, so
-     that they are written exactly after 24 hours after starting the
+     that they are written exactly 24 hours after starting the
      relay. Right now, the measurement intervals for dirreq, entry, and
      relay. Right now, the measurement intervals for dirreq, entry, and
      exit stats start with the first observed request, and files are
      exit stats start with the first observed request, and files are
      written when observing the first request that occurs more than 24
      written when observing the first request that occurs more than 24
@@ -373,14 +373,14 @@ Implementation notes:
      directory until they are included in extra-info documents. The
      directory until they are included in extra-info documents. The
      reason is that the 24-hour measurement interval can be very
      reason is that the 24-hour measurement interval can be very
      different from the 18-hour publication interval of extra-info
      different from the 18-hour publication interval of extra-info
-     documents. When a relay crashed after finishing a measurement
+     documents. When a relay crashes after finishing a measurement
      interval, but before publishing the next extra-info document,
      interval, but before publishing the next extra-info document,
      statistics would get lost. Therefore, statistics are written to
      statistics would get lost. Therefore, statistics are written to
      disk when finishing a measurement interval and read from disk when
      disk when finishing a measurement interval and read from disk when
-     generating an extra-info document. As a result, the *-stats files
-     need to be overwritten after 24 hours, rather than appending new
-     statistics to them. Further, the contents of the *-stats files need
-     to be checked in the process of generating extra-info documents.
+     generating an extra-info document. Only the statistics that were
+     appended to the *-stats files within the past 24 hours are included
+     in extra-info documents. Further, the contents of the *-stats files
+     need to be checked in the process of generating extra-info documents.
   4. With the statistics patches being tested, the ./configure options
   4. With the statistics patches being tested, the ./configure options
      should be removed and the statistics code be compiled by default.
      should be removed and the statistics code be compiled by default.

+ 30 - 0

@@ -1075,6 +1075,36 @@ behalf of clients.
 \fBGeoIPFile \fR\fIfilename\fP
 \fBGeoIPFile \fR\fIfilename\fP
 A filename containing GeoIP data, for use with BridgeRecordUsageByCountry.
 A filename containing GeoIP data, for use with BridgeRecordUsageByCountry.
+\fBCellStatistics \fR\fB0\fR|\fB1\fR\fP
+When this option is enabled, Tor writes statistics on the mean time that
+cells spend in circuit queues to disk every 24 hours. Cannot be changed
+while Tor is running. (Default: 0)
+\fBDirReqStatistics \fR\fB0\fR|\fB1\fR\fP
+When this option is enabled, Tor writes statistics on the number and
+response time of network status requests to disk every 24 hours. Cannot be
+changed while Tor is running. (Default: 0)
+\fBEntryStatistics \fR\fB0\fR|\fB1\fR\fP
+When this option is enabled, Tor writes statistics on the number of
+directly connecting clients to disk every 24 hours. Cannot be changed
+while Tor is running. (Default: 0)
+\fBExitPortStatistics \fR\fB0\fR|\fB1\fR\fP
+When this option is enabled, Tor writes statistics on the number of
+relayed bytes and opened streams per exit port to disk every 24 hours.
+Cannot be changed while Tor is running. (Default: 0)
+\fBExtraInfoStatistics \fR\fB0\fR|\fB1\fR\fP
+When this option is enabled, Tor includes previously gathered statistics
+in its extra-info documents that it uploads to the directory authorities.
+(Default: 0)

+ 1 - 3

@@ -447,11 +447,9 @@ circuit_free(circuit_t *circ)
   } else {
   } else {
     or_circuit_t *ocirc = TO_OR_CIRCUIT(circ);
     or_circuit_t *ocirc = TO_OR_CIRCUIT(circ);
     /* Remember cell statistics for this circuit before deallocating. */
     /* Remember cell statistics for this circuit before deallocating. */
     if (get_options()->CellStatistics)
     if (get_options()->CellStatistics)
-      add_circ_to_buffer_stats(circ, time(NULL));
+      rep_hist_buffer_stats_add_circ(circ, time(NULL));
     mem = ocirc;
     mem = ocirc;
     memlen = sizeof(or_circuit_t);
     memlen = sizeof(or_circuit_t);
     tor_assert(circ->magic == OR_CIRCUIT_MAGIC);
     tor_assert(circ->magic == OR_CIRCUIT_MAGIC);

+ 17 - 50

@@ -188,12 +188,10 @@ static config_var_t _option_vars[] = {
   V(DirPort,                     UINT,     "0"),
   V(DirPort,                     UINT,     "0"),
   V(DirPortFrontPage,            FILENAME, NULL),
   V(DirPortFrontPage,            FILENAME, NULL),
   V(DirReqStatistics,            BOOL,     "0"),
   V(DirReqStatistics,            BOOL,     "0"),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSPort,                     UINT,     "0"),
@@ -210,6 +208,7 @@ static config_var_t _option_vars[] = {
   V(ExitPolicy,                  LINELIST, NULL),
   V(ExitPolicy,                  LINELIST, NULL),
   V(ExitPolicyRejectPrivate,     BOOL,     "1"),
   V(ExitPolicyRejectPrivate,     BOOL,     "1"),
   V(ExitPortStatistics,          BOOL,     "0"),
   V(ExitPortStatistics,          BOOL,     "0"),
+  V(ExtraInfoStatistics,         BOOL,     "0"),
   V(FallbackNetworkstatusFile,   FILENAME,
   V(FallbackNetworkstatusFile,   FILENAME,
     SHARE_DATADIR PATH_SEPARATOR "tor" PATH_SEPARATOR "fallback-consensus"),
     SHARE_DATADIR PATH_SEPARATOR "tor" PATH_SEPARATOR "fallback-consensus"),
   V(FascistFirewall,             BOOL,     "0"),
   V(FascistFirewall,             BOOL,     "0"),
@@ -1413,47 +1412,13 @@ options_act(or_options_t *old_options)
-  if (options->DirReqStatistics) {
+  if (options->DirReqStatistics && !geoip_is_loaded()) {
     /* Check if GeoIP database could be loaded. */
     /* Check if GeoIP database could be loaded. */
-    if (!geoip_is_loaded()) {
-      log_warn(LD_CONFIG, "Configured to measure directory request "
-               "statistics, but no GeoIP database found!");
-      return -1;
-    }
-    log_notice(LD_CONFIG, "Configured to count directory requests by "
-               "country and write aggregate statistics to disk. Check the "
-               "dirreq-stats file in your data directory that will first "
-               "be written in 24 hours from now.");
-  log_warn(LD_CONFIG, "DirReqStatistics enabled, but Tor was built "
-           "without support for directory request statistics.");
+    log_warn(LD_CONFIG, "Configured to measure directory request "
+             "statistics, but no GeoIP database found!");
+    return -1;
-  if (options->ExitPortStatistics)
-    log_notice(LD_CONFIG, "Configured to measure exit port statistics. "
-               "Look for the exit-stats file that will first be written to "
-               "the data directory in 24 hours from now.");
-  if (options->ExitPortStatistics)
-    log_warn(LD_CONFIG, "ExitPortStatistics enabled, but Tor was built "
-             "without port statistics support.");
-  if (options->CellStatistics)
-    log_notice(LD_CONFIG, "Configured to measure cell statistics. Look "
-               "for the buffer-stats file that will first be written to "
-               "the data directory in 24 hours from now.");
-  if (options->CellStatistics)
-    log_warn(LD_CONFIG, "CellStatistics enabled, but Tor was built "
-             "without cell statistics support.");
   if (options->EntryStatistics) {
   if (options->EntryStatistics) {
     if (should_record_bridge_info(options)) {
     if (should_record_bridge_info(options)) {
       /* Don't allow measuring statistics on entry guards when configured
       /* Don't allow measuring statistics on entry guards when configured
@@ -1466,17 +1431,9 @@ options_act(or_options_t *old_options)
       log_warn(LD_CONFIG, "Configured to measure entry node statistics, "
       log_warn(LD_CONFIG, "Configured to measure entry node statistics, "
                "but no GeoIP database found!");
                "but no GeoIP database found!");
       return -1;
       return -1;
-    } else
-      log_notice(LD_CONFIG, "Configured to measure entry node "
-                 "statistics. Look for the entry-stats file that will "
-                 "first be written to the data directory in 24 hours "
-                 "from now.");
+    }
-  if (options->EntryStatistics)
-    log_warn(LD_CONFIG, "EntryStatistics enabled, but Tor was built "
-             "without entry node statistics support.");
   /* Check if we need to parse and add the EntryNodes config option. */
   /* Check if we need to parse and add the EntryNodes config option. */
   if (options->EntryNodes &&
   if (options->EntryNodes &&
       (!old_options ||
       (!old_options ||
@@ -3861,6 +3818,16 @@ options_transition_allowed(or_options_t *old, or_options_t *new_val,
     return -1;
     return -1;
+  if (old->CellStatistics != new_val->CellStatistics ||
+      old->DirReqStatistics != new_val->DirReqStatistics ||
+      old->EntryStatistics != new_val->EntryStatistics ||
+      old->ExitPortStatistics != new_val->ExitPortStatistics) {
+    *msg = tor_strdup("While Tor is running, changing either "
+                      "CellStatistics, DirReqStatistics, EntryStatistics, "
+                      "or ExitPortStatistics is not allowed.");
+    return -1;
+  }
   return 0;
   return 0;

+ 4 - 4

@@ -2054,12 +2054,12 @@ connection_buckets_decrement(connection_t *conn, time_t now,
   if (num_read > 0) {
   if (num_read > 0) {
     if (conn->type == CONN_TYPE_EXIT)
     if (conn->type == CONN_TYPE_EXIT)
-      rep_hist_note_exit_bytes_read(conn->port, num_read, now);
+      rep_hist_note_exit_bytes_read(conn->port, num_read);
     rep_hist_note_bytes_read(num_read, now);
     rep_hist_note_bytes_read(num_read, now);
   if (num_written > 0) {
   if (num_written > 0) {
     if (conn->type == CONN_TYPE_EXIT)
     if (conn->type == CONN_TYPE_EXIT)
-      rep_hist_note_exit_bytes_written(conn->port, num_written, now);
+      rep_hist_note_exit_bytes_written(conn->port, num_written);
     rep_hist_note_bytes_written(num_written, now);
     rep_hist_note_bytes_written(num_written, now);
@@ -2652,13 +2652,13 @@ connection_handle_write(connection_t *conn, int force)
     /* else open, or closing */
     /* else open, or closing */
     result = flush_buf_tls(or_conn->tls, conn->outbuf,
     result = flush_buf_tls(or_conn->tls, conn->outbuf,
                            max_to_write, &conn->outbuf_flushlen);
                            max_to_write, &conn->outbuf_flushlen);
     /* If we just flushed the last bytes, check if this tunneled dir
     /* If we just flushed the last bytes, check if this tunneled dir
      * request is done. */
      * request is done. */
     if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id)
     if (buf_datalen(conn->outbuf) == 0 && conn->dirreq_id)
       geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED,
       geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED,
     switch (result) {
     switch (result) {
       case TOR_TLS_CLOSE:
       case TOR_TLS_CLOSE:

+ 4 - 5

@@ -333,7 +333,7 @@ connection_edge_finished_connecting(edge_connection_t *edge_conn)
-  rep_hist_note_exit_stream_opened(conn->port, approx_time());
+  rep_hist_note_exit_stream_opened(conn->port);
   conn->state = EXIT_CONN_STATE_OPEN;
   conn->state = EXIT_CONN_STATE_OPEN;
   connection_watch_events(conn, READ_EVENT); /* stop writing, keep reading */
   connection_watch_events(conn, READ_EVENT); /* stop writing, keep reading */
@@ -2544,11 +2544,11 @@ connection_exit_begin_conn(cell_t *cell, circuit_t *circ)
   log_debug(LD_EXIT,"Creating new exit connection.");
   log_debug(LD_EXIT,"Creating new exit connection.");
   n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET);
   n_stream = edge_connection_new(CONN_TYPE_EXIT, AF_INET);
   /* Remember the tunneled request ID in the new edge connection, so that
   /* Remember the tunneled request ID in the new edge connection, so that
    * we can measure download times. */
    * we can measure download times. */
   TO_CONN(n_stream)->dirreq_id = circ->dirreq_id;
   TO_CONN(n_stream)->dirreq_id = circ->dirreq_id;
   n_stream->_base.purpose = EXIT_PURPOSE_CONNECT;
   n_stream->_base.purpose = EXIT_PURPOSE_CONNECT;
   n_stream->stream_id = rh.stream_id;
   n_stream->stream_id = rh.stream_id;
@@ -2785,11 +2785,10 @@ connection_exit_connect_dir(edge_connection_t *exitconn)
   dirconn->_base.purpose = DIR_PURPOSE_SERVER;
   dirconn->_base.purpose = DIR_PURPOSE_SERVER;
   dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT;
   dirconn->_base.state = DIR_CONN_STATE_SERVER_COMMAND_WAIT;
   /* Note that the new dir conn belongs to the same tunneled request as
   /* Note that the new dir conn belongs to the same tunneled request as
    * the edge conn, so that we can measure download times. */
    * the edge conn, so that we can measure download times. */
   TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id;
   TO_CONN(dirconn)->dirreq_id = TO_CONN(exitconn)->dirreq_id;
   connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn));
   connection_link_connections(TO_CONN(dirconn), TO_CONN(exitconn));
   if (connection_add(TO_CONN(exitconn))<0) {
   if (connection_add(TO_CONN(exitconn))<0) {

+ 0 - 4

@@ -2573,7 +2573,6 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
       goto done;
       goto done;
       struct in_addr in;
       struct in_addr in;
       if (tor_inet_aton((TO_CONN(conn))->address, &in)) {
       if (tor_inet_aton((TO_CONN(conn))->address, &in)) {
@@ -2589,7 +2588,6 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
     // note_request(request_type,dlen);
     // note_request(request_type,dlen);
     (void) request_type;
     (void) request_type;
@@ -3221,7 +3219,6 @@ connection_dir_finished_flushing(dir_connection_t *conn)
   tor_assert(conn->_base.type == CONN_TYPE_DIR);
   tor_assert(conn->_base.type == CONN_TYPE_DIR);
   /* Note that we have finished writing the directory response. For direct
   /* Note that we have finished writing the directory response. For direct
   * connections this means we're done, for tunneled connections it's only
   * connections this means we're done, for tunneled connections it's only
    * an intermediate step. */
    * an intermediate step. */
@@ -3232,7 +3229,6 @@ connection_dir_finished_flushing(dir_connection_t *conn)
   switch (conn->_base.state) {
   switch (conn->_base.state) {
       log_debug(LD_DIR,"client finished sending command.");
       log_debug(LD_DIR,"client finished sending command.");

+ 145 - 108

@@ -12,8 +12,6 @@
 #include "ht.h"
 #include "ht.h"
 static void clear_geoip_db(void);
 static void clear_geoip_db(void);
-static void dump_geoip_stats(void);
-static void dump_entry_stats(void);
 /** An entry from the GeoIP file: maps an IP range to a country. */
 /** An entry from the GeoIP file: maps an IP range to a country. */
 typedef struct geoip_entry_t {
 typedef struct geoip_entry_t {
@@ -347,7 +345,6 @@ geoip_determine_shares(time_t now)
   last_time_determined_shares = now;
   last_time_determined_shares = now;
 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
 /** Calculate which fraction of v2 and v3 directory requests aimed at caches
  * have been sent to us since the last call of this function up to time
  * have been sent to us since the last call of this function up to time
  * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
  * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
@@ -367,7 +364,23 @@ geoip_get_mean_shares(time_t now, double *v2_share_out,
   share_seconds = 0;
   share_seconds = 0;
   return 0;
   return 0;
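+/* Rotate the request period: shift each country's v2 and v3 network
+ * status request counts down by one slot, zero the newest slot, and
+ * advance the start of the current request period. */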
+static void
+  SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
+      memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
+              sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
+      memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],
+              sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
+      c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;
+      c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;
+    });
+  current_request_period_starts += REQUEST_HIST_PERIOD;
+  if (n_old_request_periods < REQUEST_HIST_LEN-1)
+    ++n_old_request_periods;
 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
  * at time <b>now</b>. Ignored by all but bridges and directories if
  * at time <b>now</b>. Ignored by all but bridges and directories if
@@ -379,55 +392,37 @@ geoip_note_client_seen(geoip_client_action_t action,
   or_options_t *options = get_options();
   or_options_t *options = get_options();
   clientmap_entry_t lookup, *ent;
   clientmap_entry_t lookup, *ent;
   if (action == GEOIP_CLIENT_CONNECT) {
   if (action == GEOIP_CLIENT_CONNECT) {
-    if (!options->EntryStatistics)
+    /* Only remember statistics as entry guard or as bridge. */
+    if (!options->EntryStatistics ||
+        (!(options->BridgeRelay && options->BridgeRecordUsageByCountry)))
-    if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
-      return;
     /* Did we recently switch from bridge to relay or back? */
     /* Did we recently switch from bridge to relay or back? */
     if (client_history_starts > now)
     if (client_history_starts > now)
   } else {
   } else {
-    return;
     if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
     if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
-  /* Rotate the current request period. */
-  while (current_request_period_starts + REQUEST_HIST_PERIOD < now) {
-    if (!geoip_countries)
-      geoip_countries = smartlist_create();
-    if (!current_request_period_starts) {
-      current_request_period_starts = now;
-      break;
+  /* As a bridge that doesn't rotate request periods every 24 hours,
+   * possibly rotate now. */
+  if (options->BridgeRelay) {
+    while (current_request_period_starts + REQUEST_HIST_PERIOD < now) {
+      if (!geoip_countries)
+        geoip_countries = smartlist_create();
+      if (!current_request_period_starts) {
+        current_request_period_starts = now;
+        break;
+      }
+      /* Also discard all items in the client history that are too old.
+       * (This only works here because bridge and directory stats are
+       * independent. Otherwise, we'd only want to discard those items
+       * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
+      geoip_remove_old_clients(current_request_period_starts);
+      /* Now rotate request period */
+      rotate_request_period();
-    /* Also discard all items in the client history that are too old.
-     * (This only works here because bridge and directory stats are
-     * independent. Otherwise, we'd only want to discard those items
-     * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
-    geoip_remove_old_clients(current_request_period_starts);
-    /* Before rotating, write the current stats to disk. */
-    dump_geoip_stats();
-    if (get_options()->EntryStatistics)
-      dump_entry_stats();
-    /* Now rotate request period */
-    SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
-        memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
-                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
-        memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],
-                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
-        c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;
-        c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;
-      });
-    current_request_period_starts += REQUEST_HIST_PERIOD;
-    if (n_old_request_periods < REQUEST_HIST_LEN-1)
-      ++n_old_request_periods;
   lookup.ipaddr = addr;
   lookup.ipaddr = addr;
@@ -495,7 +490,6 @@ geoip_remove_old_clients(time_t cutoff)
     client_history_starts = cutoff;
     client_history_starts = cutoff;
 /** How many responses are we giving to clients requesting v2 network
 /** How many responses are we giving to clients requesting v2 network
  * statuses? */
  * statuses? */
 static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
 static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
@@ -503,7 +497,6 @@ static uint32_t ns_v2_responses[GEOIP_NS_RESPONSE_NUM];
 /** How many responses are we giving to clients requesting v3 network
 /** How many responses are we giving to clients requesting v3 network
  * statuses? */
  * statuses? */
 static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];
 static uint32_t ns_v3_responses[GEOIP_NS_RESPONSE_NUM];
 /** Note that we've rejected a client's request for a v2 or v3 network
 /** Note that we've rejected a client's request for a v2 or v3 network
  * status, encoded in <b>action</b> for reason <b>reason</b> at time
  * status, encoded in <b>action</b> for reason <b>reason</b> at time
@@ -512,7 +505,6 @@ void
 geoip_note_ns_response(geoip_client_action_t action,
 geoip_note_ns_response(geoip_client_action_t action,
                        geoip_ns_response_t response)
                        geoip_ns_response_t response)
   static int arrays_initialized = 0;
   static int arrays_initialized = 0;
   if (!get_options()->DirReqStatistics)
   if (!get_options()->DirReqStatistics)
@@ -528,10 +520,6 @@ geoip_note_ns_response(geoip_client_action_t action,
-  (void) action;
-  (void) response;
 /** Do not mention any country from which fewer than this number of IPs have
 /** Do not mention any country from which fewer than this number of IPs have
@@ -709,7 +697,6 @@ geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
 /** Return a newly allocated comma-separated string containing statistics
 /** Return a newly allocated comma-separated string containing statistics
  * on network status downloads. The string contains the number of completed
  * on network status downloads. The string contains the number of completed
  * requests, timeouts, and still running requests as well as the download
  * requests, timeouts, and still running requests as well as the download
@@ -811,25 +798,18 @@ geoip_get_dirreq_history(geoip_client_action_t action,
   return result;
   return result;
 /** How long do we have to have observed per-country request history before we
 /** How long do we have to have observed per-country request history before we
  * are willing to talk about it? */
  * are willing to talk about it? */
 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
 #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
-/** Return a newly allocated comma-separated string containing entries for all
- * the countries from which we've seen enough clients connect. The entry
- * format is cc=num where num is the number of IPs we've seen connecting from
- * that country, and cc is a lowercased country code. Returns NULL if we don't
- * want to export geoip data yet. */
-char *
-geoip_get_client_history(time_t now, geoip_client_action_t action)
+/** Helper for geoip_get_client_history_dirreq() and
+ * geoip_get_client_history_bridge(). */
+static char *
+geoip_get_client_history(time_t now, geoip_client_action_t action,
+                         int min_observation_time, unsigned granularity)
   char *result = NULL;
   char *result = NULL;
-  int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-  min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
   if (!geoip_is_loaded())
   if (!geoip_is_loaded())
     return NULL;
     return NULL;
   if (client_history_starts < (now - min_observation_time)) {
   if (client_history_starts < (now - min_observation_time)) {
@@ -841,10 +821,6 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
     clientmap_entry_t **ent;
     clientmap_entry_t **ent;
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned total = 0;
     unsigned total = 0;
-    unsigned granularity = IP_GRANULARITY;
     HT_FOREACH(ent, clientmap, &client_history) {
     HT_FOREACH(ent, clientmap, &client_history) {
       int country;
       int country;
       if ((*ent)->action != (int)action)
       if ((*ent)->action != (int)action)
@@ -900,6 +876,34 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
   return result;
   return result;
+/** Return a newly allocated comma-separated string containing entries for
+ * all the countries from which we've seen enough clients connect as a
+ * directory. The entry format is cc=num where num is the number of IPs
+ * we've seen connecting from that country, and cc is a lowercased country
+ * code. Returns NULL if we don't want to export geoip data yet. */
+char *
+geoip_get_client_history_dirreq(time_t now,
+                                geoip_client_action_t action)
+  return geoip_get_client_history(now, action,
+                                  DIR_RECORD_USAGE_MIN_OBSERVATION_TIME,
+                                  DIR_RECORD_USAGE_GRANULARITY);
+/** Return a newly allocated comma-separated string containing entries for
+ * all the countries from which we've seen enough clients connect as a
+ * bridge. The entry format is cc=num where num is the number of IPs
+ * we've seen connecting from that country, and cc is a lowercased country
+ * code. Returns NULL if we don't want to export geoip data yet. */
+char *
+geoip_get_client_history_bridge(time_t now,
+                                geoip_client_action_t action)
+  return geoip_get_client_history(now, action,
+                                  GEOIP_MIN_OBSERVATION_TIME,
+                                  IP_GRANULARITY);
 /** Return a newly allocated string holding the per-country request history
 /** Return a newly allocated string holding the per-country request history
  * for <b>action</b> in a format suitable for an extra-info document, or NULL
  * for <b>action</b> in a format suitable for an extra-info document, or NULL
  * on failure. */
  * on failure. */
@@ -910,10 +914,6 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   char *result;
   char *result;
   unsigned granularity = IP_GRANULARITY;
   unsigned granularity = IP_GRANULARITY;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
   int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
-  min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
   if (client_history_starts >= (now - min_observation_time))
   if (client_history_starts >= (now - min_observation_time))
     return NULL;
     return NULL;
@@ -955,16 +955,23 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   return result;
   return result;
-/** Store all our geoip statistics into $DATADIR/dirreq-stats. */
-static void
+/** Start time of directory request stats. */
+static time_t start_of_dirreq_stats_interval;
+/** Initialize directory request stats. */
+geoip_dirreq_stats_init(time_t now)
+  start_of_dirreq_stats_interval = now;
+/** Write dirreq statistics to $DATADIR/stats/dirreq-stats. */
+geoip_dirreq_stats_write(time_t now)
-  time_t now = time(NULL);
-  time_t request_start;
-  char *filename = get_datadir_fname("dirreq-stats");
+  char *statsdir = NULL, *filename = NULL;
   char *data_v2 = NULL, *data_v3 = NULL;
   char *data_v2 = NULL, *data_v3 = NULL;
-  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
+  char written[ISO_TIME_LEN+1];
   open_file_t *open_file = NULL;
   open_file_t *open_file = NULL;
   double v2_share = 0.0, v3_share = 0.0;
   double v2_share = 0.0, v3_share = 0.0;
   FILE *out;
   FILE *out;
@@ -973,28 +980,33 @@ dump_geoip_stats(void)
   if (!get_options()->DirReqStatistics)
   if (!get_options()->DirReqStatistics)
     goto done;
     goto done;
-  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
-  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
-  format_iso_time(since, geoip_get_history_start());
+  /* Discard all items in the client history that are too old. */
+  geoip_remove_old_clients(start_of_dirreq_stats_interval);
+  statsdir = get_datadir_fname("stats");
+  if (check_private_dir(statsdir, CPD_CREATE) < 0)
+    goto done;
+  filename = get_datadir_fname("stats"PATH_SEPARATOR"dirreq-stats");
+  data_v2 = geoip_get_client_history_dirreq(now,
+  data_v3 = geoip_get_client_history_dirreq(now,
   format_iso_time(written, now);
   format_iso_time(written, now);
   out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
   out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
                                     0600, &open_file);
                                     0600, &open_file);
   if (!out)
   if (!out)
     goto done;
     goto done;
-  if (fprintf(out, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips %s\n",
-              written, since,
+  if (fprintf(out, "dirreq-stats-end %s (%d s)\ndirreq-v3-ips %s\n"
+              "dirreq-v2-ips %s\n", written,
+              (unsigned) (now - start_of_dirreq_stats_interval),
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
     goto done;
     goto done;
-  request_start = current_request_period_starts -
-    (n_old_request_periods * REQUEST_HIST_PERIOD);
-  format_iso_time(since, request_start);
   data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
   data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
   data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);
-  if (fprintf(out, "requests-start %s\nn-ns-reqs %s\nn-v2-ns-reqs %s\n",
-              since,
+  if (fprintf(out, "dirreq-v3-reqs %s\ndirreq-v2-reqs %s\n",
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
     goto done;
     goto done;
@@ -1005,7 +1017,7 @@ dump_geoip_stats(void)
                                ns_v3_responses[i], RESPONSE_GRANULARITY);
                                ns_v3_responses[i], RESPONSE_GRANULARITY);
-  if (fprintf(out, "n-ns-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
+  if (fprintf(out, "dirreq-v3-resp ok=%u,not-enough-sigs=%u,unavailable=%u,"
@@ -1014,7 +1026,7 @@ dump_geoip_stats(void)
                    ns_v3_responses[GEOIP_REJECT_BUSY]) < 0)
                    ns_v3_responses[GEOIP_REJECT_BUSY]) < 0)
     goto done;
     goto done;
-  if (fprintf(out, "n-v2-ns-resp ok=%u,unavailable=%u,"
+  if (fprintf(out, "dirreq-v2-resp ok=%u,unavailable=%u,"
@@ -1025,9 +1037,9 @@ dump_geoip_stats(void)
   memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
   memset(ns_v2_responses, 0, sizeof(ns_v2_responses));
   memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
   memset(ns_v3_responses, 0, sizeof(ns_v3_responses));
   if (!geoip_get_mean_shares(now, &v2_share, &v3_share)) {
   if (!geoip_get_mean_shares(now, &v2_share, &v3_share)) {
-    if (fprintf(out, "v2-ns-share %0.2lf%%\n", v2_share*100) < 0)
+    if (fprintf(out, "dirreq-v2-share %0.2lf%%\n", v2_share*100) < 0)
       goto done;
       goto done;
-    if (fprintf(out, "v3-ns-share %0.2lf%%\n", v3_share*100) < 0)
+    if (fprintf(out, "dirreq-v3-share %0.2lf%%\n", v3_share*100) < 0)
       goto done;
       goto done;
@@ -1035,7 +1047,7 @@ dump_geoip_stats(void)
   data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
   data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
-  if (fprintf(out, "ns-direct-dl %s\nns-v2-direct-dl %s\n",
+  if (fprintf(out, "dirreq-v3-direct-dl %s\ndirreq-v2-direct-dl %s\n",
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
     goto done;
     goto done;
@@ -1044,53 +1056,78 @@ dump_geoip_stats(void)
   data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
   data_v3 = geoip_get_dirreq_history(GEOIP_CLIENT_NETWORKSTATUS,
-  if (fprintf(out, "ns-tunneled-dl %s\nns-v2-tunneled-dl %s\n",
+  if (fprintf(out, "dirreq-v3-tunneled-dl %s\ndirreq-v2-tunneled-dl %s\n",
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
               data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)
     goto done;
     goto done;
   open_file = NULL;
   open_file = NULL;
+  /* Rotate request period */
+  rotate_request_period();
+  start_of_dirreq_stats_interval = now;
   if (open_file)
   if (open_file)
+  tor_free(statsdir);
-/** Store all our geoip statistics as entry guards into
- * $DATADIR/entry-stats. */
-static void
+/** Start time of entry stats. */
+static time_t start_of_entry_stats_interval;
+/** Initialize entry stats. */
+geoip_entry_stats_init(time_t now)
+  start_of_entry_stats_interval = now;
+/** Write entry statistics to $DATADIR/stats/entry-stats. */
+geoip_entry_stats_write(time_t now)
-  time_t now = time(NULL);
-  char *filename = get_datadir_fname("entry-stats");
+  char *statsdir = NULL, *filename = NULL;
   char *data = NULL;
   char *data = NULL;
-  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
+  char written[ISO_TIME_LEN+1];
   open_file_t *open_file = NULL;
   open_file_t *open_file = NULL;
   FILE *out;
   FILE *out;
-  data = geoip_get_client_history(now, GEOIP_CLIENT_CONNECT);
-  format_iso_time(since, geoip_get_history_start());
+  if (!get_options()->EntryStatistics)
+    goto done;
+  /* Discard all items in the client history that are too old. */
+  geoip_remove_old_clients(start_of_entry_stats_interval);
+  statsdir = get_datadir_fname("stats");
+  if (check_private_dir(statsdir, CPD_CREATE) < 0)
+    goto done;
+  filename = get_datadir_fname("stats"PATH_SEPARATOR"entry-stats");
+  data = geoip_get_client_history_dirreq(now, GEOIP_CLIENT_CONNECT);
   format_iso_time(written, now);
   format_iso_time(written, now);
   out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
   out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
                                     0600, &open_file);
                                     0600, &open_file);
   if (!out)
   if (!out)
     goto done;
     goto done;
-  if (fprintf(out, "written %s\nstarted-at %s\nips %s\n",
-              written, since, data ? data : "") < 0)
+  if (fprintf(out, "entry-stats-end %s (%u s)\nentry-ips %s\n",
+              written, (unsigned) (now - start_of_entry_stats_interval),
+              data ? data : "") < 0)
     goto done;
     goto done;
+  start_of_entry_stats_interval = now;
   open_file = NULL;
   open_file = NULL;
   if (open_file)
   if (open_file)
+  tor_free(statsdir);
 /** Helper used to implement GETINFO ip-to-country/... controller command. */
 /** Helper used to implement GETINFO ip-to-country/... controller command. */

+ 38 - 9

@@ -830,9 +830,7 @@ run_scheduled_events(time_t now)
   static time_t time_to_clean_caches = 0;
   static time_t time_to_clean_caches = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
-  static time_t time_to_dump_buffer_stats = 0;
+  static time_t time_to_write_stats_files = 0;
   static time_t time_to_retry_dns_init = 0;
   static time_t time_to_retry_dns_init = 0;
   or_options_t *options = get_options();
   or_options_t *options = get_options();
   int i;
   int i;
@@ -960,13 +958,44 @@ run_scheduled_events(time_t now)
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
-  if (time_to_dump_buffer_stats < now) {
-    if (get_options()->CellStatistics && time_to_dump_buffer_stats)
-      dump_buffer_stats();
-    time_to_dump_buffer_stats = now + DUMP_BUFFER_STATS_INTERVAL;
+  /* 1g. Check whether we should write statistics to disk.
+   */
+  if (time_to_write_stats_files >= 0 && time_to_write_stats_files < now) {
+#define WRITE_STATS_INTERVAL (24*60*60)
+    if (options->CellStatistics || options->DirReqStatistics ||
+        options->EntryStatistics || options->ExitPortStatistics) {
+      if (!time_to_write_stats_files) {
+        /* Initialize stats. */
+        if (options->CellStatistics)
+          rep_hist_buffer_stats_init(now);
+        if (options->DirReqStatistics)
+          geoip_dirreq_stats_init(now);
+        if (options->EntryStatistics)
+          geoip_entry_stats_init(now);
+        if (options->ExitPortStatistics)
+          rep_hist_exit_stats_init(now);
+        log_notice(LD_CONFIG, "Configured to measure statistics. Look for "
+                   "the *-stats files that will first be written to the "
+                   "data directory in %d hours from now.",
+                   WRITE_STATS_INTERVAL / (60 * 60));
+        time_to_write_stats_files = now + WRITE_STATS_INTERVAL;
+      } else {
+        /* Write stats to disk. */
+        time_to_write_stats_files += WRITE_STATS_INTERVAL;
+        if (options->CellStatistics)
+          rep_hist_buffer_stats_write(time_to_write_stats_files);
+        if (options->DirReqStatistics)
+          geoip_dirreq_stats_write(time_to_write_stats_files);
+        if (options->EntryStatistics)
+          geoip_entry_stats_write(time_to_write_stats_files);
+        if (options->ExitPortStatistics)
+          rep_hist_exit_stats_write(time_to_write_stats_files);
+      }
+    } else {
+      /* Never write stats to disk */
+      time_to_write_stats_files = -1;
+    }
   /* Remove old information from rephist and the rend cache. */
   /* Remove old information from rephist and the rend cache. */
   if (time_to_clean_caches < now) {
   if (time_to_clean_caches < now) {

+ 39 - 38

@@ -20,12 +20,6 @@
 #ifdef MS_WINDOWS
 #ifdef MS_WINDOWS
@@ -854,17 +848,30 @@ typedef struct var_cell_t {
 typedef struct packed_cell_t {
 typedef struct packed_cell_t {
   struct packed_cell_t *next; /**< Next cell queued on this circuit. */
   struct packed_cell_t *next; /**< Next cell queued on this circuit. */
   char body[CELL_NETWORK_SIZE]; /**< Cell as packed for network. */
   char body[CELL_NETWORK_SIZE]; /**< Cell as packed for network. */
-  struct timeval packed_timeval; /**< When was this cell packed? */
 } packed_cell_t;
 } packed_cell_t;
+/** Number of cells added to a circuit queue, along with their insertion
+ * time in 10-millisecond steps; used for buffer statistics. */
+typedef struct insertion_time_elem_t {
+  struct insertion_time_elem_t *next; /**< Next element in queue. */
+  uint32_t insertion_time; /**< When were cells inserted (in 10 ms steps
+                             * starting at 0:00 of the current day)? */
+  unsigned counter; /**< How many cells were inserted? */
+} insertion_time_elem_t;
+/** Queue of insertion times. */
+typedef struct insertion_time_queue_t {
+  struct insertion_time_elem_t *first; /**< First element in queue. */
+  struct insertion_time_elem_t *last; /**< Last element in queue. */
+} insertion_time_queue_t;
 /** A queue of cells on a circuit, waiting to be added to the
 /** A queue of cells on a circuit, waiting to be added to the
  * or_connection_t's outbuf. */
  * or_connection_t's outbuf. */
 typedef struct cell_queue_t {
 typedef struct cell_queue_t {
   packed_cell_t *head; /**< The first cell, or NULL if the queue is empty. */
   packed_cell_t *head; /**< The first cell, or NULL if the queue is empty. */
   packed_cell_t *tail; /**< The last cell, or NULL if the queue is empty. */
   packed_cell_t *tail; /**< The last cell, or NULL if the queue is empty. */
   int n; /**< The number of cells in the queue. */
   int n; /**< The number of cells in the queue. */
+  insertion_time_queue_t *insertion_times; /**< Insertion times of cells. */
 } cell_queue_t;
 } cell_queue_t;
 /** Beginning of a RELAY cell payload. */
 /** Beginning of a RELAY cell payload. */
@@ -991,11 +998,8 @@ typedef struct connection_t {
    * to the evdns_server_port it uses to listen to and answer connections. */
    * to the evdns_server_port it uses to listen to and answer connections. */
   struct evdns_server_port *dns_server_port;
   struct evdns_server_port *dns_server_port;
   /** Unique ID for measuring tunneled network status requests. */
   /** Unique ID for measuring tunneled network status requests. */
   uint64_t dirreq_id;
   uint64_t dirreq_id;
 } connection_t;
 } connection_t;
 /** Stores flags and information related to the portion of a v2 Tor OR
 /** Stores flags and information related to the portion of a v2 Tor OR
@@ -1985,10 +1989,9 @@ typedef struct circuit_t {
    * linked to an OR connection. */
    * linked to an OR connection. */
   struct circuit_t *prev_active_on_n_conn;
   struct circuit_t *prev_active_on_n_conn;
   struct circuit_t *next; /**< Next circuit in linked list of all circuits. */
   struct circuit_t *next; /**< Next circuit in linked list of all circuits. */
   /** Unique ID for measuring tunneled network status requests. */
   /** Unique ID for measuring tunneled network status requests. */
   uint64_t dirreq_id;
   uint64_t dirreq_id;
 } circuit_t;
 } circuit_t;
 /** Largest number of relay_early cells that we can send on a given
 /** Largest number of relay_early cells that we can send on a given
@@ -2112,7 +2115,6 @@ typedef struct or_circuit_t {
   /** True iff this circuit was made with a CREATE_FAST cell. */
   /** True iff this circuit was made with a CREATE_FAST cell. */
   unsigned int is_first_hop : 1;
   unsigned int is_first_hop : 1;
   /** Number of cells that were removed from circuit queue; reset every
   /** Number of cells that were removed from circuit queue; reset every
    * time when writing buffer stats to disk. */
    * time when writing buffer stats to disk. */
   uint32_t processed_cells;
   uint32_t processed_cells;
@@ -2121,7 +2123,6 @@ typedef struct or_circuit_t {
    * exit-ward queues of this circuit; reset every time when writing
    * exit-ward queues of this circuit; reset every time when writing
    * buffer stats to disk. */
    * buffer stats to disk. */
   uint64_t total_cell_waiting_time;
   uint64_t total_cell_waiting_time;
 } or_circuit_t;
 } or_circuit_t;
 /** Convert a circuit subtype to a circuit_t.*/
 /** Convert a circuit subtype to a circuit_t.*/
@@ -2558,6 +2559,9 @@ typedef struct {
   /** If true, the user wants us to collect statistics as entry node. */
   /** If true, the user wants us to collect statistics as entry node. */
   int EntryStatistics;
   int EntryStatistics;
+  /** If true, include statistics file contents in extra-info documents. */
+  int ExtraInfoStatistics;
   /** If true, do not believe anybody who tells us that a domain resolves
   /** If true, do not believe anybody who tells us that a domain resolves
    * to an internal address, or that an internal address has a PTR mapping.
    * to an internal address, or that an internal address has a PTR mapping.
    * Helps avoid some cross-site attacks. */
    * Helps avoid some cross-site attacks. */
@@ -3697,15 +3701,11 @@ int dnsserv_launch_request(const char *name, int is_reverse);
  * leaking information. */
  * leaking information. */
 /** Time interval: Flush geoip data to disk this often. */
 /** Time interval: Flush geoip data to disk this often. */
-#define DIR_RECORD_USAGE_RETAIN_IPS (24*60*60)
 /** How long do we have to have observed per-country request history before
 /** How long do we have to have observed per-country request history before
  * we are willing to talk about it? */
  * we are willing to talk about it? */
-/** Time interval: Flush geoip data to disk this often when measuring on an
- * entry guard. */
 int geoip_parse_entry(const char *line);
 int geoip_parse_entry(const char *line);
@@ -3752,7 +3752,10 @@ typedef enum {
 void geoip_note_ns_response(geoip_client_action_t action,
 void geoip_note_ns_response(geoip_client_action_t action,
                             geoip_ns_response_t response);
                             geoip_ns_response_t response);
 time_t geoip_get_history_start(void);
 time_t geoip_get_history_start(void);
-char *geoip_get_client_history(time_t now, geoip_client_action_t action);
+char *geoip_get_client_history_dirreq(time_t now,
+                                      geoip_client_action_t action);
+char *geoip_get_client_history_bridge(time_t now,
+                                      geoip_client_action_t action);
 char *geoip_get_request_history(time_t now, geoip_client_action_t action);
 char *geoip_get_request_history(time_t now, geoip_client_action_t action);
 int getinfo_helper_geoip(control_connection_t *control_conn,
 int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
                          const char *question, char **answer);
@@ -3792,6 +3795,11 @@ void geoip_start_dirreq(uint64_t dirreq_id, size_t response_size,
 void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
 void geoip_change_dirreq_state(uint64_t dirreq_id, dirreq_type_t type,
                                dirreq_state_t new_state);
                                dirreq_state_t new_state);
+void geoip_dirreq_stats_init(time_t now);
+void geoip_dirreq_stats_write(time_t now);
+void geoip_entry_stats_init(time_t now);
+void geoip_entry_stats_write(time_t now);
 /********************************* hibernate.c **********************/
 /********************************* hibernate.c **********************/
 int accounting_parse_options(or_options_t *options, int validate_only);
 int accounting_parse_options(or_options_t *options, int validate_only);
@@ -4133,17 +4141,11 @@ void rep_hist_note_extend_failed(const char *from_name, const char *to_name);
 void rep_hist_dump_stats(time_t now, int severity);
 void rep_hist_dump_stats(time_t now, int severity);
 void rep_hist_note_bytes_read(size_t num_bytes, time_t when);
 void rep_hist_note_bytes_read(size_t num_bytes, time_t when);
 void rep_hist_note_bytes_written(size_t num_bytes, time_t when);
 void rep_hist_note_bytes_written(size_t num_bytes, time_t when);
-void rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes,
-                                   time_t now);
-void rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes,
-                                      time_t now);
-void rep_hist_note_exit_stream_opened(uint16_t port, time_t now);
-#define rep_hist_note_exit_bytes_read(p,n,t) STMT_NIL
-#define rep_hist_note_exit_bytes_written(p,n,t) STMT_NIL
-#define rep_hist_note_exit_stream_opened(p,t) STMT_NIL
+void rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes);
+void rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes);
+void rep_hist_note_exit_stream_opened(uint16_t port);
+void rep_hist_exit_stats_init(time_t now);
+void rep_hist_exit_stats_write(time_t now);
 int rep_hist_bandwidth_assess(void);
 int rep_hist_bandwidth_assess(void);
 char *rep_hist_get_bandwidth_lines(int for_extrainfo);
 char *rep_hist_get_bandwidth_lines(int for_extrainfo);
 void rep_hist_update_state(or_state_t *state);
 void rep_hist_update_state(or_state_t *state);
@@ -4195,11 +4197,10 @@ void hs_usage_note_fetch_successful(const char *service_id, time_t now);
 void hs_usage_write_statistics_to_file(time_t now);
 void hs_usage_write_statistics_to_file(time_t now);
 void hs_usage_free_all(void);
 void hs_usage_free_all(void);
-#define DUMP_BUFFER_STATS_INTERVAL (24*60*60)
-void add_circ_to_buffer_stats(circuit_t *circ, time_t end_of_interval);
-void dump_buffer_stats(void);
+void rep_hist_buffer_stats_init(time_t now);
+void rep_hist_buffer_stats_add_circ(circuit_t *circ,
+                                    time_t end_of_interval);
+void rep_hist_buffer_stats_write(time_t now);
 /********************************* rendclient.c ***************************/
 /********************************* rendclient.c ***************************/

+ 72 - 22

@@ -533,13 +533,11 @@ relay_send_command_from_edge(uint16_t stream_id, circuit_t *circ,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
   /* If we are sending an END cell and this circuit is used for a tunneled
   /* If we are sending an END cell and this circuit is used for a tunneled
    * directory request, advance its state. */
    * directory request, advance its state. */
   if (relay_command == RELAY_COMMAND_END && circ->dirreq_id)
   if (relay_command == RELAY_COMMAND_END && circ->dirreq_id)
     geoip_change_dirreq_state(circ->dirreq_id, DIRREQ_TUNNELED,
     geoip_change_dirreq_state(circ->dirreq_id, DIRREQ_TUNNELED,
   if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) {
   if (cell_direction == CELL_DIRECTION_OUT && circ->n_conn) {
     /* if we're using relaybandwidthrate, this conn wants priority */
     /* if we're using relaybandwidthrate, this conn wants priority */
@@ -1047,7 +1045,6 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
                "Begin cell for known stream. Dropping.");
                "Begin cell for known stream. Dropping.");
         return 0;
         return 0;
       if (rh.command == RELAY_COMMAND_BEGIN_DIR) {
       if (rh.command == RELAY_COMMAND_BEGIN_DIR) {
         /* Assign this circuit and its app-ward OR connection a unique ID,
         /* Assign this circuit and its app-ward OR connection a unique ID,
          * so that we can measure download times. The local edge and dir
          * so that we can measure download times. The local edge and dir
@@ -1057,7 +1054,6 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
         circ->dirreq_id = ++next_id;
         circ->dirreq_id = ++next_id;
         TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->dirreq_id = circ->dirreq_id;
         TO_CONN(TO_OR_CIRCUIT(circ)->p_conn)->dirreq_id = circ->dirreq_id;
       return connection_exit_begin_conn(cell, circ);
       return connection_exit_begin_conn(cell, circ);
@@ -1529,6 +1525,10 @@ static int total_cells_allocated = 0;
 /** A memory pool to allocate packed_cell_t objects. */
 /** A memory pool to allocate packed_cell_t objects. */
 static mp_pool_t *cell_pool = NULL;
 static mp_pool_t *cell_pool = NULL;
+/** Memory pool to allocate insertion_time_elem_t objects used for cell
+ * statistics. */
+static mp_pool_t *it_pool = NULL;
 /** Allocate structures to hold cells. */
 /** Allocate structures to hold cells. */
@@ -1537,7 +1537,8 @@ init_cell_pool(void)
   cell_pool = mp_pool_new(sizeof(packed_cell_t), 128*1024);
   cell_pool = mp_pool_new(sizeof(packed_cell_t), 128*1024);
-/** Free all storage used to hold cells. */
+/** Free all storage used to hold cells (and insertion times if we measure
+ * cell statistics). */
@@ -1546,6 +1547,10 @@ free_cell_pool(void)
     cell_pool = NULL;
     cell_pool = NULL;
+  if (it_pool) {
+    mp_pool_destroy(it_pool);
+    it_pool = NULL;
+  }
 /** Free excess storage in cell pool. */
 /** Free excess storage in cell pool. */
@@ -1621,11 +1626,35 @@ void
 cell_queue_append_packed_copy(cell_queue_t *queue, const cell_t *cell)
 cell_queue_append_packed_copy(cell_queue_t *queue, const cell_t *cell)
   packed_cell_t *copy = packed_cell_copy(cell);
   packed_cell_t *copy = packed_cell_copy(cell);
-  /* Remember the exact time when this cell was put in the queue. */
-  if (get_options()->CellStatistics)
-    tor_gettimeofday(&copy->packed_timeval);
+  /* Remember the time when this cell was put in the queue. */
+  if (get_options()->CellStatistics) {
+    struct timeval now;
+    uint32_t added;
+    insertion_time_queue_t *it_queue = queue->insertion_times;
+    if (!it_pool)
+      it_pool = mp_pool_new(sizeof(insertion_time_elem_t), 1024);
+    tor_gettimeofday(&now);
+#define SECONDS_IN_A_DAY 86400L
+    added = (now.tv_sec % SECONDS_IN_A_DAY) * 100L + now.tv_usec / 10000L;
+    if (!it_queue) {
+      it_queue = tor_malloc_zero(sizeof(insertion_time_queue_t));
+      queue->insertion_times = it_queue;
+    }
+    if (it_queue->last && it_queue->last->insertion_time == added) {
+      it_queue->last->counter++;
+    } else {
+      insertion_time_elem_t *elem = mp_pool_get(it_pool);
+      elem->next = NULL;
+      elem->insertion_time = added;
+      elem->counter = 1;
+      if (it_queue->last) {
+        it_queue->last->next = elem;
+        it_queue->last = elem;
+      } else {
+        it_queue->first = it_queue->last = elem;
+      }
+    }
+  }
   cell_queue_append(queue, copy);
   cell_queue_append(queue, copy);
@@ -1642,6 +1671,14 @@ cell_queue_clear(cell_queue_t *queue)
   queue->head = queue->tail = NULL;
   queue->head = queue->tail = NULL;
   queue->n = 0;
   queue->n = 0;
+  if (queue->insertion_times) {
+    while (queue->insertion_times->first) {
+      insertion_time_elem_t *elem = queue->insertion_times->first;
+      queue->insertion_times->first = elem->next;
+      mp_pool_release(elem);
+    }
+    tor_free(queue->insertion_times);
+  }
 /** Extract and return the cell at the head of <b>queue</b>; return NULL if
 /** Extract and return the cell at the head of <b>queue</b>; return NULL if
@@ -1835,28 +1872,41 @@ connection_or_flush_from_first_active_circuit(or_connection_t *conn, int max,
     packed_cell_t *cell = cell_queue_pop(queue);
     packed_cell_t *cell = cell_queue_pop(queue);
     /* Calculate the exact time that this cell has spent in the queue. */
     /* Calculate the exact time that this cell has spent in the queue. */
     if (get_options()->CellStatistics && !CIRCUIT_IS_ORIGIN(circ)) {
     if (get_options()->CellStatistics && !CIRCUIT_IS_ORIGIN(circ)) {
-      struct timeval flushed_from_queue;
+      struct timeval now;
+      uint32_t flushed;
       uint32_t cell_waiting_time;
       uint32_t cell_waiting_time;
-      or_circuit_t *orcirc = TO_OR_CIRCUIT(circ);
-      tor_gettimeofday(&flushed_from_queue);
-      cell_waiting_time = (uint32_t)
-            tv_mdiff(&cell->packed_timeval, &flushed_from_queue);
-      orcirc->total_cell_waiting_time += cell_waiting_time;
-      orcirc->processed_cells++;
+      insertion_time_queue_t *it_queue = queue->insertion_times;
+      tor_gettimeofday(&now);
+      flushed = (now.tv_sec % SECONDS_IN_A_DAY) * 100L +
+                 now.tv_usec / 10000L;
+      if (!it_queue || !it_queue->first) {
+        log_warn(LD_BUG, "Cannot determine insertion time of cell.");
+      } else {
+        or_circuit_t *orcirc = TO_OR_CIRCUIT(circ);
+        insertion_time_elem_t *elem = it_queue->first;
+        cell_waiting_time = (flushed * 10L + SECONDS_IN_A_DAY * 1000L -
+            elem->insertion_time * 10L) % (SECONDS_IN_A_DAY * 1000L);
+        elem->counter--;
+        if (elem->counter < 1) {
+          it_queue->first = elem->next;
+          if (elem == it_queue->last)
+            it_queue->last = NULL;
+          mp_pool_release(elem);
+        }
+        orcirc->total_cell_waiting_time += cell_waiting_time;
+        orcirc->processed_cells++;
+      }
     /* If we just flushed our queue and this circuit is used for a
     /* If we just flushed our queue and this circuit is used for a
      * tunneled directory request, possibly advance its state. */
      * tunneled directory request, possibly advance its state. */
     if (queue->n == 0 && TO_CONN(conn)->dirreq_id)
     if (queue->n == 0 && TO_CONN(conn)->dirreq_id)
     connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn));
     connection_write_to_buf(cell->body, CELL_NETWORK_SIZE, TO_CONN(conn));

+ 143 - 131

@@ -1320,10 +1320,7 @@ rep_hist_note_bytes_read(size_t num_bytes, time_t when)
   add_obs(read_array, when, num_bytes);
   add_obs(read_array, when, num_bytes);
 /* Some constants */
 /* Some constants */
-/** How long are the intervals for measuring exit stats? */
-#define EXIT_STATS_INTERVAL_SEC (24 * 60 * 60)
 /** To what multiple should byte numbers be rounded up? */
 /** To what multiple should byte numbers be rounded up? */
 /** To what multiple should stream counts be rounded up? */
 /** To what multiple should stream counts be rounded up? */
@@ -1337,118 +1334,137 @@ rep_hist_note_bytes_read(size_t num_bytes, time_t when)
 /* The following data structures are arrays and no fancy smartlists or maps,
 /* The following data structures are arrays and no fancy smartlists or maps,
  * so that all write operations can be done in constant time. This comes at
  * so that all write operations can be done in constant time. This comes at
  * the price of some memory (1.25 MB) and linear complexity when writing
  * the price of some memory (1.25 MB) and linear complexity when writing
- * stats. */
+ * stats for measuring relays. */
 /** Number of bytes read in current period by exit port */
 /** Number of bytes read in current period by exit port */
-static uint64_t exit_bytes_read[EXIT_STATS_NUM_PORTS];
+static uint64_t *exit_bytes_read = NULL;
 /** Number of bytes written in current period by exit port */
 /** Number of bytes written in current period by exit port */
-static uint64_t exit_bytes_written[EXIT_STATS_NUM_PORTS];
+static uint64_t *exit_bytes_written = NULL;
 /** Number of streams opened in current period by exit port */
 /** Number of streams opened in current period by exit port */
-static uint32_t exit_streams[EXIT_STATS_NUM_PORTS];
+static uint32_t *exit_streams = NULL;
 /** When does the current exit stats period end? */
 /** When does the current exit stats period end? */
-static time_t end_of_current_exit_stats_period = 0;
+static time_t start_of_exit_stats_interval;
-/** Write exit stats for the current period to disk and reset counters. */
-static void
-write_exit_stats(time_t when)
+/** Initialize exit port stats. */
+rep_hist_exit_stats_init(time_t now)
+  start_of_exit_stats_interval = now;
+  exit_bytes_read = tor_malloc_zero(EXIT_STATS_NUM_PORTS *
+                                    sizeof(uint64_t));
+  exit_bytes_written = tor_malloc_zero(EXIT_STATS_NUM_PORTS *
+                                       sizeof(uint64_t));
+  exit_streams = tor_malloc_zero(EXIT_STATS_NUM_PORTS *
+                                 sizeof(uint32_t));
+/** Write exit stats to $DATADIR/stats/exit-stats and reset counters. */
+rep_hist_exit_stats_write(time_t now)
   char t[ISO_TIME_LEN+1];
   char t[ISO_TIME_LEN+1];
   int r, i, comma;
   int r, i, comma;
   uint64_t *b, total_bytes, threshold_bytes, other_bytes;
   uint64_t *b, total_bytes, threshold_bytes, other_bytes;
   uint32_t other_streams;
   uint32_t other_streams;
-  char *filename = get_datadir_fname("exit-stats");
+  char *statsdir = NULL, *filename = NULL;
   open_file_t *open_file = NULL;
   open_file_t *open_file = NULL;
   FILE *out = NULL;
   FILE *out = NULL;
-  log_debug(LD_HIST, "Considering writing exit port statistics to disk..");
-  while (when > end_of_current_exit_stats_period) {
-    format_iso_time(t, end_of_current_exit_stats_period);
-    log_info(LD_HIST, "Writing exit port statistics to disk for period "
-             "ending at %s.", t);
-    if (!open_file) {
-      out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
-                                        0600, &open_file);
-      if (!out) {
-        log_warn(LD_HIST, "Couldn't open '%s'.", filename);
-        goto done;
-      }
-    }
+  if (!exit_streams)
+    return; /* Not initialized */
-    /* written yyyy-mm-dd HH:MM:SS (n s) */
-    if (fprintf(out, "written %s (%d s)\n", t, EXIT_STATS_INTERVAL_SEC) < 0)
+  statsdir = get_datadir_fname("stats");
+  if (check_private_dir(statsdir, CPD_CREATE) < 0)
+    goto done;
+  filename = get_datadir_fname("stats"PATH_SEPARATOR"exit-stats");
+  format_iso_time(t, now);
+  log_info(LD_HIST, "Writing exit port statistics to disk for period "
+           "ending at %s.", t);
+  if (!open_file) {
+    out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
+                                      0600, &open_file);
+    if (!out) {
+      log_warn(LD_HIST, "Couldn't open '%s'.", filename);
       goto done;
       goto done;
-    /* Count the total number of bytes, so that we can attribute all
-     * observations below a threshold of 1 / EXIT_STATS_THRESHOLD_RECIPROCAL
-     * of all bytes to a special port 'other'. */
-    total_bytes = 0;
-    for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
-      total_bytes += exit_bytes_read[i];
-      total_bytes += exit_bytes_written[i];
-    threshold_bytes = total_bytes / EXIT_STATS_THRESHOLD_RECIPROCAL;
-    /* kibibytes-(read|written) port=kibibytes,.. */
-    for (r = 0; r < 2; r++) {
-      b = r ? exit_bytes_read : exit_bytes_written;
-      tor_assert(b);
-      if (fprintf(out, "%s ",
-                  r ? "kibibytes-read" : "kibibytes-written")<0)
-        goto done;
-      comma = 0;
-      other_bytes = 0;
-      for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
-        if (b[i] > 0) {
-          if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) {
-            uint64_t num = round_uint64_to_next_multiple_of(b[i],
-                                                EXIT_STATS_ROUND_UP_BYTES);
-            num /= 1024;
-            if (fprintf(out, "%s%d="U64_FORMAT,
-                        comma++ ? "," : "", i,
-                        U64_PRINTF_ARG(num)) < 0)
-              goto done;
-          } else
-            other_bytes += b[i];
-        }
-      }
-      other_bytes = round_uint64_to_next_multiple_of(other_bytes,
-                                         EXIT_STATS_ROUND_UP_BYTES);
-      other_bytes /= 1024;
-      if (fprintf(out, "%sother="U64_FORMAT"\n",
-                  comma ? "," : "", U64_PRINTF_ARG(other_bytes))<0)
-        goto done;
-    }
-    /* streams-opened port=num,.. */
-    if (fprintf(out, "streams-opened ")<0)
+  }
+  /* exit-stats-end yyyy-mm-dd HH:MM:SS (n s) */
+  if (fprintf(out, "exit-stats-end %s (%d s)\n", t,
+              (unsigned) (now - start_of_exit_stats_interval)) < 0)
+    goto done;
+  /* Count the total number of bytes, so that we can attribute all
+   * observations below a threshold of 1 / EXIT_STATS_THRESHOLD_RECIPROCAL
+   * of all bytes to a special port 'other'. */
+  total_bytes = 0;
+  for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
+    total_bytes += exit_bytes_read[i];
+    total_bytes += exit_bytes_written[i];
+  }
+  threshold_bytes = total_bytes / EXIT_STATS_THRESHOLD_RECIPROCAL;
+  /* exit-kibibytes-(read|written) port=kibibytes,.. */
+  for (r = 0; r < 2; r++) {
+    b = r ? exit_bytes_read : exit_bytes_written;
+    tor_assert(b);
+    if (fprintf(out, "%s ",
+                r ? "exit-kibibytes-read"
+                  : "exit-kibibytes-written") < 0)
       goto done;
       goto done;
     comma = 0;
     comma = 0;
-    other_streams = 0;
+    other_bytes = 0;
     for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
     for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
-      if (exit_streams[i] > 0) {
+      if (b[i] > 0) {
         if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) {
         if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) {
-          uint32_t num = round_uint32_to_next_multiple_of(exit_streams[i],
-                                              EXIT_STATS_ROUND_UP_STREAMS);
-          if (fprintf(out, "%s%d=%u",
-                      comma++ ? "," : "", i, num)<0)
+          uint64_t num = round_uint64_to_next_multiple_of(b[i],
+                                              EXIT_STATS_ROUND_UP_BYTES);
+          num /= 1024;
+          if (fprintf(out, "%s%d="U64_FORMAT,
+                      comma++ ? "," : "", i,
+                      U64_PRINTF_ARG(num)) < 0)
             goto done;
             goto done;
         } else
         } else
-          other_streams += exit_streams[i];
+          other_bytes += b[i];
-    other_streams = round_uint32_to_next_multiple_of(other_streams,
-                                         EXIT_STATS_ROUND_UP_STREAMS);
-    if (fprintf(out, "%sother=%u\n",
-                comma ? "," : "", other_streams)<0)
+    other_bytes = round_uint64_to_next_multiple_of(other_bytes,
+                                       EXIT_STATS_ROUND_UP_BYTES);
+    other_bytes /= 1024;
+    if (fprintf(out, "%sother="U64_FORMAT"\n",
+                comma ? "," : "", U64_PRINTF_ARG(other_bytes))<0)
       goto done;
       goto done;
-    /* Reset counters */
-    memset(exit_bytes_read, 0, sizeof(exit_bytes_read));
-    memset(exit_bytes_written, 0, sizeof(exit_bytes_written));
-    memset(exit_streams, 0, sizeof(exit_streams));
-    end_of_current_exit_stats_period += EXIT_STATS_INTERVAL_SEC;
+  /* exit-streams-opened port=num,.. */
+  if (fprintf(out, "exit-streams-opened ") < 0)
+    goto done;
+  comma = 0;
+  other_streams = 0;
+  for (i = 1; i < EXIT_STATS_NUM_PORTS; i++) {
+    if (exit_streams[i] > 0) {
+      if (exit_bytes_read[i] + exit_bytes_written[i] > threshold_bytes) {
+        uint32_t num = round_uint32_to_next_multiple_of(exit_streams[i],
+                                            EXIT_STATS_ROUND_UP_STREAMS);
+        if (fprintf(out, "%s%d=%u",
+                    comma++ ? "," : "", i, num)<0)
+          goto done;
+      } else
+        other_streams += exit_streams[i];
+    }
+  }
+  other_streams = round_uint32_to_next_multiple_of(other_streams,
+                                       EXIT_STATS_ROUND_UP_STREAMS);
+  if (fprintf(out, "%sother=%u\n",
+              comma ? "," : "", other_streams)<0)
+    goto done;
+  /* Reset counters */
+  memset(exit_bytes_read, 0, sizeof(exit_bytes_read));
+  memset(exit_bytes_written, 0, sizeof(exit_bytes_written));
+  memset(exit_streams, 0, sizeof(exit_streams));
+  start_of_exit_stats_interval = now;
   if (open_file)
   if (open_file)
@@ -1457,63 +1473,48 @@ write_exit_stats(time_t when)
   if (open_file)
   if (open_file)
-/** Prepare to add an exit stats observation at second <b>when</b> by
- * checking whether this observation lies in the current observation
- * period; if not, shift the current period forward by one until the
- * reported event fits it and write all results in between to disk. */
-static void
-add_exit_obs(time_t when)
-  if (when > end_of_current_exit_stats_period) {
-    if (end_of_current_exit_stats_period)
-      write_exit_stats(when);
-    else
-      end_of_current_exit_stats_period = when + EXIT_STATS_INTERVAL_SEC;
-  }
+  tor_free(statsdir);
 /** Note that we wrote <b>num_bytes</b> to an exit connection to
 /** Note that we wrote <b>num_bytes</b> to an exit connection to
- * <b>port</b> in second <b>when</b>. */
+ * <b>port</b>. */
-rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes,
-                                 time_t when)
+rep_hist_note_exit_bytes_written(uint16_t port, size_t num_bytes)
   if (!get_options()->ExitPortStatistics)
   if (!get_options()->ExitPortStatistics)
-  add_exit_obs(when);
+  if (!exit_bytes_written)
+    return; /* Not initialized */
   exit_bytes_written[port] += num_bytes;
   exit_bytes_written[port] += num_bytes;
   log_debug(LD_HIST, "Written %lu bytes to exit connection to port %d.",
   log_debug(LD_HIST, "Written %lu bytes to exit connection to port %d.",
             (unsigned long)num_bytes, port);
             (unsigned long)num_bytes, port);
 /** Note that we read <b>num_bytes</b> from an exit connection to
 /** Note that we read <b>num_bytes</b> from an exit connection to
- * <b>port</b> in second <b>when</b>. */
+ * <b>port</b>. */
-rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes,
-                              time_t when)
+rep_hist_note_exit_bytes_read(uint16_t port, size_t num_bytes)
   if (!get_options()->ExitPortStatistics)
   if (!get_options()->ExitPortStatistics)
-  add_exit_obs(when);
+  if (!exit_bytes_read)
+    return; /* Not initialized */
   exit_bytes_read[port] += num_bytes;
   exit_bytes_read[port] += num_bytes;
   log_debug(LD_HIST, "Read %lu bytes from exit connection to port %d.",
   log_debug(LD_HIST, "Read %lu bytes from exit connection to port %d.",
             (unsigned long)num_bytes, port);
             (unsigned long)num_bytes, port);
-/** Note that we opened an exit stream to <b>port</b> in second
- * <b>when</b>. */
+/** Note that we opened an exit stream to <b>port</b>. */
-rep_hist_note_exit_stream_opened(uint16_t port, time_t when)
+rep_hist_note_exit_stream_opened(uint16_t port)
   if (!get_options()->ExitPortStatistics)
   if (!get_options()->ExitPortStatistics)
-  add_exit_obs(when);
+  if (!exit_streams)
+    return; /* Not initialized */
   log_debug(LD_HIST, "Opened exit stream to port %d", port);
   log_debug(LD_HIST, "Opened exit stream to port %d", port);
 /** Helper: Return the largest value in b->maxima.  (This is equal to the
 /** Helper: Return the largest value in b->maxima.  (This is equal to the
  * most bandwidth used in any NUM_SECS_ROLLING_MEASURE period for the last
  * most bandwidth used in any NUM_SECS_ROLLING_MEASURE period for the last
@@ -2049,6 +2050,9 @@ rep_hist_free_all(void)
+  tor_free(exit_bytes_read);
+  tor_free(exit_bytes_written);
+  tor_free(exit_streams);
   built_last_stability_doc_at = 0;
   built_last_stability_doc_at = 0;
@@ -2603,9 +2607,15 @@ hs_usage_write_statistics_to_file(time_t now)
 /*** cell statistics ***/
 /*** cell statistics ***/
 /** Start of the current buffer stats interval. */
 /** Start of the current buffer stats interval. */
-time_t start_of_buffer_stats_interval;
+static time_t start_of_buffer_stats_interval;
+/** Initialize buffer stats. */
+rep_hist_buffer_stats_init(time_t now)
+  start_of_buffer_stats_interval = now;
 typedef struct circ_buffer_stats_t {
 typedef struct circ_buffer_stats_t {
   uint32_t processed_cells;
   uint32_t processed_cells;
@@ -2621,7 +2631,7 @@ smartlist_t *circuits_for_buffer_stats = NULL;
  * <b>end_of_interval</b> and reset cell counters in case the circuit
  * <b>end_of_interval</b> and reset cell counters in case the circuit
  * remains open in the next measurement interval. */
  * remains open in the next measurement interval. */
-add_circ_to_buffer_stats(circuit_t *circ, time_t end_of_interval)
+rep_hist_buffer_stats_add_circ(circuit_t *circ, time_t end_of_interval)
   circ_buffer_stats_t *stat;
   circ_buffer_stats_t *stat;
   time_t start_of_interval;
   time_t start_of_interval;
@@ -2667,12 +2677,11 @@ _buffer_stats_compare_entries(const void **_a, const void **_b)
     return 0;
     return 0;
-/** Append buffer statistics to local file. */
+/** Write buffer statistics to $DATADIR/stats/buffer-stats. */
+rep_hist_buffer_stats_write(time_t now)
-  time_t now = time(NULL);
-  char *filename;
+  char *statsdir = NULL, *filename = NULL;
   char written[ISO_TIME_LEN+1];
   char written[ISO_TIME_LEN+1];
   open_file_t *open_file = NULL;
   open_file_t *open_file = NULL;
   FILE *out;
   FILE *out;
@@ -2686,7 +2695,7 @@ dump_buffer_stats(void)
   circuit_t *circ;
   circuit_t *circ;
   /* add current circuits to stats */
   /* add current circuits to stats */
   for (circ = _circuit_get_global_list(); circ; circ = circ->next)
   for (circ = _circuit_get_global_list(); circ; circ = circ->next)
-    add_circ_to_buffer_stats(circ, now);
+    rep_hist_buffer_stats_add_circ(circ, now);
   /* calculate deciles */
   /* calculate deciles */
   memset(processed_cells, 0, SHARES * sizeof(int));
   memset(processed_cells, 0, SHARES * sizeof(int));
   memset(circs_in_share, 0, SHARES * sizeof(int));
   memset(circs_in_share, 0, SHARES * sizeof(int));
@@ -2711,14 +2720,17 @@ dump_buffer_stats(void)
       stat, tor_free(stat));
       stat, tor_free(stat));
   /* write to file */
   /* write to file */
-  filename = get_datadir_fname("buffer-stats");
+  statsdir = get_datadir_fname("stats");
+  if (check_private_dir(statsdir, CPD_CREATE) < 0)
+    goto done;
+  filename = get_datadir_fname("stats"PATH_SEPARATOR"buffer-stats");
   out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
   out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
                                     0600, &open_file);
                                     0600, &open_file);
   if (!out)
   if (!out)
     goto done;
     goto done;
   format_iso_time(written, now);
   format_iso_time(written, now);
-  if (fprintf(out, "written %s (%d s)\n", written,
-              DUMP_BUFFER_STATS_INTERVAL) < 0)
+  if (fprintf(out, "cell-stats-end %s (%d s)\n", written,
+              (unsigned) (now - start_of_buffer_stats_interval)) < 0)
     goto done;
     goto done;
   for (i = 0; i < SHARES; i++) {
   for (i = 0; i < SHARES; i++) {
     tor_snprintf(buf, sizeof(buf), "%d", !circs_in_share[i] ? 0 :
     tor_snprintf(buf, sizeof(buf), "%d", !circs_in_share[i] ? 0 :
@@ -2726,7 +2738,7 @@ dump_buffer_stats(void)
     smartlist_add(str_build, tor_strdup(buf));
     smartlist_add(str_build, tor_strdup(buf));
   str = smartlist_join_strings(str_build, ",", 0, NULL);
   str = smartlist_join_strings(str_build, ",", 0, NULL);
-  if (fprintf(out, "processed-cells %s\n", str) < 0)
+  if (fprintf(out, "cell-processed-cells %s\n", str) < 0)
     goto done;
     goto done;
   SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
   SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
@@ -2737,7 +2749,7 @@ dump_buffer_stats(void)
     smartlist_add(str_build, tor_strdup(buf));
     smartlist_add(str_build, tor_strdup(buf));
   str = smartlist_join_strings(str_build, ",", 0, NULL);
   str = smartlist_join_strings(str_build, ",", 0, NULL);
-  if (fprintf(out, "queued-cells %s\n", str) < 0)
+  if (fprintf(out, "cell-queued-cells %s\n", str) < 0)
     goto done;
     goto done;
   SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
   SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
@@ -2748,13 +2760,13 @@ dump_buffer_stats(void)
     smartlist_add(str_build, tor_strdup(buf));
     smartlist_add(str_build, tor_strdup(buf));
   str = smartlist_join_strings(str_build, ",", 0, NULL);
   str = smartlist_join_strings(str_build, ",", 0, NULL);
-  if (fprintf(out, "time-in-queue %s\n", str) < 0)
+  if (fprintf(out, "cell-time-in-queue %s\n", str) < 0)
     goto done;
     goto done;
   SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
   SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
   str_build = NULL;
   str_build = NULL;
-  if (fprintf(out, "number-of-circuits-per-share %d\n",
+  if (fprintf(out, "cell-circuits-per-decile %d\n",
               (number_of_circuits + SHARES - 1) / SHARES) < 0)
               (number_of_circuits + SHARES - 1) / SHARES) < 0)
     goto done;
     goto done;
@@ -2763,6 +2775,7 @@ dump_buffer_stats(void)
   if (open_file)
   if (open_file)
+  tor_free(statsdir);
   if (str_build) {
   if (str_build) {
     SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
     SMARTLIST_FOREACH(str_build, char *, c, tor_free(c));
@@ -2770,5 +2783,4 @@ dump_buffer_stats(void)
 #undef SHARES
 #undef SHARES

+ 134 - 13

@@ -1269,6 +1269,7 @@ router_rebuild_descriptor(int force)
   uint32_t addr;
   uint32_t addr;
   char platform[256];
   char platform[256];
   int hibernating = we_are_hibernating();
   int hibernating = we_are_hibernating();
+  size_t ei_size;
   or_options_t *options = get_options();
   or_options_t *options = get_options();
   if (desc_clean_since && !force)
   if (desc_clean_since && !force)
@@ -1382,9 +1383,10 @@ router_rebuild_descriptor(int force)
   ei->cache_info.published_on = ri->cache_info.published_on;
   ei->cache_info.published_on = ri->cache_info.published_on;
   memcpy(ei->cache_info.identity_digest, ri->cache_info.identity_digest,
   memcpy(ei->cache_info.identity_digest, ri->cache_info.identity_digest,
-  ei->cache_info.signed_descriptor_body = tor_malloc(8192);
-  if (extrainfo_dump_to_string(ei->cache_info.signed_descriptor_body, 8192,
-                               ei, get_identity_key()) < 0) {
+  ei_size = options->ExtraInfoStatistics ? MAX_EXTRAINFO_UPLOAD_SIZE : 8192;
+  ei->cache_info.signed_descriptor_body = tor_malloc(ei_size);
+  if (extrainfo_dump_to_string(ei->cache_info.signed_descriptor_body,
+                               ei_size, ei, get_identity_key()) < 0) {
     log_warn(LD_BUG, "Couldn't generate extra-info descriptor.");
     log_warn(LD_BUG, "Couldn't generate extra-info descriptor.");
     return -1;
     return -1;
@@ -1822,6 +1824,57 @@ router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router,
   return (int)written+1;
   return (int)written+1;
+/** Load the contents of <b>filename</b>, find the last line starting with
+ * <b>end_line</b>, ensure that its timestamp is not more than 25 hours in
+ * the past or more than 1 hour in the future with respect to <b>now</b>,
+ * and write the file contents starting with that line to **<b>out</b>.
+ * Return 1 for success, 0 if the file does not exist, or -1 if the file
+ * cannot be read or does not contain a line matching these criteria. */
+static int
+load_stats_file(const char *filename, const char *end_line, time_t now,
+                char **out)
+  int r = -1;
+  char *fname = get_datadir_fname(filename);
+  char *contents, *start = NULL, *tmp, timestr[ISO_TIME_LEN+1];
+  time_t written;
+  switch (file_status(fname)) {
+    case FN_FILE:
+      /* X022 Find an alternative to reading the whole file to memory. */
+      if ((contents = read_file_to_str(fname, 0, NULL))) {
+        tmp = strstr(contents, end_line);
+        /* Find last block starting with end_line */
+        while (tmp) {
+          start = tmp;
+          tmp = strstr(tmp + 1, end_line);
+        }
+        if (!start)
+          goto notfound;
+        if (strlen(start) < strlen(end_line) + 1 + sizeof(timestr))
+          goto notfound;
+        strlcpy(timestr, start + 1 + strlen(end_line), sizeof(timestr));
+        if (parse_iso_time(timestr, &written) < 0)
+          goto notfound;
+        if (written < now - (25*60*60) || written > now + (1*60*60))
+          goto notfound;
+        *out = tor_strdup(start);
+        r = 1;
+      }
+     notfound:
+      tor_free(contents);
+      break;
+    case FN_NOENT:
+      r = 0;
+      break;
+    case FN_ERROR:
+    case FN_DIR:
+    default:
+      break;
+  }
+  tor_free(fname);
+  return r;
 /** Write the contents of <b>extrainfo</b> to the <b>maxlen</b>-byte string
 /** Write the contents of <b>extrainfo</b> to the <b>maxlen</b>-byte string
  * <b>s</b>, signing them with <b>ident_key</b>.  Return 0 on success,
  * <b>s</b>, signing them with <b>ident_key</b>.  Return 0 on success,
  * negative on failure. */
  * negative on failure. */
@@ -1836,6 +1889,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
   char *bandwidth_usage;
   char *bandwidth_usage;
   int result;
   int result;
   size_t len;
   size_t len;
+  static int write_stats_to_extrainfo = 1;
   base16_encode(identity, sizeof(identity),
   base16_encode(identity, sizeof(identity),
                 extrainfo->cache_info.identity_digest, DIGEST_LEN);
                 extrainfo->cache_info.identity_digest, DIGEST_LEN);
@@ -1847,6 +1901,61 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
                         "published %s\n%s",
                         "published %s\n%s",
                         extrainfo->nickname, identity,
                         extrainfo->nickname, identity,
                         published, bandwidth_usage);
                         published, bandwidth_usage);
+  if (options->ExtraInfoStatistics && write_stats_to_extrainfo) {
+    /* Append the most recent block of every enabled statistics file to the
+     * descriptor text built so far in <b>s</b>.  A block that does not fit
+     * into the remaining space is dropped with a warning and <b>s</b> is
+     * cut back to its previous end, so that no partially copied block is
+     * left in the descriptor. */
+    char *contents = NULL;
+    /* NOTE(review): the visible body of load_stats_file() compares the
+     * block's timestamp against a window around a value it calls "now";
+     * passing a time 24 hours in the past here looks inconsistent with
+     * that -- confirm against the helper's signature. */
+    time_t since = time(NULL) - (24*60*60);
+    log_info(LD_GENERAL, "Adding stats to extra-info descriptor.");
+    if (options->DirReqStatistics &&
+        load_stats_file("stats"PATH_SEPARATOR"dirreq-stats",
+                        "dirreq-stats-end", since, &contents) > 0) {
+      size_t pos = strlen(s);
+      /* strlcpy() returns strlen(contents) whether or not it truncated, so
+       * truncation shows up as a return value >= the space we offered. */
+      if (strlcpy(s + pos, contents, maxlen - pos) >= maxlen - pos) {
+        log_warn(LD_DIR, "Could not write dirreq-stats to extra-info "
+                 "descriptor.");
+        s[pos] = '\0';
+      }
+      tor_free(contents);
+    }
+    if (options->EntryStatistics &&
+        load_stats_file("stats"PATH_SEPARATOR"entry-stats",
+                        "entry-stats-end", since, &contents) > 0) {
+      size_t pos = strlen(s);
+      /* Same truncation test as for dirreq-stats above. */
+      if (strlcpy(s + pos, contents, maxlen - pos) >= maxlen - pos) {
+        log_warn(LD_DIR, "Could not write entry-stats to extra-info "
+                 "descriptor.");
+        s[pos] = '\0';
+      }
+      tor_free(contents);
+    }
+    if (options->CellStatistics &&
+        load_stats_file("stats"PATH_SEPARATOR"buffer-stats",
+                        "cell-stats-end", since, &contents) > 0) {
+      size_t pos = strlen(s);
+      /* Same truncation test as for dirreq-stats above. */
+      if (strlcpy(s + pos, contents, maxlen - pos) >= maxlen - pos) {
+        log_warn(LD_DIR, "Could not write buffer-stats to extra-info "
+                 "descriptor.");
+        s[pos] = '\0';
+      }
+      tor_free(contents);
+    }
+    if (options->ExitPortStatistics &&
+        load_stats_file("stats"PATH_SEPARATOR"exit-stats",
+                        "exit-stats-end", since, &contents) > 0) {
+      size_t pos = strlen(s);
+      /* Same truncation test as for dirreq-stats above. */
+      if (strlcpy(s + pos, contents, maxlen - pos) >= maxlen - pos) {
+        log_warn(LD_DIR, "Could not write exit-stats to extra-info "
+                 "descriptor.");
+        s[pos] = '\0';
+      }
+      tor_free(contents);
+    }
+  }
   if (result<0)
   if (result<0)
     return -1;
     return -1;
@@ -1875,7 +1984,6 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
   if (router_append_dirobj_signature(s+len, maxlen-len, digest, ident_key)<0)
   if (router_append_dirobj_signature(s+len, maxlen-len, digest, ident_key)<0)
     return -1;
     return -1;
     char *cp, *s_dup;
     char *cp, *s_dup;
     extrainfo_t *ei_tmp;
     extrainfo_t *ei_tmp;
@@ -1890,7 +1998,24 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
+  if (options->ExtraInfoStatistics && write_stats_to_extrainfo) {
+    /* Self-check: parse a copy of the descriptor we just generated.  If it
+     * cannot be parsed, stop adding statistics from now on and regenerate
+     * the descriptor without them. */
+    char *cp, *s_dup;
+    extrainfo_t *ei_tmp;
+    cp = s_dup = tor_strdup(s);
+    ei_tmp = extrainfo_parse_entry_from_string(cp, NULL, 1, NULL);
+    /* The copy is only needed as parser input; the warning below prints
+     * <b>s</b> itself. */
+    tor_free(s_dup);
+    if (!ei_tmp) {
+      log_warn(LD_GENERAL,
+               "We just generated an extra-info descriptor with "
+               "statistics that we can't parse. Not adding statistics to "
+               "this or any future extra-info descriptors. Descriptor "
+               "was:\n%s", s);
+      write_stats_to_extrainfo = 0;
+      /* Hand back the result of the regeneration, so that a failure there
+       * is reported to the caller instead of being masked by the length
+       * of whatever is left in <b>s</b>. */
+      return extrainfo_dump_to_string(s, maxlen, extrainfo, ident_key);
+    }
+    extrainfo_free(ei_tmp);
+  }
   return (int)strlen(s)+1;
   return (int)strlen(s)+1;
@@ -1905,13 +2030,9 @@ char *
 extrainfo_get_client_geoip_summary(time_t now)
 extrainfo_get_client_geoip_summary(time_t now)
   static time_t last_purged_at = 0;
   static time_t last_purged_at = 0;
-  int geoip_purge_interval = 48*60*60;
-  geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS;
-  geoip_purge_interval = ENTRY_RECORD_USAGE_RETAIN_IPS;
+  int geoip_purge_interval =
+      (get_options()->DirReqStatistics || get_options()->EntryStatistics) ?
   if (now > last_purged_at+geoip_purge_interval) {
   if (now > last_purged_at+geoip_purge_interval) {
     /* (Note that this also discards items in the client history with
     /* (Note that this also discards items in the client history with
      * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter
      * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter
@@ -1920,7 +2041,7 @@ extrainfo_get_client_geoip_summary(time_t now)
     last_purged_at = now;
     last_purged_at = now;
-  return geoip_get_client_history(now, GEOIP_CLIENT_CONNECT);
+  return geoip_get_client_history_bridge(now, GEOIP_CLIENT_CONNECT);
 /** Return true iff <b>s</b> is a legally valid server nickname. */
 /** Return true iff <b>s</b> is a legally valid server nickname. */

+ 50 - 0

@@ -62,6 +62,31 @@ typedef enum {
@@ -257,6 +282,31 @@ static token_rule_t extrainfo_token_table[] = {
   T0N("opt",                 K_OPT,             CONCAT_ARGS, OBJ_OK ),
   T0N("opt",                 K_OPT,             CONCAT_ARGS, OBJ_OK ),
   T01("read-history",        K_READ_HISTORY,        ARGS,    NO_OBJ ),
   T01("read-history",        K_READ_HISTORY,        ARGS,    NO_OBJ ),
   T01("write-history",       K_WRITE_HISTORY,       ARGS,    NO_OBJ ),
   T01("write-history",       K_WRITE_HISTORY,       ARGS,    NO_OBJ ),
+  T01("dirreq-stats-end",    K_DIRREQ_END,          ARGS,    NO_OBJ ),
+  T01("dirreq-v2-ips",       K_DIRREQ_V2_IPS,       ARGS,    NO_OBJ ),
+  T01("dirreq-v3-ips",       K_DIRREQ_V3_IPS,       ARGS,    NO_OBJ ),
+  T01("dirreq-v2-reqs",      K_DIRREQ_V2_REQS,      ARGS,    NO_OBJ ),
+  T01("dirreq-v3-reqs",      K_DIRREQ_V3_REQS,      ARGS,    NO_OBJ ),
+  T01("dirreq-v2-share",     K_DIRREQ_V2_SHARE,     ARGS,    NO_OBJ ),
+  T01("dirreq-v3-share",     K_DIRREQ_V3_SHARE,     ARGS,    NO_OBJ ),
+  T01("dirreq-v2-resp",      K_DIRREQ_V2_RESP,      ARGS,    NO_OBJ ),
+  T01("dirreq-v3-resp",      K_DIRREQ_V3_RESP,      ARGS,    NO_OBJ ),
+  T01("dirreq-v2-direct-dl", K_DIRREQ_V2_DIR,       ARGS,    NO_OBJ ),
+  T01("dirreq-v3-direct-dl", K_DIRREQ_V3_DIR,       ARGS,    NO_OBJ ),
+  T01("dirreq-v2-tunneled-dl", K_DIRREQ_V2_TUN,     ARGS,    NO_OBJ ),
+  T01("dirreq-v3-tunneled-dl", K_DIRREQ_V3_TUN,     ARGS,    NO_OBJ ),
+  T01("entry-stats-end",     K_ENTRY_END,           ARGS,    NO_OBJ ),
+  T01("entry-ips",           K_ENTRY_IPS,           ARGS,    NO_OBJ ),
+  T01("cell-stats-end",      K_CELL_END,            ARGS,    NO_OBJ ),
+  T01("cell-processed-cells", K_CELL_PROCESSED,     ARGS,    NO_OBJ ),
+  T01("cell-queued-cells",   K_CELL_QUEUED,         ARGS,    NO_OBJ ),
+  T01("cell-time-in-queue",  K_CELL_TIME,           ARGS,    NO_OBJ ),
+  T01("cell-circuits-per-decile", K_CELL_CIRCS,     ARGS,    NO_OBJ ),
+  T01("exit-stats-end",      K_EXIT_END,            ARGS,    NO_OBJ ),
+  T01("exit-kibibytes-written", K_EXIT_WRITTEN,     ARGS,    NO_OBJ ),
+  T01("exit-kibibytes-read", K_EXIT_READ,           ARGS,    NO_OBJ ),
+  T01("exit-streams-opened", K_EXIT_OPENED,         ARGS,    NO_OBJ ),
   T1_START( "extra-info",          K_EXTRA_INFO,          GE(2),   NO_OBJ ),
   T1_START( "extra-info",          K_EXTRA_INFO,          GE(2),   NO_OBJ ),

+ 4 - 2

@@ -4774,14 +4774,16 @@ test_geoip(void)
   /* and 17 observations in ZZ... */
   /* and 17 observations in ZZ... */
   for (i=110; i < 127; ++i)
   for (i=110; i < 127; ++i)
     geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
     geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
-  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
+  s = geoip_get_client_history_bridge(now+5*24*60*60,
+                                      GEOIP_CLIENT_CONNECT);
   test_streq("zz=24,ab=16,xy=8", s);
   test_streq("zz=24,ab=16,xy=8", s);
   /* Now clear out all the AB observations. */
   /* Now clear out all the AB observations. */
-  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
+  s = geoip_get_client_history_bridge(now+5*24*60*60,
+                                      GEOIP_CLIENT_CONNECT);
   test_streq("zz=24,xy=8", s);
   test_streq("zz=24,xy=8", s);