Browse Source

New code to implement proposal for local geoip stats. Only enabled with --enable-geoip-stats passed to configure.

svn:r14802
Nick Mathewson 17 years ago
parent
commit
ac330d9ba7
12 changed files with 137 additions and 24 deletions
  1. 3 0
      ChangeLog
  2. 7 0
      configure.in
  3. 3 3
      doc/TODO
  4. 1 3
      src/common/util.c
  5. 3 0
      src/or/config.c
  6. 1 1
      src/or/connection_or.c
  7. 20 0
      src/or/directory.c
  8. 63 6
      src/or/geoip.c
  9. 7 0
      src/or/main.c
  10. 20 2
      src/or/or.h
  11. 1 1
      src/or/router.c
  12. 8 8
      src/or/test.c

+ 3 - 0
ChangeLog

@@ -104,6 +104,9 @@ Changes in version 0.2.1.1-alpha - 2008-??-??
       Robert Hogan. Fixes the first part of bug 681.
       Robert Hogan. Fixes the first part of bug 681.
     - Make bridge authorities never serve extrainfo docs.
     - Make bridge authorities never serve extrainfo docs.
     - Allow comments in geoip file.
     - Allow comments in geoip file.
+    - New configure/torrc options (--enable-geoip-stats,
+      DirRecordUsageByCountry) to record how many IPs we've served directory
+      info to in each country code.
 
 
   o Minor features (security):
   o Minor features (security):
     - Reject requests for reverse-dns lookup of names in a private
     - Reject requests for reverse-dns lookup of names in a private

+ 7 - 0
configure.in

@@ -87,6 +87,13 @@ case $host in
      ;;
      ;;
 esac
 esac
 
 
+AC_ARG_ENABLE(geoip-stats,
+     AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
+
+if test "$enable_geoip_stats" = "yes"; then
+  AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
+fi
+
 AC_ARG_ENABLE(gcc-warnings,
 AC_ARG_ENABLE(gcc-warnings,
      AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
      AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
 
 

+ 3 - 3
doc/TODO

@@ -289,10 +289,10 @@ Bugs/issues for Tor 0.2.0.x:
       too much.
       too much.
   o teach geoip_parse_entry() to skip over lines that start with #, so we
   o teach geoip_parse_entry() to skip over lines that start with #, so we
     can put a little note at the top of the geoip file to say what it is.
     can put a little note at the top of the geoip file to say what it is.
-N d we should have an off-by-default way for relays to dump geoip data to
+  . we should have an off-by-default way for relays to dump geoip data to
     a file in their data directory, for measurement purposes.
     a file in their data directory, for measurement purposes.
-    - Basic implementation
+    o Basic implementation
-    - Include probability-of-selection
+N   - Include probability-of-selection
 R d let bridges set relaybandwidthrate as low as 5kb
 R d let bridges set relaybandwidthrate as low as 5kb
 R - bug: if we launch using bridges, and then stop using bridges, we
 R - bug: if we launch using bridges, and then stop using bridges, we
     still have our bridges in our entryguards section, and may use them.
     still have our bridges in our entryguards section, and may use them.

+ 1 - 3
src/common/util.c

@@ -1559,7 +1559,6 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
   tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
   tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
 #endif
 #endif
   new_file->fd = -1;
   new_file->fd = -1;
-  tempname_len = strlen(fname)+16;
   tor_assert(tempname_len > strlen(fname)); /*check for overflow*/
   tor_assert(tempname_len > strlen(fname)); /*check for overflow*/
   new_file->filename = tor_strdup(fname);
   new_file->filename = tor_strdup(fname);
   if (open_flags & O_APPEND) {
   if (open_flags & O_APPEND) {
@@ -1577,8 +1576,7 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
     new_file->rename_on_close = 1;
     new_file->rename_on_close = 1;
   }
   }
 
 
-  if ((new_file->fd = open(open_name, open_flags, mode))
+  if ((new_file->fd = open(open_name, open_flags, mode)) < 0) {
-      < 0) {
     log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
     log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
         open_name, fname, strerror(errno));
         open_name, fname, strerror(errno));
     goto err;
     goto err;

+ 3 - 0
src/or/config.c

@@ -179,6 +179,9 @@ static config_var_t _option_vars[] = {
   V(DirPolicy,                   LINELIST, NULL),
   V(DirPolicy,                   LINELIST, NULL),
   V(DirPort,                     UINT,     "0"),
   V(DirPort,                     UINT,     "0"),
   OBSOLETE("DirPostPeriod"),
   OBSOLETE("DirPostPeriod"),
+#ifdef ENABLE_GEOIP_STATS
+  V(DirRecordUsageByCountry,     BOOL,     "0"),
+#endif
   VAR("DirServer",               LINELIST, DirServers, NULL),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSPort,                     UINT,     "0"),
   V(DNSListenAddress,            LINELIST, NULL),
   V(DNSListenAddress,            LINELIST, NULL),

+ 1 - 1
src/or/connection_or.c

@@ -901,7 +901,7 @@ connection_or_set_state_open(or_connection_t *conn)
   } else {
   } else {
     /* only report it to the geoip module if it's not a known router */
     /* only report it to the geoip module if it's not a known router */
     if (!router_get_by_digest(conn->identity_digest))
     if (!router_get_by_digest(conn->identity_digest))
-      geoip_note_client_seen(TO_CONN(conn)->addr, now);
+      geoip_note_client_seen(GEOIP_CLIENT_CONNECT, TO_CONN(conn)->addr, now);
   }
   }
   if (conn->handshake_state) {
   if (conn->handshake_state) {
     or_handshake_state_free(conn->handshake_state);
     or_handshake_state_free(conn->handshake_state);

+ 20 - 0
src/or/directory.c

@@ -2484,6 +2484,26 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
       goto done;
       goto done;
     }
     }
 
 
+#ifdef ENABLE_GEOIP_STATS
+    {
+      /* Note, for the per-country statistics, which flavor of networkstatus
+       * this client was served and the address it was served to. */
+      geoip_client_action_t act =
+        is_v3 ? GEOIP_CLIENT_NETWORKSTATUS : GEOIP_CLIENT_NETWORKSTATUS_V2;
+      uint32_t addr = conn->_base.addr;
+
+      /* If this directory connection is linked to an exit connection on a
+       * non-origin circuit, report the address of that circuit's p_conn
+       * instead (presumably the previous hop -- confirm).  Either on_circuit
+       * or p_conn may be NULL; in that case keep the address from the
+       * directory connection itself rather than dereferencing NULL. */
+      if (conn->_base.linked_conn) {
+        connection_t *c = conn->_base.linked_conn;
+        if (c->type == CONN_TYPE_EXIT) {
+          circuit_t *circ = TO_EDGE_CONN(c)->on_circuit;
+          if (circ && ! CIRCUIT_IS_ORIGIN(circ)) {
+            or_connection_t *orconn = TO_OR_CIRCUIT(circ)->p_conn;
+            if (orconn)
+              addr = orconn->_base.addr;
+          }
+        }
+      }
+      geoip_note_client_seen(act, addr, time(NULL));
+    }
+#endif
+
     // note_request(request_type,dlen);
     // note_request(request_type,dlen);
     (void) request_type;
     (void) request_type;
     write_http_response_header(conn, -1, compressed,
     write_http_response_header(conn, -1, compressed,

+ 63 - 6
src/or/geoip.c

@@ -131,7 +131,7 @@ _geoip_compare_key_to_entry(const void *_key, const void **_member)
  *   "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
  *   "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
  * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
  * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
  * integers, and CC is a country code.
  * integers, and CC is a country code.
- * 
+ *
  * It also recognizes, and skips over, blank lines and lines that start
  * It also recognizes, and skips over, blank lines and lines that start
  * with '#' (comments).
  * with '#' (comments).
  */
  */
@@ -208,9 +208,12 @@ geoip_is_loaded(void)
 typedef struct clientmap_entry_t {
 typedef struct clientmap_entry_t {
   HT_ENTRY(clientmap_entry_t) node;
   HT_ENTRY(clientmap_entry_t) node;
   uint32_t ipaddr;
   uint32_t ipaddr;
-  time_t last_seen;
+  time_t last_seen; /* The last 2 bits of this value hold the client
+                     * operation. */
 } clientmap_entry_t;
 } clientmap_entry_t;
 
 
+#define ACTION_MASK 3
+
 /** Map from client IP address to last time seen. */
 /** Map from client IP address to last time seen. */
 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
      HT_INITIALIZER();
      HT_INITIALIZER();
@@ -238,12 +241,28 @@ HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
  * at time <b>now</b>. Ignored by all but bridges. */
  * at time <b>now</b>. Ignored by all but bridges. */
 void
 void
-geoip_note_client_seen(uint32_t addr, time_t now)
+geoip_note_client_seen(geoip_client_action_t action,
+                       uint32_t addr, time_t now)
 {
 {
   or_options_t *options = get_options();
   or_options_t *options = get_options();
   clientmap_entry_t lookup, *ent;
   clientmap_entry_t lookup, *ent;
-  if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+  if (action == GEOIP_CLIENT_CONNECT) {
+    if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+      return;
+  } else {
+#ifndef ENABLE_GEOIP_STATS
     return;
     return;
+#else
+    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
+        !options->DirRecordUsageByCountry)
+      return;
+#endif
+  }
+
+  /* We use the low 2 bits of the time to encode the action. Since we're
+   * potentially remembering a great many clients, we don't want to make
+   * clientmap_entry_t larger than it has to be. */
+  now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
   lookup.ipaddr = addr;
   lookup.ipaddr = addr;
   ent = HT_FIND(clientmap, &client_history, &lookup);
   ent = HT_FIND(clientmap, &client_history, &lookup);
   if (ent) {
   if (ent) {
@@ -328,7 +347,7 @@ _c_hist_compare(const void **_a, const void **_b)
  * that country, and cc is a lowercased country code.  Returns NULL if we don't
  * that country, and cc is a lowercased country code.  Returns NULL if we don't
  * want to export geoip data yet. */
  * want to export geoip data yet. */
 char *
 char *
-geoip_get_client_history(time_t now)
+geoip_get_client_history(time_t now, geoip_client_action_t action)
 {
 {
   char *result = NULL;
   char *result = NULL;
   if (!geoip_is_loaded())
   if (!geoip_is_loaded())
@@ -343,7 +362,10 @@ geoip_get_client_history(time_t now)
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned total = 0;
     unsigned total = 0;
     HT_FOREACH(ent, clientmap, &client_history) {
     HT_FOREACH(ent, clientmap, &client_history) {
-      int country = geoip_get_country_by_ip((*ent)->ipaddr);
+      int country;
+      if (((*ent)->last_seen & ACTION_MASK) != action)
+        continue;
+      country = geoip_get_country_by_ip((*ent)->ipaddr);
       if (country < 0)
       if (country < 0)
         continue;
         continue;
       tor_assert(0 <= country && country < n_countries);
       tor_assert(0 <= country && country < n_countries);
@@ -404,6 +426,41 @@ geoip_get_client_history(time_t now)
   return result;
   return result;
 }
 }
 
 
+/** Write the per-country counts of clients that have fetched v3 ("ns") and
+ * v2 ("ns-v2") networkstatus documents from us to the file named by
+ * get_datadir_fname("geoip-stats"), along with the time of writing and the
+ * time the history started.
+ *
+ * Writes nothing if geoip_get_client_history() returns NULL for either
+ * action.  The whole body is compiled out unless ENABLE_GEOIP_STATS is
+ * defined. */
+void
+dump_geoip_stats(void)
+{
+#ifdef ENABLE_GEOIP_STATS
+  time_t now = time(NULL);
+  char *filename = get_datadir_fname("geoip-stats");
+  char *data_v2 = NULL, *data_v3 = NULL;
+  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
+  open_file_t *open_file = NULL;
+  FILE *out;
+
+  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
+  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
+  format_iso_time(since, geoip_get_history_start());
+  format_iso_time(written, now);
+  if (!data_v2 || !data_v3)
+    goto done;
+  /* NOTE(review): open_flags is 0 here.  start_writing_to_file() asserts
+   * that O_BINARY or O_TEXT is set on some platforms, and no write flag is
+   * passed either -- confirm that this open actually succeeds. */
+  out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
+  if (!out)
+    goto done;
+  /* Each line is "keyword SP value"; the space after "ns-v2" is required. */
+  if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2 %s\n",
+              written, since, data_v3, data_v2) < 0)
+    goto done;
+
+  finish_writing_to_file(open_file);
+  open_file = NULL;
+ done:
+  /* Reached with open_file still set only when the write failed. */
+  if (open_file)
+    abort_writing_to_file(open_file);
+  tor_free(filename);
+  tor_free(data_v2);
+  tor_free(data_v3);
+#endif
+}
+
 /** Helper used to implement GETINFO ip-to-country/... controller command. */
 /** Helper used to implement GETINFO ip-to-country/... controller command. */
 int
 int
 getinfo_helper_geoip(control_connection_t *control_conn,
 getinfo_helper_geoip(control_connection_t *control_conn,

+ 7 - 0
src/or/main.c

@@ -832,6 +832,7 @@ run_scheduled_events(time_t now)
   static time_t time_to_clean_caches = 0;
   static time_t time_to_clean_caches = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
+  static time_t time_to_dump_geoip_stats = 0;
   or_options_t *options = get_options();
   or_options_t *options = get_options();
   int i;
   int i;
   int have_dir_info;
   int have_dir_info;
@@ -958,6 +959,12 @@ run_scheduled_events(time_t now)
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
   }
   }
 
 
+  /* Once per DUMP_GEOIP_STATS_INTERVAL seconds, write out the geoip
+   * statistics.  (dump_geoip_stats() is a no-op unless ENABLE_GEOIP_STATS
+   * is defined.) */
+  if (time_to_dump_geoip_stats < now) {
+#define DUMP_GEOIP_STATS_INTERVAL (60*60)
+    time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
+    dump_geoip_stats();
+  }
+
   /** 2. Periodically, we consider getting a new directory, getting a
   /** 2. Periodically, we consider getting a new directory, getting a
    * new running-routers list, and/or force-uploading our descriptor
    * new running-routers list, and/or force-uploading our descriptor
    * (if we've passed our internal checks). */
    * (if we've passed our internal checks). */

+ 20 - 2
src/or/or.h

@@ -2358,6 +2358,10 @@ typedef struct {
    * count of how many client addresses have contacted us so that we can help
    * count of how many client addresses have contacted us so that we can help
    * the bridge authority guess which countries have blocked access to us. */
    * the bridge authority guess which countries have blocked access to us. */
   int BridgeRecordUsageByCountry;
   int BridgeRecordUsageByCountry;
+#ifdef ENABLE_GEOIP_STATS
+  int DirRecordUsageByCountry;
+#endif
+
   /** Optionally, a file with GeoIP data. */
   /** Optionally, a file with GeoIP data. */
   char *GeoIPFile;
   char *GeoIPFile;
 
 
@@ -3294,13 +3298,27 @@ int geoip_get_country_by_ip(uint32_t ipaddr);
 int geoip_get_n_countries(void);
 int geoip_get_n_countries(void);
 const char *geoip_get_country_name(int num);
 const char *geoip_get_country_name(int num);
 int geoip_is_loaded(void);
 int geoip_is_loaded(void);
-void geoip_note_client_seen(uint32_t addr, time_t now);
+/** Indicates an action that we might be noting geoip statistics on.  If
+ * we're noticing CONNECT, we're a bridge; if we're noticing the others,
+ * we're not.  geoip.c packs the action into the low two bits of a timestamp
+ * (see ACTION_MASK there), so every value here must stay within 0..3. */
+typedef enum {
+  /** We've noticed a connection as a bridge relay. */
+  GEOIP_CLIENT_CONNECT = 0,
+  /** We've served a networkstatus consensus as a directory server. */
+  GEOIP_CLIENT_NETWORKSTATUS = 1,
+  /** We've served a v2 networkstatus consensus as a directory server. */
+  GEOIP_CLIENT_NETWORKSTATUS_V2 = 2,
+} geoip_client_action_t;
+void geoip_note_client_seen(geoip_client_action_t action,
+                            uint32_t addr, time_t now);
 void geoip_remove_old_clients(time_t cutoff);
 void geoip_remove_old_clients(time_t cutoff);
 time_t geoip_get_history_start(void);
 time_t geoip_get_history_start(void);
-char *geoip_get_client_history(time_t now);
+char *geoip_get_client_history(time_t now, geoip_client_action_t action);
 int getinfo_helper_geoip(control_connection_t *control_conn,
 int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
                          const char *question, char **answer);
 void geoip_free_all(void);
 void geoip_free_all(void);
+void dump_geoip_stats(void);
 
 
 /********************************* hibernate.c **********************/
 /********************************* hibernate.c **********************/
 
 

+ 1 - 1
src/or/router.c

@@ -1830,7 +1830,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
       geoip_remove_old_clients(now-48*60*60);
       geoip_remove_old_clients(now-48*60*60);
       last_purged_at = now;
       last_purged_at = now;
     }
     }
-    geoip_summary = geoip_get_client_history(time(NULL));
+    geoip_summary = geoip_get_client_history(time(NULL), GEOIP_CLIENT_CONNECT);
     if (geoip_summary) {
     if (geoip_summary) {
       char geoip_start[ISO_TIME_LEN+1];
       char geoip_start[ISO_TIME_LEN+1];
       format_iso_time(geoip_start, geoip_get_history_start());
       format_iso_time(geoip_start, geoip_get_history_start());

+ 8 - 8
src/or/test.c

@@ -3908,28 +3908,28 @@ test_geoip(void)
   get_options()->BridgeRecordUsageByCountry = 1;
   get_options()->BridgeRecordUsageByCountry = 1;
   /* Put 9 observations in AB... */
   /* Put 9 observations in AB... */
   for (i=32; i < 40; ++i)
   for (i=32; i < 40; ++i)
-    geoip_note_client_seen(i, now);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
-  geoip_note_client_seen(225, now);
+  geoip_note_client_seen(GEOIP_CLIENT_CONNECT, 225, now);
   /* and 3 observations in XY, several times. */
   /* and 3 observations in XY, several times. */
   for (j=0; j < 10; ++j)
   for (j=0; j < 10; ++j)
     for (i=52; i < 55; ++i)
     for (i=52; i < 55; ++i)
-      geoip_note_client_seen(i, now-3600);
+      geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
   /* and 17 observations in ZZ... */
   /* and 17 observations in ZZ... */
   for (i=110; i < 127; ++i)
   for (i=110; i < 127; ++i)
-    geoip_note_client_seen(i, now-7200);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-7200);
-  s = geoip_get_client_history(now+5*24*60*60);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(s);
   test_assert(s);
   test_streq("zz=24,ab=16", s);
   test_streq("zz=24,ab=16", s);
   tor_free(s);
   tor_free(s);
 
 
   /* Now clear out all the zz observations. */
   /* Now clear out all the zz observations. */
   geoip_remove_old_clients(now-6000);
   geoip_remove_old_clients(now-6000);
-  s = geoip_get_client_history(now+5*24*60*60);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(! s); /* There are only 12 observations left.  Not enough to
   test_assert(! s); /* There are only 12 observations left.  Not enough to
                        build an answer.  Add 4 more in XY... */
                        build an answer.  Add 4 more in XY... */
   for (i=55; i < 59; ++i)
   for (i=55; i < 59; ++i)
-    geoip_note_client_seen(i, now-3600);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
-  s = geoip_get_client_history(now+5*24*60*60);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(s);
   test_assert(s);
   test_streq("ab=16", s);
   test_streq("ab=16", s);
   tor_free(s);
   tor_free(s);