Browse Source

New code to implement proposal for local geoip stats. Only enabled with --enable-geoip-stats passed to configure.

svn:r14802
Nick Mathewson 16 years ago
parent
commit
ac330d9ba7
12 changed files with 137 additions and 24 deletions
  1. 3 0
      ChangeLog
  2. 7 0
      configure.in
  3. 3 3
      doc/TODO
  4. 1 3
      src/common/util.c
  5. 3 0
      src/or/config.c
  6. 1 1
      src/or/connection_or.c
  7. 20 0
      src/or/directory.c
  8. 63 6
      src/or/geoip.c
  9. 7 0
      src/or/main.c
  10. 20 2
      src/or/or.h
  11. 1 1
      src/or/router.c
  12. 8 8
      src/or/test.c

+ 3 - 0
ChangeLog

@@ -104,6 +104,9 @@ Changes in version 0.2.1.1-alpha - 2008-??-??
       Robert Hogan. Fixes the first part of bug 681.
     - Make bridge authorities never serve extrainfo docs.
     - Allow comments in geoip file.
+    - New configure/torrc options (--enable-geoip-stats,
+      DirRecordUsageByCountry) to record how many IPs we've served directory
+      info to in each country code.
 
   o Minor features (security):
     - Reject requests for reverse-dns lookup of names in a private

+ 7 - 0
configure.in

@@ -87,6 +87,13 @@ case $host in
      ;;
 esac
 
+AC_ARG_ENABLE(geoip-stats,
+     AS_HELP_STRING(--enable-geoip-stats, enable code for directories to collect per-country statistics))
+
+if test "$enable_geoip_stats" = "yes"; then
+  AC_DEFINE(ENABLE_GEOIP_STATS, 1, [Defined if we try to collect per-country statistics])
+fi
+
 AC_ARG_ENABLE(gcc-warnings,
      AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings))
 

+ 3 - 3
doc/TODO

@@ -289,10 +289,10 @@ Bugs/issues for Tor 0.2.0.x:
       too much.
   o teach geoip_parse_entry() to skip over lines that start with #, so we
     can put a little note at the top of the geoip file to say what it is.
-N d we should have an off-by-default way for relays to dump geoip data to
+  . we should have an off-by-default way for relays to dump geoip data to
     a file in their data directory, for measurement purposes.
-    - Basic implementation
-    - Include probability-of-selection
+    o Basic implementation
+N   - Include probability-of-selection
 R d let bridges set relaybandwidthrate as low as 5kb
 R - bug: if we launch using bridges, and then stop using bridges, we
     still have our bridges in our entryguards section, and may use them.

+ 1 - 3
src/common/util.c

@@ -1559,7 +1559,6 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
   tor_assert((open_flags & (O_BINARY|O_TEXT)) != 0);
 #endif
   new_file->fd = -1;
-  tempname_len = strlen(fname)+16;
   tor_assert(tempname_len > strlen(fname)); /*check for overflow*/
   new_file->filename = tor_strdup(fname);
   if (open_flags & O_APPEND) {
@@ -1577,8 +1576,7 @@ start_writing_to_file(const char *fname, int open_flags, int mode,
     new_file->rename_on_close = 1;
   }
 
-  if ((new_file->fd = open(open_name, open_flags, mode))
-      < 0) {
+  if ((new_file->fd = open(open_name, open_flags, mode)) < 0) {
     log(LOG_WARN, LD_FS, "Couldn't open \"%s\" (%s) for writing: %s",
         open_name, fname, strerror(errno));
     goto err;

+ 3 - 0
src/or/config.c

@@ -179,6 +179,9 @@ static config_var_t _option_vars[] = {
   V(DirPolicy,                   LINELIST, NULL),
   V(DirPort,                     UINT,     "0"),
   OBSOLETE("DirPostPeriod"),
+#ifdef ENABLE_GEOIP_STATS
+  V(DirRecordUsageByCountry,     BOOL,     "0"),
+#endif
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSListenAddress,            LINELIST, NULL),

+ 1 - 1
src/or/connection_or.c

@@ -901,7 +901,7 @@ connection_or_set_state_open(or_connection_t *conn)
   } else {
     /* only report it to the geoip module if it's not a known router */
     if (!router_get_by_digest(conn->identity_digest))
-      geoip_note_client_seen(TO_CONN(conn)->addr, now);
+      geoip_note_client_seen(GEOIP_CLIENT_CONNECT, TO_CONN(conn)->addr, now);
   }
   if (conn->handshake_state) {
     or_handshake_state_free(conn->handshake_state);

+ 20 - 0
src/or/directory.c

@@ -2484,6 +2484,26 @@ directory_handle_command_get(dir_connection_t *conn, const char *headers,
       goto done;
     }
 
+#ifdef ENABLE_GEOIP_STATS
+    {
+      /* Note this networkstatus request for the per-country statistics,
+       * choosing the action according to is_v3. */
+      geoip_client_action_t act =
+        is_v3 ? GEOIP_CLIENT_NETWORKSTATUS : GEOIP_CLIENT_NETWORKSTATUS_V2;
+      /* Default to the address of the directory connection itself. */
+      uint32_t addr = conn->_base.addr;
+
+      /* If this directory connection is linked to an exit connection whose
+       * circuit is not an origin circuit, use the address of that circuit's
+       * p_conn OR connection instead. */
+      if (conn->_base.linked_conn) {
+        connection_t *c = conn->_base.linked_conn;
+        if (c->type == CONN_TYPE_EXIT) {
+          /* NOTE(review): on_circuit and p_conn are used without a NULL
+           * check -- confirm that neither can be NULL at this point. */
+          circuit_t *circ = TO_EDGE_CONN(c)->on_circuit;
+          if (! CIRCUIT_IS_ORIGIN(circ)) {
+            or_connection_t *orconn = TO_OR_CIRCUIT(circ)->p_conn;
+            addr = orconn->_base.addr;
+          }
+        }
+      }
+      geoip_note_client_seen(act, addr, time(NULL));
+    }
+#endif
+
     // note_request(request_type,dlen);
     (void) request_type;
     write_http_response_header(conn, -1, compressed,

+ 63 - 6
src/or/geoip.c

@@ -131,7 +131,7 @@ _geoip_compare_key_to_entry(const void *_key, const void **_member)
  *   "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
  * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
  * integers, and CC is a country code.
- * 
+ *
  * It also recognizes, and skips over, blank lines and lines that start
  * with '#' (comments).
  */
@@ -208,9 +208,12 @@ geoip_is_loaded(void)
 typedef struct clientmap_entry_t {
   HT_ENTRY(clientmap_entry_t) node;
   uint32_t ipaddr;
-  time_t last_seen;
+  time_t last_seen; /* The last 2 bits of this value hold the client
+                     * operation. */
 } clientmap_entry_t;
 
+#define ACTION_MASK 3
+
 /** Map from client IP address to last time seen. */
 static HT_HEAD(clientmap, clientmap_entry_t) client_history =
      HT_INITIALIZER();
@@ -238,12 +241,28 @@ HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
 /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
  * at time <b>now</b>. Ignored by all but bridges. */
 void
-geoip_note_client_seen(uint32_t addr, time_t now)
+geoip_note_client_seen(geoip_client_action_t action,
+                       uint32_t addr, time_t now)
 {
   or_options_t *options = get_options();
   clientmap_entry_t lookup, *ent;
-  if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+  if (action == GEOIP_CLIENT_CONNECT) {
+    if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+      return;
+  } else {
+#ifndef ENABLE_GEOIP_STATS
     return;
+#else
+    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
+        !options->DirRecordUsageByCountry)
+      return;
+#endif
+  }
+
+  /* We use the low 2 bits of the time to encode the action. Since we're
+   * potentially remembering times for a large number of clients, we don't
+   * want to make clientmap_entry_t larger than it has to be. */
+  now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);
   lookup.ipaddr = addr;
   ent = HT_FIND(clientmap, &client_history, &lookup);
   if (ent) {
@@ -328,7 +347,7 @@ _c_hist_compare(const void **_a, const void **_b)
  * that country, and cc is a lowercased country code.  Returns NULL if we don't
  * want to export geoip data yet. */
 char *
-geoip_get_client_history(time_t now)
+geoip_get_client_history(time_t now, geoip_client_action_t action)
 {
   char *result = NULL;
   if (!geoip_is_loaded())
@@ -343,7 +362,10 @@ geoip_get_client_history(time_t now)
     unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
     unsigned total = 0;
     HT_FOREACH(ent, clientmap, &client_history) {
-      int country = geoip_get_country_by_ip((*ent)->ipaddr);
+      int country;
+      if (((*ent)->last_seen & ACTION_MASK) != action)
+        continue;
+      country = geoip_get_country_by_ip((*ent)->ipaddr);
       if (country < 0)
         continue;
       tor_assert(0 <= country && country < n_countries);
@@ -404,6 +426,41 @@ geoip_get_client_history(time_t now)
   return result;
 }
 
+/** If we were built with ENABLE_GEOIP_STATS, write the per-country
+ * summaries of clients that requested v3 and v2 networkstatus documents to
+ * the file "geoip-stats" in our data directory; otherwise do nothing.
+ *
+ * Nothing is written unless geoip_get_client_history() returns a summary
+ * for both kinds of request.  If opening or writing the file fails, the
+ * pending write is abandoned with abort_writing_to_file(). */
+void
+dump_geoip_stats(void)
+{
+#ifdef ENABLE_GEOIP_STATS
+  time_t now = time(NULL);
+  char *filename = get_datadir_fname("geoip-stats");
+  char *data_v2 = NULL, *data_v3 = NULL;
+  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];
+  open_file_t *open_file = NULL;
+  FILE *out;
+
+  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
+  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
+  format_iso_time(since, geoip_get_history_start());
+  format_iso_time(written, now);
+  if (!data_v2 || !data_v3)
+    goto done;
+  out = start_writing_to_stdio_file(filename, 0, 0600, &open_file);
+  if (!out)
+    goto done;
+  /* One "keyword value" pair per line; every keyword, including "ns-v2",
+   * is separated from its value by a single space. */
+  if (fprintf(out, "written %s\nstarted-at %s\nns %s\nns-v2 %s\n",
+              written, since, data_v3, data_v2) < 0)
+    goto done;
+
+  finish_writing_to_file(open_file);
+  /* Ownership has passed to finish_writing_to_file(); clear the pointer so
+   * the cleanup code below does not abort the completed write. */
+  open_file = NULL;
+ done:
+  if (open_file)
+    abort_writing_to_file(open_file);
+  tor_free(filename);
+  tor_free(data_v2);
+  tor_free(data_v3);
+#endif
+}
+
+
 /** Helper used to implement GETINFO ip-to-country/... controller command. */
 int
 getinfo_helper_geoip(control_connection_t *control_conn,

+ 7 - 0
src/or/main.c

@@ -832,6 +832,7 @@ run_scheduled_events(time_t now)
   static time_t time_to_clean_caches = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
+  static time_t time_to_dump_geoip_stats = 0;
   or_options_t *options = get_options();
   int i;
   int have_dir_info;
@@ -958,6 +959,12 @@ run_scheduled_events(time_t now)
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
   }
 
+  /* At most once per DUMP_GEOIP_STATS_INTERVAL, write out the geoip
+   * statistics.  (dump_geoip_stats() compiles to an empty function unless
+   * ENABLE_GEOIP_STATS is defined.) */
+  if (time_to_dump_geoip_stats < now) {
+#define DUMP_GEOIP_STATS_INTERVAL (60*60)
+    time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
+    dump_geoip_stats();
+  }
+
   /** 2. Periodically, we consider getting a new directory, getting a
    * new running-routers list, and/or force-uploading our descriptor
    * (if we've passed our internal checks). */

+ 20 - 2
src/or/or.h

@@ -2358,6 +2358,10 @@ typedef struct {
    * count of how many client addresses have contacted us so that we can help
    * the bridge authority guess which countries have blocked access to us. */
   int BridgeRecordUsageByCountry;
+#ifdef ENABLE_GEOIP_STATS
+  int DirRecordUsageByCountry;
+#endif
+
   /** Optionally, a file with GeoIP data. */
   char *GeoIPFile;
 
@@ -3294,13 +3298,27 @@ int geoip_get_country_by_ip(uint32_t ipaddr);
 int geoip_get_n_countries(void);
 const char *geoip_get_country_name(int num);
 int geoip_is_loaded(void);
-void geoip_note_client_seen(uint32_t addr, time_t now);
+/** Indicates an action that we might be noting geoip statistics on.
+ * Note that if we're noticing CONNECT, we're a bridge, and if we're noticing
+ * the others, we're not.
+ *
+ * geoip.c stores the action in the bits of a time_t selected by ACTION_MASK
+ * (3), so every value listed here must fit in 2 bits.
+ */
+typedef enum {
+  /** We've noticed a connection as a bridge relay. */
+  GEOIP_CLIENT_CONNECT = 0,
+  /** We've served a networkstatus consensus as a directory server. */
+  GEOIP_CLIENT_NETWORKSTATUS = 1,
+  /** We've served a v2 networkstatus consensus as a directory server. */
+  GEOIP_CLIENT_NETWORKSTATUS_V2 = 2,
+} geoip_client_action_t;
+void geoip_note_client_seen(geoip_client_action_t action,
+                            uint32_t addr, time_t now);
 void geoip_remove_old_clients(time_t cutoff);
 time_t geoip_get_history_start(void);
-char *geoip_get_client_history(time_t now);
+char *geoip_get_client_history(time_t now, geoip_client_action_t action);
 int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
 void geoip_free_all(void);
+void dump_geoip_stats(void);
 
 /********************************* hibernate.c **********************/
 

+ 1 - 1
src/or/router.c

@@ -1830,7 +1830,7 @@ extrainfo_dump_to_string(char *s, size_t maxlen, extrainfo_t *extrainfo,
       geoip_remove_old_clients(now-48*60*60);
       last_purged_at = now;
     }
-    geoip_summary = geoip_get_client_history(time(NULL));
+    geoip_summary = geoip_get_client_history(time(NULL), GEOIP_CLIENT_CONNECT);
     if (geoip_summary) {
       char geoip_start[ISO_TIME_LEN+1];
       format_iso_time(geoip_start, geoip_get_history_start());

+ 8 - 8
src/or/test.c

@@ -3908,28 +3908,28 @@ test_geoip(void)
   get_options()->BridgeRecordUsageByCountry = 1;
   /* Put 9 observations in AB... */
   for (i=32; i < 40; ++i)
-    geoip_note_client_seen(i, now);
-  geoip_note_client_seen(225, now);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now);
+  geoip_note_client_seen(GEOIP_CLIENT_CONNECT, 225, now);
   /* and 3 observations in XY, several times. */
   for (j=0; j < 10; ++j)
     for (i=52; i < 55; ++i)
-      geoip_note_client_seen(i, now-3600);
+      geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
   /* and 17 observations in ZZ... */
   for (i=110; i < 127; ++i)
-    geoip_note_client_seen(i, now-7200);
-  s = geoip_get_client_history(now+5*24*60*60);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-7200);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(s);
   test_streq("zz=24,ab=16", s);
   tor_free(s);
 
   /* Now clear out all the zz observations. */
   geoip_remove_old_clients(now-6000);
-  s = geoip_get_client_history(now+5*24*60*60);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(! s); /* There are only 12 observations left.  Not enough to
                        build an answer.  Add 4 more in XY... */
   for (i=55; i < 59; ++i)
-    geoip_note_client_seen(i, now-3600);
-  s = geoip_get_client_history(now+5*24*60*60);
+    geoip_note_client_seen(GEOIP_CLIENT_CONNECT, i, now-3600);
+  s = geoip_get_client_history(now+5*24*60*60, GEOIP_CLIENT_CONNECT);
   test_assert(s);
   test_streq("ab=16", s);
   tor_free(s);