Browse Source

r15530@tombo: nickm | 2007-12-17 16:54:03 -0500
First wodge of geoip code so bridges can figure out which countries are blocking them.


svn:r12845

Nick Mathewson 18 years ago
parent
commit
820159cac5
8 changed files with 332 additions and 13 deletions
  1. 20 8
      doc/TODO
  2. 2 2
      src/or/Makefile.am
  3. 8 0
      src/or/config.c
  4. 6 3
      src/or/connection_or.c
  5. 275 0
      src/or/geoip.c
  6. 1 0
      src/or/main.c
  7. 19 0
      src/or/or.h
  8. 1 0
      src/or/rephist.c

+ 20 - 8
doc/TODO

@@ -28,20 +28,32 @@ Features blocking 0.2.0.x:
   - mirror tor downloads on (via) tor dir caches
   - mirror tor downloads on (via) tor dir caches
 R   . spec
 R   . spec
     d deploy
     d deploy
-  - geoip caching and publishing for bridges
+  . geoip caching and publishing for bridges
 R   . spec
 R   . spec
-?   - deploy
+    - Implement
+      . Code to load a geoip file from disk
+        o Truncated format
+        - Full format.
+        o Actually invoke
+      o Code to store a GEOIP file in memory.
+      o Code to remember client IPs.
+      . Code to generate history lines
+        - Make history lines match spec.
+      - Controller interface
+      - Track consecutive time up, not time since last-forgotten IP.
+      - Add log lines.
+    - Tests
     d let Vidalia use the geoip data too rather than doing its own
     d let Vidalia use the geoip data too rather than doing its own
       anonymized queries
       anonymized queries
-  - bridge address disbursal strategies
+  o bridge address disbursal strategies
     o get the cached-descriptors* to bridges@moria
     o get the cached-descriptors* to bridges@moria
-    - parse out bridge addresses from cached-descriptors*
+    o parse out bridge addresses from cached-descriptors*
       (or parse them out before Tonga sends them)
       (or parse them out before Tonga sends them)
       (or get Tonga's Tor to write them out better in the first place)
       (or get Tonga's Tor to write them out better in the first place)
-N   * answer by IP/timestamp
-      - run a little web server on moria?
-N   d answer by answering email to bridges@torproject
-      - keep track of which addresses have been answered already
+    o answer by IP/timestamp
+      o run a little web server on moria?
+    o answer by answering email to bridges@torproject
+      o keep track of which addresses have been answered already
 R - bridge communities
 R - bridge communities
     - spec
     - spec
     - deploy
     - deploy

+ 2 - 2
src/or/Makefile.am

@@ -16,7 +16,7 @@ tor_SOURCES = buffers.c circuitbuild.c circuitlist.c \
 	circuituse.c command.c config.c \
 	circuituse.c command.c config.c \
 	connection.c connection_edge.c connection_or.c control.c \
 	connection.c connection_edge.c connection_or.c control.c \
 	cpuworker.c directory.c dirserv.c dirvote.c \
 	cpuworker.c directory.c dirserv.c dirvote.c \
-	dns.c dnsserv.c hibernate.c main.c $(tor_platform_source) \
+	dns.c dnsserv.c geoip.c hibernate.c main.c $(tor_platform_source) \
 	networkstatus.c \
 	networkstatus.c \
 	onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \
 	onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \
 	rendservice.c rephist.c router.c routerlist.c routerparse.c \
 	rendservice.c rephist.c router.c routerlist.c routerparse.c \
@@ -38,7 +38,7 @@ test_SOURCES = buffers.c circuitbuild.c circuitlist.c \
 	circuituse.c command.c config.c \
 	circuituse.c command.c config.c \
 	connection.c connection_edge.c connection_or.c control.c \
 	connection.c connection_edge.c connection_or.c control.c \
 	cpuworker.c directory.c dirserv.c dirvote.c \
 	cpuworker.c directory.c dirserv.c dirvote.c \
-	dns.c dnsserv.c hibernate.c main.c $(tor_platform_source) \
+	dns.c dnsserv.c geoip.c hibernate.c main.c $(tor_platform_source) \
 	networkstatus.c \
 	networkstatus.c \
 	onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \
 	onion.c policies.c relay.c rendcommon.c rendclient.c rendmid.c \
 	rendservice.c rephist.c router.c routerlist.c routerparse.c \
 	rendservice.c rephist.c router.c routerlist.c routerparse.c \

+ 8 - 0
src/or/config.c

@@ -150,6 +150,7 @@ static config_var_t _option_vars[] = {
   V(BandwidthRate,               MEMUNIT,  "5 MB"),
   V(BandwidthRate,               MEMUNIT,  "5 MB"),
   V(BridgeAuthoritativeDir,      BOOL,     "0"),
   V(BridgeAuthoritativeDir,      BOOL,     "0"),
   VAR("Bridge",                  LINELIST, Bridges,    NULL),
   VAR("Bridge",                  LINELIST, Bridges,    NULL),
+  V(BridgeRecordUsageByCountry,  BOOL,     "1"),
   V(BridgeRelay,                 BOOL,     "0"),
   V(BridgeRelay,                 BOOL,     "0"),
   V(CircuitBuildTimeout,         INTERVAL, "1 minute"),
   V(CircuitBuildTimeout,         INTERVAL, "1 minute"),
   V(CircuitIdleTimeout,          INTERVAL, "1 hour"),
   V(CircuitIdleTimeout,          INTERVAL, "1 hour"),
@@ -191,6 +192,7 @@ static config_var_t _option_vars[] = {
   V(FetchServerDescriptors,      BOOL,     "1"),
   V(FetchServerDescriptors,      BOOL,     "1"),
   V(FetchHidServDescriptors,     BOOL,     "1"),
   V(FetchHidServDescriptors,     BOOL,     "1"),
   V(FetchUselessDescriptors,     BOOL,     "0"),
   V(FetchUselessDescriptors,     BOOL,     "0"),
+  V(GEOIPFile,                   STRING,   NULL),
   V(Group,                       STRING,   NULL),
   V(Group,                       STRING,   NULL),
   V(HardwareAccel,               BOOL,     "0"),
   V(HardwareAccel,               BOOL,     "0"),
   V(HashedControlPassword,       LINELIST, NULL),
   V(HashedControlPassword,       LINELIST, NULL),
@@ -1214,6 +1216,12 @@ options_act(or_options_t *old_options)
       init_keys();
       init_keys();
   }
   }
 
 
+  /* Maybe load geoip file */
+  if (options->GEOIPFile &&
+      ((!old_options || !opt_streq(old_options->GEOIPFile, options->GEOIPFile))
+       || !geoip_is_loaded())) {
+    geoip_load_file(options->GEOIPFile);
+  }
   /* Check if we need to parse and add the EntryNodes config option. */
   /* Check if we need to parse and add the EntryNodes config option. */
   if (options->EntryNodes &&
   if (options->EntryNodes &&
       (!old_options ||
       (!old_options ||

+ 6 - 3
src/or/connection_or.c

@@ -893,17 +893,20 @@ int
 connection_or_set_state_open(or_connection_t *conn)
 connection_or_set_state_open(or_connection_t *conn)
 {
 {
   int started_here = connection_or_nonopen_was_started_here(conn);
   int started_here = connection_or_nonopen_was_started_here(conn);
+  time_t now = time(NULL);
   conn->_base.state = OR_CONN_STATE_OPEN;
   conn->_base.state = OR_CONN_STATE_OPEN;
   control_event_or_conn_status(conn, OR_CONN_EVENT_CONNECTED, 0);
   control_event_or_conn_status(conn, OR_CONN_EVENT_CONNECTED, 0);
 
 
   if (started_here) {
   if (started_here) {
-    rep_hist_note_connect_succeeded(conn->identity_digest, time(NULL));
-    if (entry_guard_register_connect_status(conn->identity_digest, 1,
-                                            time(NULL)) < 0) {
+    rep_hist_note_connect_succeeded(conn->identity_digest, now);
+    if (entry_guard_register_connect_status(conn->identity_digest,
+                                            1, now) < 0) {
       /* pending circs get closed in circuit_about_to_close_connection() */
       /* pending circs get closed in circuit_about_to_close_connection() */
       return -1;
       return -1;
     }
     }
     router_set_status(conn->identity_digest, 1);
     router_set_status(conn->identity_digest, 1);
+  } else {
+    geoip_note_client_seen(TO_CONN(conn)->addr, now);
   }
   }
   if (conn->handshake_state) {
   if (conn->handshake_state) {
     or_handshake_state_free(conn->handshake_state);
     or_handshake_state_free(conn->handshake_state);

+ 275 - 0
src/or/geoip.c

@@ -0,0 +1,275 @@
+/* Copyright (c) 2007, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+/* $Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm  $ */
+const char geoip_c_id[] =
+  "$Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm  $";
+
+#define GEOIP_PRIVATE
+#include "or.h"
+#include "ht.h"
+
+/** DOCDOC this whole file */
+
+typedef struct geoip_entry_t {
+  uint32_t ip_low;
+  uint32_t ip_high;
+  int country;
+} geoip_entry_t;
+
+static smartlist_t *geoip_countries = NULL;
+static strmap_t *country_idxplus1_by_lc_code = NULL;
+static smartlist_t *geoip_entries = NULL;
+
+void
+geoip_add_entry(uint32_t low, uint32_t high, const char *country)
+{
+  uintptr_t idx;
+  geoip_entry_t *ent;
+  void *_idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
+
+  if (!_idxplus1) {
+    char *c = tor_strdup(country);
+    tor_strlower(c);
+    smartlist_add(geoip_countries, c);
+    idx = smartlist_len(geoip_countries) + 1;
+    strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
+  } else {
+    idx = ((uintptr_t)_idxplus1)-1;
+  }
+  ent = tor_malloc_zero(sizeof(geoip_entry_t));
+  ent->ip_low = low;
+  ent->ip_high = high;
+  ent->country = idx;
+  smartlist_add(geoip_entries, ent);
+}
+
+static int
+_geoip_compare_entries(const void **_a, const void **_b)
+{
+  const geoip_entry_t *a = *_a, *b = *_b;
+  if (a->ip_low < b->ip_low)
+    return -1;
+  else if (a->ip_low > b->ip_low)
+    return 1;
+  else
+    return 0;
+}
+
+static int
+_geoip_compare_key_to_entry(const void *_key, const void **_member)
+{
+  const uint32_t addr = *(uint32_t *)_key;
+  const geoip_entry_t *entry = *_member;
+  if (addr < entry->ip_low)
+    return -1;
+  else if (addr > entry->ip_high)
+    return 1;
+  else
+    return 0;
+}
+
+int
+geoip_load_file(const char *filename)
+{
+  FILE *f;
+  geoip_free_all();
+  if (!(f = fopen(filename, "r"))) {
+    log_warn(LD_GENERAL, "Failed to open GEOIP file %s.", filename);
+    return -1;
+  }
+  geoip_countries = smartlist_create();
+  geoip_entries = smartlist_create();
+  country_idxplus1_by_lc_code = strmap_new();
+  while (!feof(f)) {
+    unsigned int low, high;
+    char b[3];
+    if (fscanf(f, "%u,%u,%2s", &low, &high, b) == 3) {
+      geoip_add_entry(low, high, b);
+    }
+  }
+  /*XXXX020 abort and return -1 if */
+  fclose(f);
+
+  smartlist_sort(geoip_entries, _geoip_compare_entries);
+  return 0;
+}
+
+int
+geoip_get_country_by_ip(uint32_t ipaddr)
+{
+  geoip_entry_t *ent;
+  if (!geoip_entries)
+    return -1;
+  ent = smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry);
+  return ent ? ent->country : -1;
+}
+
+int
+geoip_get_n_countries(void)
+{
+  return smartlist_len(geoip_countries);
+}
+
+const char *
+geoip_get_country_name(int num)
+{
+  if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries))
+    return smartlist_get(geoip_countries, num);
+  else
+    return "??";
+}
+
+int
+geoip_is_loaded(void)
+{
+  return geoip_countries != NULL && geoip_entries != NULL;
+}
+
+/** DOCDOC */
+typedef struct clientmap_entry_t {
+  HT_ENTRY(clientmap_entry_t) node;
+  uint32_t ipaddr;
+  time_t last_seen;
+} clientmap_entry_t;
+
+static HT_HEAD(clientmap, clientmap_entry_t) client_history =
+     HT_INITIALIZER();
+static time_t client_history_starts = 0;
+
+static INLINE unsigned
+clientmap_entry_hash(const clientmap_entry_t *a)
+{
+  return ht_improve_hash((unsigned) a->ipaddr);
+}
+static INLINE int
+clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
+{
+  return a->ipaddr == b->ipaddr;
+}
+
+HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
+             clientmap_entries_eq);
+HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
+            clientmap_entries_eq, 0.6, malloc, realloc, free);
+
+/** DOCDOC */
+void
+geoip_note_client_seen(uint32_t addr, time_t now)
+{
+  or_options_t *options = get_options();
+  clientmap_entry_t lookup, *ent;
+  if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
+    return;
+  lookup.ipaddr = addr;
+  ent = HT_FIND(clientmap, &client_history, &lookup);
+  if (ent) {
+    ent->last_seen = now;
+  } else {
+    ent = tor_malloc_zero(sizeof(clientmap_entry_t));
+    ent->ipaddr = addr;
+    ent->last_seen = now;
+    HT_INSERT(clientmap, &client_history, ent);
+  }
+  if (!client_history_starts)
+    client_history_starts = now;
+}
+
+static int
+_remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff)
+{
+  time_t cutoff = *(time_t*)_cutoff;
+  if (ent->last_seen < cutoff) {
+    tor_free(ent);
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+void
+geoip_remove_old_clients(time_t cutoff)
+{
+  clientmap_HT_FOREACH_FN(&client_history,
+                          _remove_old_client_helper,
+                          &cutoff);
+  if (client_history_starts < cutoff)
+    client_history_starts = cutoff;
+}
+
+#define MIN_IPS_TO_NOTE_COUNTRY 8
+#define MIN_IPS_TO_NOTE_ANYTHING 16
+#define IP_GRANULARITY 8
+
+char *
+geoip_get_client_history(time_t now)
+{
+  char *result = NULL;
+  if (!geoip_is_loaded())
+    return NULL;
+  if (client_history_starts < (now - 12*60*60)) {
+    char buf[32];
+    smartlist_t *chunks = NULL;
+    int n_countries = geoip_get_n_countries();
+    int i;
+    clientmap_entry_t **ent;
+    unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
+    unsigned total = 0;
+    HT_FOREACH(ent, clientmap, &client_history) {
+      int country = geoip_get_country_by_ip((*ent)->ipaddr);
+      if (country < 0)
+        continue;
+      tor_assert(0 <= country && country < n_countries);
+      ++counts[country];
+      ++total;
+    }
+    if (total < MIN_IPS_TO_NOTE_ANYTHING)
+      goto done;
+    chunks = smartlist_create();
+    for (i = 0; i < n_countries; ++i) {
+      unsigned c = counts[i];
+      const char *countrycode;
+      if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
+        c -= c % IP_GRANULARITY;
+        countrycode = geoip_get_country_name(i);
+        tor_snprintf(buf, sizeof(buf), "%s=%u", countrycode, c);
+        smartlist_add(chunks, tor_strdup(buf));
+      }
+    }
+    result = smartlist_join_strings(chunks, ",", 0, NULL);
+  done:
+    tor_free(counts);
+    if (chunks) {
+      SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
+      smartlist_free(chunks);
+    }
+  }
+  return result;
+}
+
+
+void
+geoip_free_all(void)
+{
+  if (geoip_countries) {
+    SMARTLIST_FOREACH(geoip_countries, char *, cp, tor_free(cp));
+    smartlist_free(geoip_countries);
+  }
+  if (country_idxplus1_by_lc_code)
+    strmap_free(country_idxplus1_by_lc_code, NULL);
+  if (geoip_entries) {
+    SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, ent, tor_free(ent));
+    smartlist_free(geoip_entries);
+  }
+  {
+    clientmap_entry_t **ent, **next, *this;
+    for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
+      this = *ent;
+      next = HT_NEXT_RMV(clientmap, &client_history, ent);
+      tor_free(this);
+    }
+    HT_CLEAR(clientmap, &client_history);
+  }
+  geoip_countries = NULL;
+  country_idxplus1_by_lc_code = NULL;
+  geoip_entries = NULL;
+}

+ 1 - 0
src/or/main.c

@@ -1803,6 +1803,7 @@ tor_free_all(int postfork)
   if (!postfork) {
   if (!postfork) {
     evdns_shutdown(1);
     evdns_shutdown(1);
   }
   }
+  geoip_free_all();
   dirvote_free_all();
   dirvote_free_all();
   routerlist_free_all();
   routerlist_free_all();
   networkstatus_free_all();
   networkstatus_free_all();

+ 19 - 0
src/or/or.h

@@ -2315,6 +2315,10 @@ typedef struct {
   /** DOCDOC here and in tor.1 */
   /** DOCDOC here and in tor.1 */
   int LearnAuthorityAddrFromCerts;
   int LearnAuthorityAddrFromCerts;
 
 
+  /** DOCDOC here and in tor.1 */
+  int BridgeRecordUsageByCountry;
+  char *GEOIPFile;
+
 } or_options_t;
 } or_options_t;
 
 
 /** Persistent state for an onion router, as saved to disk. */
 /** Persistent state for an onion router, as saved to disk. */
@@ -3192,6 +3196,21 @@ void dnsserv_resolved(edge_connection_t *conn,
 void dnsserv_reject_request(edge_connection_t *conn);
 void dnsserv_reject_request(edge_connection_t *conn);
 void dnsserv_launch_request(const char *name, int is_reverse);
 void dnsserv_launch_request(const char *name, int is_reverse);
 
 
+/********************************* geoip.c **************************/
+
+#ifdef GEOIP_PRIVATE
+void geoip_add_entry(uint32_t low, uint32_t high, const char *country);
+#endif
+int geoip_load_file(const char *filename);
+int geoip_get_country_by_ip(uint32_t ipaddr);
+int geoip_get_n_countries(void);
+const char *geoip_get_country_name(int num);
+int geoip_is_loaded(void);
+void geoip_note_client_seen(uint32_t addr, time_t now);
+void geoip_remove_old_clients(time_t cutoff);
+char *geoip_get_client_history(time_t now);
+void geoip_free_all(void);
+
 /********************************* hibernate.c **********************/
 /********************************* hibernate.c **********************/
 
 
 int accounting_parse_options(or_options_t *options, int validate_only);
 int accounting_parse_options(or_options_t *options, int validate_only);

+ 1 - 0
src/or/rephist.c

@@ -13,6 +13,7 @@ const char rephist_c_id[] =
  **/
  **/
 
 
 #include "or.h"
 #include "or.h"
+#include "ht.h"
 
 
 static void bw_arrays_init(void);
 static void bw_arrays_init(void);
 static void predicted_ports_init(void);
 static void predicted_ports_init(void);