Browse Source

Change the way directories that are configured with --enable-geoip-stats write geoip stats to disk.

- Write geoip stats to disk every 24 hours, not every hour.
- Remove configuration options and define reasonable defaults.
- Clear history of client requests every 24 hours (which wasn't done at
  all before).
Karsten Loesing 16 years ago
parent
commit
54c97c9133
6 changed files with 45 additions and 28 deletions
  1. 2 0
      ChangeLog
  2. 4 4
      src/or/config.c
  3. 24 12
      src/or/geoip.c
  4. 0 8
      src/or/main.c
  5. 10 2
      src/or/or.h
  6. 5 2
      src/or/router.c

+ 2 - 0
ChangeLog

@@ -6,6 +6,8 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
     - The memarea code now uses a sentinel value at the end of each area
     - The memarea code now uses a sentinel value at the end of each area
       to make sure nothing writes beyond the end of an area.  This might
       to make sure nothing writes beyond the end of an area.  This might
       help debug some conceivable causes of bug 930.
       help debug some conceivable causes of bug 930.
+    - Directories that are configured with the --enable-geoip-stats flag
+      now write their GeoIP stats to disk exactly every 24 hours.
 
 
   o Deprecated and removed features:
   o Deprecated and removed features:
     - The controller no longer accepts the old obsolete "addr-mappings/"
     - The controller no longer accepts the old obsolete "addr-mappings/"

+ 4 - 4
src/or/config.c

@@ -187,10 +187,10 @@ static config_var_t _option_vars[] = {
   V(DirPortFrontPage,            FILENAME, NULL),
   V(DirPortFrontPage,            FILENAME, NULL),
   OBSOLETE("DirPostPeriod"),
   OBSOLETE("DirPostPeriod"),
 #ifdef ENABLE_GEOIP_STATS
 #ifdef ENABLE_GEOIP_STATS
-  V(DirRecordUsageByCountry,     BOOL,     "0"),
-  V(DirRecordUsageGranularity,   UINT,     "4"),
-  V(DirRecordUsageRetainIPs,     INTERVAL, "14 days"),
-  V(DirRecordUsageSaveInterval,  INTERVAL, "6 hours"),
+  OBSOLETE("DirRecordUsageByCountry"),
+  OBSOLETE("DirRecordUsageGranularity"),
+  OBSOLETE("DirRecordUsageRetainIPs"),
+  OBSOLETE("DirRecordUsageSaveInterval"),
 #endif
 #endif
   VAR("DirServer",               LINELIST, DirServers, NULL),
   VAR("DirServer",               LINELIST, DirServers, NULL),
   V(DNSPort,                     UINT,     "0"),
   V(DNSPort,                     UINT,     "0"),

+ 24 - 12
src/or/geoip.c

@@ -12,6 +12,7 @@
 #include "ht.h"
 #include "ht.h"
 
 
 static void clear_geoip_db(void);
 static void clear_geoip_db(void);
+static void dump_geoip_stats(void);
 
 
 /** An entry from the GeoIP file: maps an IP range to a country. */
 /** An entry from the GeoIP file: maps an IP range to a country. */
 typedef struct geoip_entry_t {
 typedef struct geoip_entry_t {
@@ -21,9 +22,9 @@ typedef struct geoip_entry_t {
 } geoip_entry_t;
 } geoip_entry_t;
 
 
 /** For how many periods should we remember per-country request history? */
 /** For how many periods should we remember per-country request history? */
-#define REQUEST_HIST_LEN 3
+#define REQUEST_HIST_LEN 1
 /** How long are the periods for which we should remember request history? */
 /** How long are the periods for which we should remember request history? */
-#define REQUEST_HIST_PERIOD (8*60*60)
+#define REQUEST_HIST_PERIOD (24*60*60)
 
 
 /** A per-country record for GeoIP request history. */
 /** A per-country record for GeoIP request history. */
 typedef struct geoip_country_t {
 typedef struct geoip_country_t {
@@ -313,8 +314,7 @@ geoip_note_client_seen(geoip_client_action_t action,
 #ifndef ENABLE_GEOIP_STATS
 #ifndef ENABLE_GEOIP_STATS
     return;
     return;
 #else
 #else
-    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||
-        !options->DirRecordUsageByCountry)
+    if (options->BridgeRelay || options->BridgeAuthoritativeDir)
       return;
       return;
 #endif
 #endif
   }
   }
@@ -327,6 +327,14 @@ geoip_note_client_seen(geoip_client_action_t action,
       current_request_period_starts = now;
       current_request_period_starts = now;
       break;
       break;
     }
     }
+    /* Also discard all items in the client history that are too old.
+     * (This only works here because bridge and directory stats are
+     * independent. Otherwise, we'd only want to discard those items
+     * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
+    geoip_remove_old_clients(current_request_period_starts);
+    /* Before rotating, write the current stats to disk. */
+    dump_geoip_stats();
+    /* Now rotate request period */
     SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
     SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
         memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
         memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
                 sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
                 sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
@@ -458,9 +466,13 @@ char *
 geoip_get_client_history(time_t now, geoip_client_action_t action)
 geoip_get_client_history(time_t now, geoip_client_action_t action)
 {
 {
   char *result = NULL;
   char *result = NULL;
+  unsigned min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
+#ifdef ENABLE_GEOIP_STATS
+  min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
+#endif
   if (!geoip_is_loaded())
   if (!geoip_is_loaded())
     return NULL;
     return NULL;
-  if (client_history_starts < (now - GEOIP_MIN_OBSERVATION_TIME)) {
+  if (client_history_starts < (now - min_observation_time)) {
     char buf[32];
     char buf[32];
     smartlist_t *chunks = NULL;
     smartlist_t *chunks = NULL;
     smartlist_t *entries = NULL;
     smartlist_t *entries = NULL;
@@ -471,8 +483,7 @@ geoip_get_client_history(time_t now, geoip_client_action_t action)
     unsigned total = 0;
     unsigned total = 0;
     unsigned granularity = IP_GRANULARITY;
     unsigned granularity = IP_GRANULARITY;
 #ifdef ENABLE_GEOIP_STATS
 #ifdef ENABLE_GEOIP_STATS
-    if (get_options()->DirRecordUsageByCountry)
-      granularity = get_options()->DirRecordUsageGranularity;
+    granularity = DIR_RECORD_USAGE_GRANULARITY;
 #endif
 #endif
     HT_FOREACH(ent, clientmap, &client_history) {
     HT_FOREACH(ent, clientmap, &client_history) {
       int country;
       int country;
@@ -538,12 +549,13 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
   smartlist_t *entries, *strings;
   smartlist_t *entries, *strings;
   char *result;
   char *result;
   unsigned granularity = IP_GRANULARITY;
   unsigned granularity = IP_GRANULARITY;
+  unsigned min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
 #ifdef ENABLE_GEOIP_STATS
 #ifdef ENABLE_GEOIP_STATS
-  if (get_options()->DirRecordUsageByCountry)
-    granularity = get_options()->DirRecordUsageGranularity;
+  granularity = DIR_RECORD_USAGE_GRANULARITY;
+  min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
 #endif
 #endif
 
 
-  if (client_history_starts >= (now - GEOIP_MIN_OBSERVATION_TIME))
+  if (client_history_starts >= (now - min_observation_time))
     return NULL;
     return NULL;
   if (action != GEOIP_CLIENT_NETWORKSTATUS &&
   if (action != GEOIP_CLIENT_NETWORKSTATUS &&
       action != GEOIP_CLIENT_NETWORKSTATUS_V2)
       action != GEOIP_CLIENT_NETWORKSTATUS_V2)
@@ -584,7 +596,7 @@ geoip_get_request_history(time_t now, geoip_client_action_t action)
 }
 }
 
 
 /** Store all our geoip statistics into $DATADIR/geoip-stats. */
 /** Store all our geoip statistics into $DATADIR/geoip-stats. */
-void
+static void
 dump_geoip_stats(void)
 dump_geoip_stats(void)
 {
 {
 #ifdef ENABLE_GEOIP_STATS
 #ifdef ENABLE_GEOIP_STATS
@@ -601,7 +613,7 @@ dump_geoip_stats(void)
   data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
   format_iso_time(since, geoip_get_history_start());
   format_iso_time(since, geoip_get_history_start());
   format_iso_time(written, now);
   format_iso_time(written, now);
-  out = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE,
+  out = start_writing_to_stdio_file(filename, OPEN_FLAGS_APPEND,
                                     0600, &open_file);
                                     0600, &open_file);
   if (!out)
   if (!out)
     goto done;
     goto done;

+ 0 - 8
src/or/main.c

@@ -807,7 +807,6 @@ run_scheduled_events(time_t now)
   static time_t time_to_clean_caches = 0;
   static time_t time_to_clean_caches = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_recheck_bandwidth = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
   static time_t time_to_check_for_expired_networkstatus = 0;
-  static time_t time_to_dump_geoip_stats = 0;
   static time_t time_to_retry_dns_init = 0;
   static time_t time_to_retry_dns_init = 0;
   or_options_t *options = get_options();
   or_options_t *options = get_options();
   int i;
   int i;
@@ -935,13 +934,6 @@ run_scheduled_events(time_t now)
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
     time_to_check_for_expired_networkstatus = now + CHECK_EXPIRED_NS_INTERVAL;
   }
   }
 
 
-  if (time_to_dump_geoip_stats < now) {
-#define DUMP_GEOIP_STATS_INTERVAL (60*60);
-    if (time_to_dump_geoip_stats)
-      dump_geoip_stats();
-    time_to_dump_geoip_stats = now + DUMP_GEOIP_STATS_INTERVAL;
-  }
-
   /* Remove old information from rephist and the rend cache. */
   /* Remove old information from rephist and the rend cache. */
   if (time_to_clean_caches < now) {
   if (time_to_clean_caches < now) {
     rep_history_clean(now - options->RephistTrackTime);
     rep_history_clean(now - options->RephistTrackTime);

+ 10 - 2
src/or/or.h

@@ -2528,7 +2528,7 @@ typedef struct {
    * the bridge authority guess which countries have blocked access to us. */
    * the bridge authority guess which countries have blocked access to us. */
   int BridgeRecordUsageByCountry;
   int BridgeRecordUsageByCountry;
 
 
-#ifdef ENABLE_GEOIP_STATS
+#if 0
   /** If true, and Tor is built with GEOIP_STATS support, and we're a
   /** If true, and Tor is built with GEOIP_STATS support, and we're a
    * directory, record how many directory requests we get from each country. */
    * directory, record how many directory requests we get from each country. */
   int DirRecordUsageByCountry;
   int DirRecordUsageByCountry;
@@ -3583,6 +3583,15 @@ int dnsserv_launch_request(const char *name, int is_reverse);
 
 
 /********************************* geoip.c **************************/
 /********************************* geoip.c **************************/
 
 
+/** Round all GeoIP results to the next multiple of this value, to avoid
+ * leaking information. */
+#define DIR_RECORD_USAGE_GRANULARITY 8
+/** Time interval: Flush geoip data to disk this often. */
+#define DIR_RECORD_USAGE_RETAIN_IPS (24*60*60)
+/** How long do we have to have observed per-country request history before
+ * we are willing to talk about it? */
+#define DIR_RECORD_USAGE_MIN_OBSERVATION_TIME (24*60*60)
+
 #ifdef GEOIP_PRIVATE
 #ifdef GEOIP_PRIVATE
 int geoip_parse_entry(const char *line);
 int geoip_parse_entry(const char *line);
 #endif
 #endif
@@ -3614,7 +3623,6 @@ char *geoip_get_request_history(time_t now, geoip_client_action_t action);
 int getinfo_helper_geoip(control_connection_t *control_conn,
 int getinfo_helper_geoip(control_connection_t *control_conn,
                          const char *question, char **answer);
                          const char *question, char **answer);
 void geoip_free_all(void);
 void geoip_free_all(void);
-void dump_geoip_stats(void);
 
 
 /********************************* hibernate.c **********************/
 /********************************* hibernate.c **********************/
 
 

+ 5 - 2
src/or/router.c

@@ -1915,10 +1915,13 @@ extrainfo_get_client_geoip_summary(time_t now)
   static time_t last_purged_at = 0;
   static time_t last_purged_at = 0;
   int geoip_purge_interval = 48*60*60;
   int geoip_purge_interval = 48*60*60;
 #ifdef ENABLE_GEOIP_STATS
 #ifdef ENABLE_GEOIP_STATS
-  if (get_options()->DirRecordUsageByCountry)
-    geoip_purge_interval = get_options()->DirRecordUsageRetainIPs;
+  geoip_purge_interval = DIR_RECORD_USAGE_RETAIN_IPS;
 #endif
 #endif
   if (now > last_purged_at+geoip_purge_interval) {
   if (now > last_purged_at+geoip_purge_interval) {
+    /* (Note that this also discards items in the client history with
+     * action GEOIP_CLIENT_NETWORKSTATUS{_V2}, which doesn't matter
+     * because bridge and directory stats are independent. Keep in mind
+     * for future extensions, though.) */
     geoip_remove_old_clients(now-geoip_purge_interval);
     geoip_remove_old_clients(now-geoip_purge_interval);
     last_purged_at = now;
     last_purged_at = now;
   }
   }