| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667 | /* Copyright (c) 2007-2008, The Tor Project, Inc. 
*//* See LICENSE for licensing information *//* $Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm  $ */const char geoip_c_id[] =  "$Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm  $";/** * \file geoip.c * \brief Functions related to maintaining an IP-to-country database and to *    summarizing client connections by country. */#define GEOIP_PRIVATE#include "or.h"#include "ht.h"static void clear_geoip_db(void);/** An entry from the GeoIP file: maps an IP range to a country. */typedef struct geoip_entry_t {  uint32_t ip_low; /**< The lowest IP in the range, in host order */  uint32_t ip_high; /**< The highest IP in the range, in host order */  intptr_t country; /**< An index into geoip_countries */} geoip_entry_t;/** DOCDOC */#define REQUEST_HIST_LEN 3#define REQUEST_HIST_PERIOD (8*60*60)typedef struct geoip_country_t {  char countrycode[3];  uint32_t n_v2_ns_requests[REQUEST_HIST_LEN];  uint32_t n_v3_ns_requests[REQUEST_HIST_LEN];} geoip_country_t;/** A list of geoip_country_t */static smartlist_t *geoip_countries = NULL;/** A map from lowercased country codes to their position in geoip_countries. * The index is encoded in the pointer, and 1 is added so that NULL can mean * not found. */static strmap_t *country_idxplus1_by_lc_code = NULL;/** A list of all known geoip_entry_t, sorted by ip_low. */static smartlist_t *geoip_entries = NULL;/** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>. 
*/static voidgeoip_add_entry(uint32_t low, uint32_t high, const char *country){  intptr_t idx;  geoip_entry_t *ent;  void *_idxplus1;  if (high < low)    return;  _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);  if (!_idxplus1) {    geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));    strlcpy(c->countrycode, country, sizeof(c->countrycode));    tor_strlower(c->countrycode);    smartlist_add(geoip_countries, c);    idx = smartlist_len(geoip_countries) - 1;    strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));  } else {    idx = ((uintptr_t)_idxplus1)-1;  }  {    geoip_country_t *c = smartlist_get(geoip_countries, idx);    tor_assert(!strcasecmp(c->countrycode, country));  }  ent = tor_malloc_zero(sizeof(geoip_entry_t));  ent->ip_low = low;  ent->ip_high = high;  ent->country = idx;  smartlist_add(geoip_entries, ent);}/** Add an entry to the GeoIP table, parsing it from <b>line</b>.  The * format is as for geoip_load_file(). *//*private*/ intgeoip_parse_entry(const char *line){  unsigned int low, high;  char b[3];  if (!geoip_countries) {    geoip_countries = smartlist_create();    geoip_entries = smartlist_create();    country_idxplus1_by_lc_code = strmap_new();  }  while (TOR_ISSPACE(*line))    ++line;  if (*line == '#')    return 0;  if (sscanf(line,"%u,%u,%2s", &low, &high, b) == 3) {    geoip_add_entry(low, high, b);    return 0;  } else if (sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, b) == 3) {    geoip_add_entry(low, high, b);    return 0;  } else {    log_warn(LD_GENERAL, "Unable to parse line from GEOIP file: %s",             escaped(line));    return -1;  }}/** Sorting helper: return -1, 1, or 0 based on comparison of two * geoip_entry_t */static int_geoip_compare_entries(const void **_a, const void **_b){  const geoip_entry_t *a = *_a, *b = *_b;  if (a->ip_low < b->ip_low)    return -1;  else if (a->ip_low > b->ip_low)    return 1;  else    return 0;}/** bsearch helper: return -1, 1, or 0 based on 
comparison of an IP (a pointer * to a uint32_t in host order) to a geoip_entry_t */static int_geoip_compare_key_to_entry(const void *_key, const void **_member){  const uint32_t addr = *(uint32_t *)_key;  const geoip_entry_t *entry = *_member;  if (addr < entry->ip_low)    return -1;  else if (addr > entry->ip_high)    return 1;  else    return 0;}/** Return 1 if we should collect geoip stats on bridge users, and * include them in our extrainfo descriptor. Else return 0. */intshould_record_bridge_info(or_options_t *options){  return options->BridgeRelay && options->BridgeRecordUsageByCountry;}/** Clear the GeoIP database and reload it from the file * <b>filename</b>. Return 0 on success, -1 on failure. * * Recognized line formats are: *   INTIPLOW,INTIPHIGH,CC * and *   "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME" * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned * integers, and CC is a country code. * * It also recognizes, and skips over, blank lines and lines that start * with '#' (comments). */intgeoip_load_file(const char *filename, or_options_t *options){  FILE *f;  int severity = should_record_bridge_info(options) ? LOG_WARN : LOG_INFO;  clear_geoip_db();  if (!(f = fopen(filename, "r"))) {    log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s.", filename);    return -1;  }  geoip_countries = smartlist_create();  geoip_entries = smartlist_create();  country_idxplus1_by_lc_code = strmap_new();  log_info(LD_GENERAL, "Parsing GEOIP file.");  while (!feof(f)) {    char buf[512];    if (fgets(buf, (int)sizeof(buf), f) == NULL)      break;    /* FFFF track full country name. */    geoip_parse_entry(buf);  }  /*XXXX020 abort and return -1 if no entries/illformed?*/  fclose(f);  smartlist_sort(geoip_entries, _geoip_compare_entries);  return 0;}/** Given an IP address in host order, return a number representing the * country to which that address belongs, or -1 for unknown.  
The return value * will always be less than geoip_get_n_countries().  To decode it, * call geoip_get_country_name(). */intgeoip_get_country_by_ip(uint32_t ipaddr){  geoip_entry_t *ent;  if (!geoip_entries)    return -1;  ent = smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry);  return ent ? (int)ent->country : -1;}/** Return the number of countries recognized by the GeoIP database. */intgeoip_get_n_countries(void){  return (int) smartlist_len(geoip_countries);}/** Return the two-letter country code associated with the number <b>num</b>, * or "??" for an unknown value. */const char *geoip_get_country_name(int num){  if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {    geoip_country_t *c = smartlist_get(geoip_countries, num);    return c->countrycode;  } else    return "??";}/** Return true iff we have loaded a GeoIP database.*/intgeoip_is_loaded(void){  return geoip_countries != NULL && geoip_entries != NULL;}/** Entry in a map from IP address to the last time we've seen an incoming * connection from that IP address. Used by bridges only, to track which * countries have them blocked. */typedef struct clientmap_entry_t {  HT_ENTRY(clientmap_entry_t) node;  uint32_t ipaddr;  time_t last_seen; /* The last 2 bits of this value hold the client                     * operation. */} clientmap_entry_t;#define ACTION_MASK 3/** Map from client IP address to last time seen. */static HT_HEAD(clientmap, clientmap_entry_t) client_history =     HT_INITIALIZER();/** Time at which we started tracking client IP history. */static time_t client_history_starts = 0;/** DOCDOC */static time_t current_request_period_starts = 0;static int n_old_request_periods = 0;/** Hashtable helper: compute a hash of a clientmap_entry_t. */static INLINE unsignedclientmap_entry_hash(const clientmap_entry_t *a){  return ht_improve_hash((unsigned) a->ipaddr);}/** Hashtable helper: compare two clientmap_entry_t values for equality. 
*/static INLINE intclientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b){  return a->ipaddr == b->ipaddr;}HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,             clientmap_entries_eq);HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,            clientmap_entries_eq, 0.6, malloc, realloc, free);/** Note that we've seen a client connect from the IP <b>addr</b> (host order) * at time <b>now</b>. Ignored by all but bridges. */voidgeoip_note_client_seen(geoip_client_action_t action,                       uint32_t addr, time_t now){  or_options_t *options = get_options();  clientmap_entry_t lookup, *ent;  if (action == GEOIP_CLIENT_CONNECT) {    if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))      return;  } else {#ifndef ENABLE_GEOIP_STATS    return;#else    if (options->BridgeRelay || options->BridgeAuthoritativeDir ||        !options->DirRecordUsageByCountry)      return;#endif  }  /* DOCDOC */  while (current_request_period_starts + REQUEST_HIST_PERIOD < now) {    if (!geoip_countries)      geoip_countries = smartlist_create();    if (!current_request_period_starts) {      current_request_period_starts = now;      break;    }    SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {        memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));        memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],                sizeof(uint32_t)*(REQUEST_HIST_LEN-1));        c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;        c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;      });    current_request_period_starts += REQUEST_HIST_PERIOD;    if (n_old_request_periods < REQUEST_HIST_LEN-1)      ++n_old_request_periods;   }  /* We use the low 3 bits of the time to encode the action. Since we're   * potentially remembering tons of clients, we don't want to make   * clientmap_entry_t larger than it has to be. 
*/  now = (now & ~ACTION_MASK) | (((int)action) & ACTION_MASK);  lookup.ipaddr = addr;  ent = HT_FIND(clientmap, &client_history, &lookup);  if (ent) {    ent->last_seen = now;  } else {    ent = tor_malloc_zero(sizeof(clientmap_entry_t));    ent->ipaddr = addr;    ent->last_seen = now;    HT_INSERT(clientmap, &client_history, ent);  }  if (action == GEOIP_CLIENT_NETWORKSTATUS ||      action == GEOIP_CLIENT_NETWORKSTATUS_V2) {    int country_idx = geoip_get_country_by_ip(addr);    if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {      geoip_country_t *country = smartlist_get(geoip_countries, country_idx);      if (action == GEOIP_CLIENT_NETWORKSTATUS)        ++country->n_v3_ns_requests[REQUEST_HIST_LEN-1];      else        ++country->n_v2_ns_requests[REQUEST_HIST_LEN-1];    }  }  if (!client_history_starts) {    client_history_starts = now;    current_request_period_starts = now;  }}/** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's * older than a certain time. */static int_remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff){  time_t cutoff = *(time_t*)_cutoff;  if (ent->last_seen < cutoff) {    tor_free(ent);    return 1;  } else {    return 0;  }}/** Forget about all clients that haven't connected since <b>cutoff</b>. */voidgeoip_remove_old_clients(time_t cutoff){  clientmap_HT_FOREACH_FN(&client_history,                          _remove_old_client_helper,                          &cutoff);  if (client_history_starts < cutoff)    client_history_starts = cutoff;}/** Do not mention any country from which fewer than this number of IPs have * connected.  This conceivably avoids reporting information that could * deanonymize users, though analysis is lacking. */#define MIN_IPS_TO_NOTE_COUNTRY 0/** Do not report any geoip data at all if we have fewer than this number of * IPs to report about. 
*/#define MIN_IPS_TO_NOTE_ANYTHING 0/** When reporting geoip data about countries, round up to the nearest * multiple of this value. */#define IP_GRANULARITY 8/** Return the time at which we started recording geoip data. */time_tgeoip_get_history_start(void){  return client_history_starts;}/** Helper type: used to sort per-country totals by value. */typedef struct c_hist_t {  char country[3]; /**< Two-letter country code. */  unsigned total; /**< Total IP addresses seen in this country. */} c_hist_t;/** Sorting helper: return -1, 1, or 0 based on comparison of two * geoip_entry_t.  Sort in descending order of total, and then by country * code. */static int_c_hist_compare(const void **_a, const void **_b){  const c_hist_t *a = *_a, *b = *_b;  if (a->total > b->total)    return -1;  else if (a->total < b->total)    return 1;  else    return strcmp(a->country, b->country);}/*DOCDOC*/#define GEOIP_MIN_OBSERVATION_TIME (12*60*60)static INLINE unsignedround_to_next_multiple_of(unsigned number, unsigned divisor){  number += divisor - 1;  number -= number % divisor;  return number;}/** Return a newly allocated comma-separated string containing entries for all * the countries from which we've seen enough clients connect. The entry * format is cc=num where num is the number of IPs we've seen connecting from * that country, and cc is a lowercased country code.  Returns NULL if we don't * want to export geoip data yet. 
*/char *geoip_get_client_history(time_t now, geoip_client_action_t action){  char *result = NULL;  if (!geoip_is_loaded())    return NULL;  if (client_history_starts < (now - GEOIP_MIN_OBSERVATION_TIME)) {    char buf[32];    smartlist_t *chunks = NULL;    smartlist_t *entries = NULL;    int n_countries = geoip_get_n_countries();    int i;    clientmap_entry_t **ent;    unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);    unsigned total = 0;    unsigned granularity = IP_GRANULARITY;#ifdef ENABLE_GEOIP_STATS    if (get_options()->DirRecordUsageByCountry)      granularity = get_options()->DirRecordUsageGranularity;#endif    HT_FOREACH(ent, clientmap, &client_history) {      int country;      if (((*ent)->last_seen & ACTION_MASK) != action)        continue;      country = geoip_get_country_by_ip((*ent)->ipaddr);      if (country < 0)        continue;      tor_assert(0 <= country && country < n_countries);      ++counts[country];      ++total;    }    /* Don't record anything if we haven't seen enough IPs. */#if (MIN_IPS_TO_NOTE_ANYTHING > 0)    if (total < MIN_IPS_TO_NOTE_ANYTHING)      goto done;#endif    /* Make a list of c_hist_t */    entries = smartlist_create();    for (i = 0; i < n_countries; ++i) {      unsigned c = counts[i];      const char *countrycode;      c_hist_t *ent;      /* Only report a country if it has a minimum number of IPs. */#if (MIN_IPS_TO_NOTE_COUNTRY > 0)      if (c >= MIN_IPS_TO_NOTE_COUNTRY) {#else      if (c > 0) {#endif        c = round_to_next_multiple_of(c, granularity);        countrycode = geoip_get_country_name(i);        ent = tor_malloc(sizeof(c_hist_t));        strlcpy(ent->country, countrycode, sizeof(ent->country));        ent->total = c;        smartlist_add(entries, ent);      }    }    /* Sort entries. Note that we must do this _AFTER_ rounding, or else     * the sort order could leak info. */    smartlist_sort(entries, _c_hist_compare);    /* Build the result. 
*/    chunks = smartlist_create();    SMARTLIST_FOREACH(entries, c_hist_t *, ch, {        tor_snprintf(buf, sizeof(buf), "%s=%u", ch->country, ch->total);        smartlist_add(chunks, tor_strdup(buf));      });    result = smartlist_join_strings(chunks, ",", 0, NULL);#if (MIN_IPS_TO_NOTE_ANYTHING > 0)  done:#endif    tor_free(counts);    if (chunks) {      SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));      smartlist_free(chunks);    }    if (entries) {      SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));      smartlist_free(entries);    }  }  return result;}/**DOCDOC*/char *geoip_get_request_history(time_t now, geoip_client_action_t action){  smartlist_t *entries, *strings;  char *result;  unsigned granularity = IP_GRANULARITY;#ifdef ENABLE_GEOIP_STATS  if (get_options()->DirRecordUsageByCountry)    granularity = get_options()->DirRecordUsageGranularity;#endif  if (client_history_starts >= (now - GEOIP_MIN_OBSERVATION_TIME))    return NULL;  if (action != GEOIP_CLIENT_NETWORKSTATUS &&      action != GEOIP_CLIENT_NETWORKSTATUS_V2)    return NULL;  if (!geoip_countries)    return NULL;  entries = smartlist_create();  SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {      uint32_t *n = (action == GEOIP_CLIENT_NETWORKSTATUS)        ? 
c->n_v3_ns_requests : c->n_v2_ns_requests;      uint32_t tot = 0;      int i;      c_hist_t *ent;      for (i=0; i < REQUEST_HIST_LEN; ++i)        tot += n[i];      if (!tot)        continue;      ent = tor_malloc_zero(sizeof(c_hist_t));      strlcpy(ent->country, c->countrycode, sizeof(ent->country));      ent->total = round_to_next_multiple_of(tot, granularity);      smartlist_add(entries, ent);  });  smartlist_sort(entries, _c_hist_compare);  strings = smartlist_create();  SMARTLIST_FOREACH(entries, c_hist_t *, ent, {      char buf[32];      tor_snprintf(buf, sizeof(buf), "%s=%u", ent->country, ent->total);      smartlist_add(strings, tor_strdup(buf));    });  result = smartlist_join_strings(strings, ",", 0, NULL);  SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp));  SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent));  smartlist_free(strings);  smartlist_free(entries);  return result;}voiddump_geoip_stats(void){#ifdef ENABLE_GEOIP_STATS  time_t now = time(NULL);  time_t request_start;  char *filename = get_datadir_fname("geoip-stats");  char *data_v2 = NULL, *data_v3 = NULL;  char since[ISO_TIME_LEN+1], written[ISO_TIME_LEN+1];  open_file_t *open_file = NULL;  double v2_share = 0.0, v3_share = 0.0;  FILE *out;  data_v2 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);  data_v3 = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);  format_iso_time(since, geoip_get_history_start());  format_iso_time(written, now);  out = start_writing_to_stdio_file(filename, OPEN_FLAGS_REPLACE,                                    0600, &open_file);  if (!out)    goto done;  if (fprintf(out, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips %s\n",              written, since,              data_v3 ? data_v3 : "", data_v2 ? 
data_v2 : "") < 0)    goto done;  tor_free(data_v2);  tor_free(data_v3);  request_start = current_request_period_starts -    (n_old_request_periods * REQUEST_HIST_PERIOD);  format_iso_time(since, request_start);  data_v2 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);  data_v3 = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);  if (fprintf(out, "requests-start %s\nn-ns-reqs %s\nn-v2-ns-reqs %s\n",              since,              data_v3 ? data_v3 : "", data_v2 ? data_v2 : "") < 0)    goto done;  if (!router_get_my_share_of_directory_requests(&v2_share, &v3_share)) {    if (fprintf(out, "v2-ns-share %0.2lf%%\n", v2_share*100) < 0)      goto done;    if (fprintf(out, "v3-ns-share %0.2lf%%\n", v3_share*100) < 0)      goto done;  }  finish_writing_to_file(open_file);  open_file = NULL; done:  if (open_file)    abort_writing_to_file(open_file);  tor_free(filename);  tor_free(data_v2);  tor_free(data_v3);#endif}/** Helper used to implement GETINFO ip-to-country/... controller command. */intgetinfo_helper_geoip(control_connection_t *control_conn,                     const char *question, char **answer){  (void)control_conn;  if (geoip_is_loaded() && !strcmpstart(question, "ip-to-country/")) {    int c;    uint32_t ip;    struct in_addr in;    question += strlen("ip-to-country/");    if (tor_inet_aton(question, &in) != 0) {      ip = ntohl(in.s_addr);      c = geoip_get_country_by_ip(ip);      *answer = tor_strdup(geoip_get_country_name(c));    }  }  return 0;}/** Release all storage held by the GeoIP database. 
*/
static void
clear_geoip_db(void)
{
  if (geoip_countries) {
    SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
    smartlist_free(geoip_countries);
  }
  /* The map stores indices encoded as pointers, not allocations, so no
   * per-value free function is passed. */
  if (country_idxplus1_by_lc_code)
    strmap_free(country_idxplus1_by_lc_code, NULL);
  if (geoip_entries) {
    SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, ent, tor_free(ent));
    smartlist_free(geoip_entries);
  }
  /* Reset the globals so that geoip_is_loaded() reports false. */
  geoip_countries = NULL;
  country_idxplus1_by_lc_code = NULL;
  geoip_entries = NULL;
}

/** Release all storage held in this file. */
void
geoip_free_all(void)
{
  clientmap_entry_t **ent, **next, *this;
  /* Unlink each client entry from the table before freeing it. */
  for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
    this = *ent;
    next = HT_NEXT_RMV(clientmap, &client_history, ent);
    tor_free(this);
  }
  HT_CLEAR(clientmap, &client_history);

  clear_geoip_db();
}