geoip.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. /* Copyright (c) 2007-2008, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. /* $Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm $ */
  4. const char geoip_c_id[] =
  5. "$Id: /tor/trunk/src/or/networkstatus.c 15493 2007-12-16T18:33:25.055570Z nickm $";
  6. /**
  7. * \file geoip.c
  8. * \brief Functions related to maintaining an IP-to-country database and to
  9. * summarizing client connections by country.
  10. */
  11. #define GEOIP_PRIVATE
  12. #include "or.h"
  13. #include "ht.h"
  14. static void clear_geoip_db(void);
  15. /** An entry from the GeoIP file: maps an IP range to a country. */
  16. typedef struct geoip_entry_t {
  17. uint32_t ip_low; /**< The lowest IP in the range, in host order */
  18. uint32_t ip_high; /**< The highest IP in the range, in host order */
  19. int country; /**< An index into geoip_countries */
  20. } geoip_entry_t;
  21. /** A list of lowercased two-letter country codes. */
  22. static smartlist_t *geoip_countries = NULL;
  23. /** A map from lowercased country codes to their position in geoip_countries.
  24. * The index is encoded in the pointer, and 1 is added so that NULL can mean
  25. * not found. */
  26. static strmap_t *country_idxplus1_by_lc_code = NULL;
  27. /** A list of all known geoip_entry_t, sorted by ip_low. */
  28. static smartlist_t *geoip_entries = NULL;
  29. /** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and
  30. * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>.
  31. */
  32. static void
  33. geoip_add_entry(uint32_t low, uint32_t high, const char *country)
  34. {
  35. uintptr_t idx;
  36. geoip_entry_t *ent;
  37. void *_idxplus1;
  38. if (high < low)
  39. return;
  40. _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
  41. if (!_idxplus1) {
  42. char *c = tor_strdup(country);
  43. tor_strlower(c);
  44. smartlist_add(geoip_countries, c);
  45. idx = smartlist_len(geoip_countries) - 1;
  46. strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
  47. } else {
  48. idx = ((uintptr_t)_idxplus1)-1;
  49. }
  50. tor_assert(!strcasecmp(smartlist_get(geoip_countries, idx), country));
  51. ent = tor_malloc_zero(sizeof(geoip_entry_t));
  52. ent->ip_low = low;
  53. ent->ip_high = high;
  54. ent->country = idx;
  55. smartlist_add(geoip_entries, ent);
  56. }
  57. /** Add an entry to the GeoIP table, parsing it from <b>line</b>. The
  58. * format is as for geoip_load_file. */
  59. /*private*/ int
  60. geoip_parse_entry(const char *line)
  61. {
  62. unsigned int low, high;
  63. char b[3];
  64. if (!geoip_countries) {
  65. geoip_countries = smartlist_create();
  66. geoip_entries = smartlist_create();
  67. country_idxplus1_by_lc_code = strmap_new();
  68. }
  69. if (sscanf(line,"%u,%u,%2s", &low, &high, b) == 3) {
  70. geoip_add_entry(low, high, b);
  71. return 0;
  72. } else if (sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, b) == 3) {
  73. geoip_add_entry(low, high, b);
  74. return 0;
  75. } else {
  76. log_warn(LD_GENERAL, "Unable to parse line from GEOIP file: %s",
  77. escaped(line));
  78. return -1;
  79. }
  80. }
  81. /** Sorting helper: return -1, 1, or 0 based on comparison of two
  82. * geoip_entry_t */
  83. static int
  84. _geoip_compare_entries(const void **_a, const void **_b)
  85. {
  86. const geoip_entry_t *a = *_a, *b = *_b;
  87. if (a->ip_low < b->ip_low)
  88. return -1;
  89. else if (a->ip_low > b->ip_low)
  90. return 1;
  91. else
  92. return 0;
  93. }
  94. /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
  95. * to a uint32_t in host order) to a geoip_entry_t */
  96. static int
  97. _geoip_compare_key_to_entry(const void *_key, const void **_member)
  98. {
  99. const uint32_t addr = *(uint32_t *)_key;
  100. const geoip_entry_t *entry = *_member;
  101. if (addr < entry->ip_low)
  102. return -1;
  103. else if (addr > entry->ip_high)
  104. return 1;
  105. else
  106. return 0;
  107. }
  108. /** Clear the GeoIP database and reload it from the file
  109. * <b>filename</b>. Return 0 on success, -1 on failure.
  110. *
  111. * Recognized line formats are:
  112. * INTIPLOW,INTIPHIGH,CC
  113. * and
  114. * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
  115. * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
  116. * integers, and CC is a country code.
  117. */
  118. int
  119. geoip_load_file(const char *filename)
  120. {
  121. FILE *f;
  122. clear_geoip_db();
  123. if (!(f = fopen(filename, "r"))) {
  124. log_warn(LD_GENERAL, "Failed to open GEOIP file %s.", filename);
  125. return -1;
  126. }
  127. geoip_countries = smartlist_create();
  128. geoip_entries = smartlist_create();
  129. country_idxplus1_by_lc_code = strmap_new();
  130. log_info(LD_GENERAL, "Parsing GEOIP file.");
  131. while (!feof(f)) {
  132. char buf[512];
  133. if (fgets(buf, sizeof(buf), f) == NULL)
  134. break;
  135. /* FFFF track full country name. */
  136. geoip_parse_entry(buf);
  137. }
  138. /*XXXX020 abort and return -1 if no entries/illformed?*/
  139. fclose(f);
  140. smartlist_sort(geoip_entries, _geoip_compare_entries);
  141. return 0;
  142. }
  143. /** Given an IP address in host order, return a number representing the
  144. * country to which that address belongs, or -1 for unknown. The return value
  145. * will always be less than geoip_get_n_countries(). To decode it,
  146. * call geoip_get_country_name().
  147. */
  148. int
  149. geoip_get_country_by_ip(uint32_t ipaddr)
  150. {
  151. geoip_entry_t *ent;
  152. if (!geoip_entries)
  153. return -1;
  154. ent = smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry);
  155. return ent ? ent->country : -1;
  156. }
  157. /** Return the number of countries recognized by the GeoIP database. */
  158. int
  159. geoip_get_n_countries(void)
  160. {
  161. return smartlist_len(geoip_countries);
  162. }
  163. /** Return the two-letter country code associated with the number <b>num</b>,
  164. * or "??" for an unknown value. */
  165. const char *
  166. geoip_get_country_name(int num)
  167. {
  168. if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries))
  169. return smartlist_get(geoip_countries, num);
  170. else
  171. return "??";
  172. }
  173. /** Return true iff we have loaded a GeoIP database.*/
  174. int
  175. geoip_is_loaded(void)
  176. {
  177. return geoip_countries != NULL && geoip_entries != NULL;
  178. }
  179. /** Entry in a map from IP address to the last time we've seen an incoming
  180. * connection from that IP address. Used by bridges only, to track which
  181. * countries have them blocked. */
  182. typedef struct clientmap_entry_t {
  183. HT_ENTRY(clientmap_entry_t) node;
  184. uint32_t ipaddr;
  185. time_t last_seen;
  186. } clientmap_entry_t;
  187. /** Map from client IP address to last time seen. */
  188. static HT_HEAD(clientmap, clientmap_entry_t) client_history =
  189. HT_INITIALIZER();
  190. /** Time at which we started tracking client history. */
  191. static time_t client_history_starts = 0;
  192. /** Hashtable helper: compute a hash of a clientmap_entry_t. */
  193. static INLINE unsigned
  194. clientmap_entry_hash(const clientmap_entry_t *a)
  195. {
  196. return ht_improve_hash((unsigned) a->ipaddr);
  197. }
  198. /** Hashtable helper: compare two clientmap_entry_t values for equality. */
  199. static INLINE int
  200. clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
  201. {
  202. return a->ipaddr == b->ipaddr;
  203. }
  204. HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
  205. clientmap_entries_eq);
  206. HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
  207. clientmap_entries_eq, 0.6, malloc, realloc, free);
  208. /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
  209. * at time <b>now</b>. Ignored by all but bridges. */
  210. void
  211. geoip_note_client_seen(uint32_t addr, time_t now)
  212. {
  213. or_options_t *options = get_options();
  214. clientmap_entry_t lookup, *ent;
  215. if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
  216. return;
  217. lookup.ipaddr = addr;
  218. ent = HT_FIND(clientmap, &client_history, &lookup);
  219. if (ent) {
  220. ent->last_seen = now;
  221. } else {
  222. ent = tor_malloc_zero(sizeof(clientmap_entry_t));
  223. ent->ipaddr = addr;
  224. ent->last_seen = now;
  225. HT_INSERT(clientmap, &client_history, ent);
  226. }
  227. if (!client_history_starts)
  228. client_history_starts = now;
  229. }
  230. /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
  231. * older than a certain time. */
  232. static int
  233. _remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff)
  234. {
  235. time_t cutoff = *(time_t*)_cutoff;
  236. if (ent->last_seen < cutoff) {
  237. tor_free(ent);
  238. return 1;
  239. } else {
  240. return 0;
  241. }
  242. }
  243. /** Forget about all clients that haven't connected since <b>cutoff</b>. */
  244. void
  245. geoip_remove_old_clients(time_t cutoff)
  246. {
  247. clientmap_HT_FOREACH_FN(&client_history,
  248. _remove_old_client_helper,
  249. &cutoff);
  250. if (client_history_starts < cutoff)
  251. client_history_starts = cutoff;
  252. }
  253. /** Do not mention any country from which fewer than this number of IPs have
  254. * connected. This avoids reporting information that could deanonymize
  255. * users. */
  256. #define MIN_IPS_TO_NOTE_COUNTRY 8
  257. /** Do not report any geoip data at all if we have fewer than this number of
  258. * IPs to report about. */
  259. #define MIN_IPS_TO_NOTE_ANYTHING 16
  260. /** When reporting geoip data about countries, round down to the nearest
  261. * multiple of this value. */
  262. #define IP_GRANULARITY 8
  263. /** Return the time at which we started recording geoip data. */
  264. time_t
  265. geoip_get_history_start(void)
  266. {
  267. return client_history_starts;
  268. }
  269. /* Helper type: used to sort results by value. */
  270. typedef struct c_hist_t {
  271. char country[3];
  272. unsigned total;
  273. } c_hist_t;
  274. /** Sorting helper: return -1, 1, or 0 based on comparison of two
  275. * geoip_entry_t. Sort in descending order of total, and then by country
  276. * code. */
  277. static int
  278. _c_hist_compare(const void **_a, const void **_b)
  279. {
  280. const c_hist_t *a = *_a, *b = *_b;
  281. if (a->total > b->total)
  282. return -1;
  283. else if (a->total < b->total)
  284. return 1;
  285. else
  286. return strcmp(a->country, b->country);
  287. }
  288. /** Return a newly allocated comma-separated string containing entries for all
  289. * the countries from which we've seen enough clients connect. The entry
  290. * format is cc=num where num is the number of IPs we've seen connecting from
  291. * that country, and cc is a lowercased country code. Returns NULL if we don't
  292. * want to export geoip data yet. */
  293. char *
  294. geoip_get_client_history(time_t now)
  295. {
  296. char *result = NULL;
  297. if (!geoip_is_loaded())
  298. return NULL;
  299. if (client_history_starts < (now - 12*60*60)) {
  300. char buf[32];
  301. smartlist_t *chunks = NULL;
  302. smartlist_t *entries = NULL;
  303. int n_countries = geoip_get_n_countries();
  304. int i;
  305. clientmap_entry_t **ent;
  306. unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
  307. unsigned total = 0;
  308. HT_FOREACH(ent, clientmap, &client_history) {
  309. int country = geoip_get_country_by_ip((*ent)->ipaddr);
  310. if (country < 0)
  311. continue;
  312. tor_assert(0 <= country && country < n_countries);
  313. ++counts[country];
  314. ++total;
  315. }
  316. /* Don't record anything if we haven't seen enough IPs. */
  317. if (total < MIN_IPS_TO_NOTE_ANYTHING)
  318. goto done;
  319. /* Make a list of c_hist_t */
  320. entries = smartlist_create();
  321. for (i = 0; i < n_countries; ++i) {
  322. unsigned c = counts[i];
  323. const char *countrycode;
  324. c_hist_t *ent;
  325. /* Only report a country if it has a minimum number of IPs. */
  326. if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
  327. /* Round up to the next multiple of IP_GRANULARITY */
  328. c += IP_GRANULARITY-1;
  329. c -= c % IP_GRANULARITY;
  330. countrycode = geoip_get_country_name(i);
  331. ent = tor_malloc(sizeof(c_hist_t));
  332. strlcpy(ent->country, countrycode, sizeof(ent->country));
  333. ent->total = c;
  334. smartlist_add(entries, ent);
  335. }
  336. }
  337. /* Sort entries. Note that we must do this _AFTER_ rounding, or else
  338. * the sort order could leak info. */
  339. smartlist_sort(entries, _c_hist_compare);
  340. /* Build the result. */
  341. chunks = smartlist_create();
  342. SMARTLIST_FOREACH(entries, c_hist_t *, ch, {
  343. tor_snprintf(buf, sizeof(buf), "%s=%u", ch->country, ch->total);
  344. smartlist_add(chunks, tor_strdup(buf));
  345. });
  346. result = smartlist_join_strings(chunks, ",", 0, NULL);
  347. done:
  348. tor_free(counts);
  349. if (chunks) {
  350. SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
  351. smartlist_free(chunks);
  352. }
  353. if (entries) {
  354. SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));
  355. smartlist_free(entries);
  356. }
  357. }
  358. return result;
  359. }
  360. /** Helper used to implement GETINFO ip-to-country/... controller command. */
  361. int
  362. getinfo_helper_geoip(control_connection_t *control_conn,
  363. const char *question, char **answer)
  364. {
  365. (void)control_conn;
  366. if (geoip_is_loaded() && !strcmpstart(question, "ip-to-country/")) {
  367. int c;
  368. uint32_t ip;
  369. struct in_addr in;
  370. question += strlen("ip-to-country/");
  371. if (tor_inet_aton(question, &in) != 0) {
  372. ip = ntohl(in.s_addr);
  373. c = geoip_get_country_by_ip(ip);
  374. *answer = tor_strdup(geoip_get_country_name(c));
  375. }
  376. }
  377. return 0;
  378. }
  379. /** Release all storage held by the GeoIP database. */
  380. static void
  381. clear_geoip_db(void)
  382. {
  383. if (geoip_countries) {
  384. SMARTLIST_FOREACH(geoip_countries, char *, cp, tor_free(cp));
  385. smartlist_free(geoip_countries);
  386. }
  387. if (country_idxplus1_by_lc_code)
  388. strmap_free(country_idxplus1_by_lc_code, NULL);
  389. if (geoip_entries) {
  390. SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, ent, tor_free(ent));
  391. smartlist_free(geoip_entries);
  392. }
  393. geoip_countries = NULL;
  394. country_idxplus1_by_lc_code = NULL;
  395. geoip_entries = NULL;
  396. }
  397. /** Release all storage held in this file. */
  398. void
  399. geoip_free_all(void)
  400. {
  401. clientmap_entry_t **ent, **next, *this;
  402. for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
  403. this = *ent;
  404. next = HT_NEXT_RMV(clientmap, &client_history, ent);
  405. tor_free(this);
  406. }
  407. HT_CLEAR(clientmap, &client_history);
  408. clear_geoip_db();
  409. }