geoip.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819
  1. /* Copyright (c) 2007-2009, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. /**
  4. * \file geoip.c
  5. * \brief Functions related to maintaining an IP-to-country database and to
  6. * summarizing client connections by country.
  7. */
  8. #define GEOIP_PRIVATE
  9. #include "or.h"
  10. #include "ht.h"
  /* Forward declarations for file-local helpers that are used before their
   * definitions appear further down in this file. */
  static void clear_geoip_db(void);
  static void dump_geoip_stats(void);
  static void dump_entry_stats(void);
  /** One row of the GeoIP table: an inclusive IPv4 range and the country it
   * belongs to. */
  typedef struct geoip_entry_t {
    /** First address of the range, in host byte order. */
    uint32_t ip_low;
    /** Last address of the range (inclusive), in host byte order. */
    uint32_t ip_high;
    /** Position of the owning country within geoip_countries. */
    intptr_t country;
  } geoip_entry_t;
  /** Number of request-history periods kept for every country. */
  #define REQUEST_HIST_LEN 1
  /** Length of one request-history period, in seconds (one day). */
  #define REQUEST_HIST_PERIOD (24*60*60)
  24. /** A per-country record for GeoIP request history. */
  25. typedef struct geoip_country_t {
  26. char countrycode[3];
  27. uint32_t n_v2_ns_requests[REQUEST_HIST_LEN];
  28. uint32_t n_v3_ns_requests[REQUEST_HIST_LEN];
  29. } geoip_country_t;
  30. /** A list of geoip_country_t */
  31. static smartlist_t *geoip_countries = NULL;
  32. /** A map from lowercased country codes to their position in geoip_countries.
  33. * The index is encoded in the pointer, and 1 is added so that NULL can mean
  34. * not found. */
  35. static strmap_t *country_idxplus1_by_lc_code = NULL;
  36. /** A list of all known geoip_entry_t, sorted by ip_low. */
  37. static smartlist_t *geoip_entries = NULL;
  38. /** Return the index of the <b>country</b>'s entry in the GeoIP DB
  39. * if it is a valid 2-letter country code, otherwise return -1.
  40. */
  41. country_t
  42. geoip_get_country(const char *country)
  43. {
  44. void *_idxplus1;
  45. intptr_t idx;
  46. _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
  47. if (!_idxplus1)
  48. return -1;
  49. idx = ((uintptr_t)_idxplus1)-1;
  50. return (country_t)idx;
  51. }
  52. /** Add an entry to the GeoIP table, mapping all IPs between <b>low</b> and
  53. * <b>high</b>, inclusive, to the 2-letter country code <b>country</b>.
  54. */
  55. static void
  56. geoip_add_entry(uint32_t low, uint32_t high, const char *country)
  57. {
  58. intptr_t idx;
  59. geoip_entry_t *ent;
  60. void *_idxplus1;
  61. if (high < low)
  62. return;
  63. _idxplus1 = strmap_get_lc(country_idxplus1_by_lc_code, country);
  64. if (!_idxplus1) {
  65. geoip_country_t *c = tor_malloc_zero(sizeof(geoip_country_t));
  66. strlcpy(c->countrycode, country, sizeof(c->countrycode));
  67. tor_strlower(c->countrycode);
  68. smartlist_add(geoip_countries, c);
  69. idx = smartlist_len(geoip_countries) - 1;
  70. strmap_set_lc(country_idxplus1_by_lc_code, country, (void*)(idx+1));
  71. } else {
  72. idx = ((uintptr_t)_idxplus1)-1;
  73. }
  74. {
  75. geoip_country_t *c = smartlist_get(geoip_countries, idx);
  76. tor_assert(!strcasecmp(c->countrycode, country));
  77. }
  78. ent = tor_malloc_zero(sizeof(geoip_entry_t));
  79. ent->ip_low = low;
  80. ent->ip_high = high;
  81. ent->country = idx;
  82. smartlist_add(geoip_entries, ent);
  83. }
  84. /** Add an entry to the GeoIP table, parsing it from <b>line</b>. The
  85. * format is as for geoip_load_file(). */
  86. /*private*/ int
  87. geoip_parse_entry(const char *line)
  88. {
  89. unsigned int low, high;
  90. char b[3];
  91. if (!geoip_countries) {
  92. geoip_countries = smartlist_create();
  93. geoip_entries = smartlist_create();
  94. country_idxplus1_by_lc_code = strmap_new();
  95. }
  96. while (TOR_ISSPACE(*line))
  97. ++line;
  98. if (*line == '#')
  99. return 0;
  100. if (sscanf(line,"%u,%u,%2s", &low, &high, b) == 3) {
  101. geoip_add_entry(low, high, b);
  102. return 0;
  103. } else if (sscanf(line,"\"%u\",\"%u\",\"%2s\",", &low, &high, b) == 3) {
  104. geoip_add_entry(low, high, b);
  105. return 0;
  106. } else {
  107. log_warn(LD_GENERAL, "Unable to parse line from GEOIP file: %s",
  108. escaped(line));
  109. return -1;
  110. }
  111. }
  112. /** Sorting helper: return -1, 1, or 0 based on comparison of two
  113. * geoip_entry_t */
  114. static int
  115. _geoip_compare_entries(const void **_a, const void **_b)
  116. {
  117. const geoip_entry_t *a = *_a, *b = *_b;
  118. if (a->ip_low < b->ip_low)
  119. return -1;
  120. else if (a->ip_low > b->ip_low)
  121. return 1;
  122. else
  123. return 0;
  124. }
  125. /** bsearch helper: return -1, 1, or 0 based on comparison of an IP (a pointer
  126. * to a uint32_t in host order) to a geoip_entry_t */
  127. static int
  128. _geoip_compare_key_to_entry(const void *_key, const void **_member)
  129. {
  130. const uint32_t addr = *(uint32_t *)_key;
  131. const geoip_entry_t *entry = *_member;
  132. if (addr < entry->ip_low)
  133. return -1;
  134. else if (addr > entry->ip_high)
  135. return 1;
  136. else
  137. return 0;
  138. }
  139. /** Return 1 if we should collect geoip stats on bridge users, and
  140. * include them in our extrainfo descriptor. Else return 0. */
  141. int
  142. should_record_bridge_info(or_options_t *options)
  143. {
  144. return options->BridgeRelay && options->BridgeRecordUsageByCountry;
  145. }
  146. /** Clear the GeoIP database and reload it from the file
  147. * <b>filename</b>. Return 0 on success, -1 on failure.
  148. *
  149. * Recognized line formats are:
  150. * INTIPLOW,INTIPHIGH,CC
  151. * and
  152. * "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
  153. * where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as 4-byte unsigned
  154. * integers, and CC is a country code.
  155. *
  156. * It also recognizes, and skips over, blank lines and lines that start
  157. * with '#' (comments).
  158. */
  159. int
  160. geoip_load_file(const char *filename, or_options_t *options)
  161. {
  162. FILE *f;
  163. const char *msg = "";
  164. int severity = options_need_geoip_info(options, &msg) ? LOG_WARN : LOG_INFO;
  165. clear_geoip_db();
  166. if (!(f = fopen(filename, "r"))) {
  167. log_fn(severity, LD_GENERAL, "Failed to open GEOIP file %s. %s",
  168. filename, msg);
  169. return -1;
  170. }
  171. if (!geoip_countries) {
  172. geoip_country_t *geoip_unresolved;
  173. geoip_countries = smartlist_create();
  174. /* Add a geoip_country_t for requests that could not be resolved to a
  175. * country as first element (index 0) to geoip_countries. */
  176. geoip_unresolved = tor_malloc_zero(sizeof(geoip_country_t));
  177. strlcpy(geoip_unresolved->countrycode, "??",
  178. sizeof(geoip_unresolved->countrycode));
  179. smartlist_add(geoip_countries, geoip_unresolved);
  180. country_idxplus1_by_lc_code = strmap_new();
  181. }
  182. if (geoip_entries) {
  183. SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, e, tor_free(e));
  184. smartlist_free(geoip_entries);
  185. }
  186. geoip_entries = smartlist_create();
  187. log_notice(LD_GENERAL, "Parsing GEOIP file.");
  188. while (!feof(f)) {
  189. char buf[512];
  190. if (fgets(buf, (int)sizeof(buf), f) == NULL)
  191. break;
  192. /* FFFF track full country name. */
  193. geoip_parse_entry(buf);
  194. }
  195. /*XXXX abort and return -1 if no entries/illformed?*/
  196. fclose(f);
  197. smartlist_sort(geoip_entries, _geoip_compare_entries);
  198. /* Okay, now we need to maybe change our mind about what is in which
  199. * country. */
  200. refresh_all_country_info();
  201. return 0;
  202. }
  203. /** Given an IP address in host order, return a number representing the
  204. * country to which that address belongs, or -1 for unknown. The return value
  205. * will always be less than geoip_get_n_countries(). To decode it,
  206. * call geoip_get_country_name().
  207. */
  208. int
  209. geoip_get_country_by_ip(uint32_t ipaddr)
  210. {
  211. geoip_entry_t *ent;
  212. if (!geoip_entries)
  213. return -1;
  214. ent = smartlist_bsearch(geoip_entries, &ipaddr, _geoip_compare_key_to_entry);
  215. return ent ? (int)ent->country : -1;
  216. }
  217. /** Return the number of countries recognized by the GeoIP database. */
  218. int
  219. geoip_get_n_countries(void)
  220. {
  221. return (int) smartlist_len(geoip_countries);
  222. }
  223. /** Return the two-letter country code associated with the number <b>num</b>,
  224. * or "??" for an unknown value. */
  225. const char *
  226. geoip_get_country_name(country_t num)
  227. {
  228. if (geoip_countries && num >= 0 && num < smartlist_len(geoip_countries)) {
  229. geoip_country_t *c = smartlist_get(geoip_countries, num);
  230. return c->countrycode;
  231. } else
  232. return "??";
  233. }
  234. /** Return true iff we have loaded a GeoIP database.*/
  235. int
  236. geoip_is_loaded(void)
  237. {
  238. return geoip_countries != NULL && geoip_entries != NULL;
  239. }
  240. /** Entry in a map from IP address to the last time we've seen an incoming
  241. * connection from that IP address. Used by bridges only, to track which
  242. * countries have them blocked. */
  243. typedef struct clientmap_entry_t {
  244. HT_ENTRY(clientmap_entry_t) node;
  245. uint32_t ipaddr;
  246. unsigned int last_seen_in_minutes:30;
  247. unsigned int action:2;
  248. } clientmap_entry_t;
  249. #define ACTION_MASK 3
  250. /** Map from client IP address to last time seen. */
  251. static HT_HEAD(clientmap, clientmap_entry_t) client_history =
  252. HT_INITIALIZER();
  253. /** Time at which we started tracking client IP history. */
  254. static time_t client_history_starts = 0;
  255. /** When did the current period of checking per-country request history
  256. * start? */
  257. static time_t current_request_period_starts = 0;
  258. /** How many older request periods are we remembering? */
  259. static int n_old_request_periods = 0;
  260. /** Hashtable helper: compute a hash of a clientmap_entry_t. */
  261. static INLINE unsigned
  262. clientmap_entry_hash(const clientmap_entry_t *a)
  263. {
  264. return ht_improve_hash((unsigned) a->ipaddr);
  265. }
  266. /** Hashtable helper: compare two clientmap_entry_t values for equality. */
  267. static INLINE int
  268. clientmap_entries_eq(const clientmap_entry_t *a, const clientmap_entry_t *b)
  269. {
  270. return a->ipaddr == b->ipaddr && a->action == b->action;
  271. }
  272. HT_PROTOTYPE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
  273. clientmap_entries_eq);
  274. HT_GENERATE(clientmap, clientmap_entry_t, node, clientmap_entry_hash,
  275. clientmap_entries_eq, 0.6, malloc, realloc, free);
  /** Minimum spacing, in seconds, between two re-estimations of the share
   * of v2 and v3 directory requests that reaches us.  Hooking the estimate
   * to network status updates would be more exact, but would spread calls
   * into code unrelated to geoip stats; a 15-minute approximation is
   * considered good enough. */
  #define REQUEST_SHARE_INTERVAL (15 * 60)

  /** Time of the most recent share estimation; 0 if none happened yet. */
  static time_t last_time_determined_shares = 0;

  /** Running sum of (v2 share * seconds that share was assumed valid). */
  static double v2_share_times_seconds;

  /** Running sum of (v3 share * seconds that share was assumed valid). */
  static double v3_share_times_seconds;

  /** Total seconds covered by the two running sums above. */
  static int share_seconds;
  293. /** Try to determine which fraction of v2 and v3 directory requests aimed at
  294. * caches will be sent to us at time <b>now</b> and store that value in
  295. * order to take a mean value later on. */
  296. static void
  297. geoip_determine_shares(time_t now)
  298. {
  299. double v2_share = 0.0, v3_share = 0.0;
  300. if (router_get_my_share_of_directory_requests(&v2_share, &v3_share) < 0)
  301. return;
  302. if (last_time_determined_shares) {
  303. v2_share_times_seconds += v2_share *
  304. ((double) (now - last_time_determined_shares));
  305. v3_share_times_seconds += v3_share *
  306. ((double) (now - last_time_determined_shares));
  307. share_seconds += now - last_time_determined_shares;
  308. }
  309. last_time_determined_shares = now;
  310. }
  311. /** Calculate which fraction of v2 and v3 directory requests aimed at caches
  312. * have been sent to us since the last call of this function up to time
  313. * <b>now</b>. Set *<b>v2_share_out</b> and *<b>v3_share_out</b> to the
  314. * fractions of v2 and v3 protocol shares we expect to have seen. Reset
  315. * counters afterwards. Return 0 on success, -1 on failure (e.g. when zero
  316. * seconds have passed since the last call).*/
  317. static int
  318. geoip_get_mean_shares(time_t now, double *v2_share_out,
  319. double *v3_share_out)
  320. {
  321. geoip_determine_shares(now);
  322. if (!share_seconds)
  323. return -1;
  324. *v2_share_out = v2_share_times_seconds / ((double) share_seconds);
  325. *v3_share_out = v3_share_times_seconds / ((double) share_seconds);
  326. v2_share_times_seconds = v3_share_times_seconds = 0.0;
  327. share_seconds = 0;
  328. return 0;
  329. }
  330. /** Note that we've seen a client connect from the IP <b>addr</b> (host order)
  331. * at time <b>now</b>. Ignored by all but bridges and directories if
  332. * configured accordingly. */
  333. void
  334. geoip_note_client_seen(geoip_client_action_t action,
  335. uint32_t addr, time_t now)
  336. {
  337. or_options_t *options = get_options();
  338. clientmap_entry_t lookup, *ent;
  339. if (action == GEOIP_CLIENT_CONNECT) {
  340. #ifdef ENABLE_ENTRY_STATS
  341. if (!options->EntryStatistics)
  342. return;
  343. #else
  344. if (!(options->BridgeRelay && options->BridgeRecordUsageByCountry))
  345. return;
  346. #endif
  347. /* Did we recently switch from bridge to relay or back? */
  348. if (client_history_starts > now)
  349. return;
  350. } else {
  351. #ifndef ENABLE_GEOIP_STATS
  352. return;
  353. #else
  354. if (options->BridgeRelay || options->BridgeAuthoritativeDir)
  355. return;
  356. #endif
  357. }
  358. /* Rotate the current request period. */
  359. while (current_request_period_starts + REQUEST_HIST_PERIOD < now) {
  360. if (!geoip_countries)
  361. geoip_countries = smartlist_create();
  362. if (!current_request_period_starts) {
  363. current_request_period_starts = now;
  364. break;
  365. }
  366. /* Also discard all items in the client history that are too old.
  367. * (This only works here because bridge and directory stats are
  368. * independent. Otherwise, we'd only want to discard those items
  369. * with action GEOIP_CLIENT_NETWORKSTATUS{_V2}.) */
  370. geoip_remove_old_clients(current_request_period_starts);
  371. /* Before rotating, write the current stats to disk. */
  372. dump_geoip_stats();
  373. if (get_options()->EntryStatistics)
  374. dump_entry_stats();
  375. /* Now rotate request period */
  376. SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
  377. memmove(&c->n_v2_ns_requests[0], &c->n_v2_ns_requests[1],
  378. sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
  379. memmove(&c->n_v3_ns_requests[0], &c->n_v3_ns_requests[1],
  380. sizeof(uint32_t)*(REQUEST_HIST_LEN-1));
  381. c->n_v2_ns_requests[REQUEST_HIST_LEN-1] = 0;
  382. c->n_v3_ns_requests[REQUEST_HIST_LEN-1] = 0;
  383. });
  384. current_request_period_starts += REQUEST_HIST_PERIOD;
  385. if (n_old_request_periods < REQUEST_HIST_LEN-1)
  386. ++n_old_request_periods;
  387. }
  388. lookup.ipaddr = addr;
  389. lookup.action = (int)action;
  390. ent = HT_FIND(clientmap, &client_history, &lookup);
  391. if (ent) {
  392. ent->last_seen_in_minutes = now / 60;
  393. } else {
  394. ent = tor_malloc_zero(sizeof(clientmap_entry_t));
  395. ent->ipaddr = addr;
  396. ent->last_seen_in_minutes = now / 60;
  397. ent->action = (int)action;
  398. HT_INSERT(clientmap, &client_history, ent);
  399. }
  400. if (action == GEOIP_CLIENT_NETWORKSTATUS ||
  401. action == GEOIP_CLIENT_NETWORKSTATUS_V2) {
  402. int country_idx = geoip_get_country_by_ip(addr);
  403. if (country_idx < 0)
  404. country_idx = 0; /** unresolved requests are stored at index 0. */
  405. if (country_idx >= 0 && country_idx < smartlist_len(geoip_countries)) {
  406. geoip_country_t *country = smartlist_get(geoip_countries, country_idx);
  407. if (action == GEOIP_CLIENT_NETWORKSTATUS)
  408. ++country->n_v3_ns_requests[REQUEST_HIST_LEN-1];
  409. else
  410. ++country->n_v2_ns_requests[REQUEST_HIST_LEN-1];
  411. }
  412. /* Periodically determine share of requests that we should see */
  413. if (last_time_determined_shares + REQUEST_SHARE_INTERVAL < now)
  414. geoip_determine_shares(now);
  415. }
  416. if (!client_history_starts) {
  417. client_history_starts = now;
  418. current_request_period_starts = now;
  419. }
  420. }
  421. /** HT_FOREACH helper: remove a clientmap_entry_t from the hashtable if it's
  422. * older than a certain time. */
  423. static int
  424. _remove_old_client_helper(struct clientmap_entry_t *ent, void *_cutoff)
  425. {
  426. time_t cutoff = *(time_t*)_cutoff / 60;
  427. if (ent->last_seen_in_minutes < cutoff) {
  428. tor_free(ent);
  429. return 1;
  430. } else {
  431. return 0;
  432. }
  433. }
  434. /** Forget about all clients that haven't connected since <b>cutoff</b>.
  435. * If <b>cutoff</b> is in the future, clients won't be added to the history
  436. * until this time is reached. This is useful to prevent relays that switch
  437. * to bridges from reporting unbelievable numbers of clients. */
  438. void
  439. geoip_remove_old_clients(time_t cutoff)
  440. {
  441. clientmap_HT_FOREACH_FN(&client_history,
  442. _remove_old_client_helper,
  443. &cutoff);
  444. if (client_history_starts < cutoff)
  445. client_history_starts = cutoff;
  446. }
  /** A country is left out of reports unless at least this many distinct
   * IPs connected from it.  The intent is to avoid publishing data that
   * might deanonymize users, although this has not been analyzed. */
  #define MIN_IPS_TO_NOTE_COUNTRY 1
  /** No geoip data is reported at all unless we have at least this many
   * IPs in total. */
  #define MIN_IPS_TO_NOTE_ANYTHING 1
  /** Per-country figures are rounded up to the next multiple of this
   * value before being reported. */
  #define IP_GRANULARITY 8
  457. /** Return the time at which we started recording geoip data. */
  458. time_t
  459. geoip_get_history_start(void)
  460. {
  461. return client_history_starts;
  462. }
  /** Scratch record pairing a country code with a count, so that
   * per-country totals can be sorted before being reported. */
  typedef struct c_hist_t {
    /** Two-letter country code followed by a terminating NUL. */
    char country[3];
    /** Count attributed to this country. */
    unsigned total;
  } c_hist_t;
  468. /** Sorting helper: return -1, 1, or 0 based on comparison of two
  469. * geoip_entry_t. Sort in descending order of total, and then by country
  470. * code. */
  471. static int
  472. _c_hist_compare(const void **_a, const void **_b)
  473. {
  474. const c_hist_t *a = *_a, *b = *_b;
  475. if (a->total > b->total)
  476. return -1;
  477. else if (a->total < b->total)
  478. return 1;
  479. else
  480. return strcmp(a->country, b->country);
  481. }
  /** Minimum time, in seconds (12 hours), that history must have been
   * collected before we are willing to report it. */
  #define GEOIP_MIN_OBSERVATION_TIME (12*60*60)
  485. /** Return a newly allocated comma-separated string containing entries for all
  486. * the countries from which we've seen enough clients connect. The entry
  487. * format is cc=num where num is the number of IPs we've seen connecting from
  488. * that country, and cc is a lowercased country code. Returns NULL if we don't
  489. * want to export geoip data yet. */
  490. char *
  491. geoip_get_client_history(time_t now, geoip_client_action_t action)
  492. {
  493. char *result = NULL;
  494. int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
  495. #ifdef ENABLE_GEOIP_STATS
  496. min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
  497. #endif
  498. if (!geoip_is_loaded())
  499. return NULL;
  500. if (client_history_starts < (now - min_observation_time)) {
  501. char buf[32];
  502. smartlist_t *chunks = NULL;
  503. smartlist_t *entries = NULL;
  504. int n_countries = geoip_get_n_countries();
  505. int i;
  506. clientmap_entry_t **ent;
  507. unsigned *counts = tor_malloc_zero(sizeof(unsigned)*n_countries);
  508. unsigned total = 0;
  509. unsigned granularity = IP_GRANULARITY;
  510. #ifdef ENABLE_GEOIP_STATS
  511. granularity = DIR_RECORD_USAGE_GRANULARITY;
  512. #endif
  513. HT_FOREACH(ent, clientmap, &client_history) {
  514. int country;
  515. if ((*ent)->action != (int)action)
  516. continue;
  517. country = geoip_get_country_by_ip((*ent)->ipaddr);
  518. if (country < 0)
  519. country = 0; /** unresolved requests are stored at index 0. */
  520. tor_assert(0 <= country && country < n_countries);
  521. ++counts[country];
  522. ++total;
  523. }
  524. /* Don't record anything if we haven't seen enough IPs. */
  525. if (total < MIN_IPS_TO_NOTE_ANYTHING)
  526. goto done;
  527. /* Make a list of c_hist_t */
  528. entries = smartlist_create();
  529. for (i = 0; i < n_countries; ++i) {
  530. unsigned c = counts[i];
  531. const char *countrycode;
  532. c_hist_t *ent;
  533. /* Only report a country if it has a minimum number of IPs. */
  534. if (c >= MIN_IPS_TO_NOTE_COUNTRY) {
  535. c = round_to_next_multiple_of(c, granularity);
  536. countrycode = geoip_get_country_name(i);
  537. ent = tor_malloc(sizeof(c_hist_t));
  538. strlcpy(ent->country, countrycode, sizeof(ent->country));
  539. ent->total = c;
  540. smartlist_add(entries, ent);
  541. }
  542. }
  543. /* Sort entries. Note that we must do this _AFTER_ rounding, or else
  544. * the sort order could leak info. */
  545. smartlist_sort(entries, _c_hist_compare);
  546. /* Build the result. */
  547. chunks = smartlist_create();
  548. SMARTLIST_FOREACH(entries, c_hist_t *, ch, {
  549. tor_snprintf(buf, sizeof(buf), "%s=%u", ch->country, ch->total);
  550. smartlist_add(chunks, tor_strdup(buf));
  551. });
  552. result = smartlist_join_strings(chunks, ",", 0, NULL);
  553. done:
  554. tor_free(counts);
  555. if (chunks) {
  556. SMARTLIST_FOREACH(chunks, char *, c, tor_free(c));
  557. smartlist_free(chunks);
  558. }
  559. if (entries) {
  560. SMARTLIST_FOREACH(entries, c_hist_t *, c, tor_free(c));
  561. smartlist_free(entries);
  562. }
  563. }
  564. return result;
  565. }
  566. /** Return a newly allocated string holding the per-country request history
  567. * for <b>action</b> in a format suitable for an extra-info document, or NULL
  568. * on failure. */
  569. char *
  570. geoip_get_request_history(time_t now, geoip_client_action_t action)
  571. {
  572. smartlist_t *entries, *strings;
  573. char *result;
  574. unsigned granularity = IP_GRANULARITY;
  575. int min_observation_time = GEOIP_MIN_OBSERVATION_TIME;
  576. #ifdef ENABLE_GEOIP_STATS
  577. granularity = DIR_RECORD_USAGE_GRANULARITY;
  578. min_observation_time = DIR_RECORD_USAGE_MIN_OBSERVATION_TIME;
  579. #endif
  580. if (client_history_starts >= (now - min_observation_time))
  581. return NULL;
  582. if (action != GEOIP_CLIENT_NETWORKSTATUS &&
  583. action != GEOIP_CLIENT_NETWORKSTATUS_V2)
  584. return NULL;
  585. if (!geoip_countries)
  586. return NULL;
  587. entries = smartlist_create();
  588. SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, {
  589. uint32_t *n = (action == GEOIP_CLIENT_NETWORKSTATUS)
  590. ? c->n_v3_ns_requests : c->n_v2_ns_requests;
  591. uint32_t tot = 0;
  592. int i;
  593. c_hist_t *ent;
  594. for (i=0; i < REQUEST_HIST_LEN; ++i)
  595. tot += n[i];
  596. if (!tot)
  597. continue;
  598. ent = tor_malloc_zero(sizeof(c_hist_t));
  599. strlcpy(ent->country, c->countrycode, sizeof(ent->country));
  600. ent->total = round_to_next_multiple_of(tot, granularity);
  601. smartlist_add(entries, ent);
  602. });
  603. smartlist_sort(entries, _c_hist_compare);
  604. strings = smartlist_create();
  605. SMARTLIST_FOREACH(entries, c_hist_t *, ent, {
  606. char buf[32];
  607. tor_snprintf(buf, sizeof(buf), "%s=%u", ent->country, ent->total);
  608. smartlist_add(strings, tor_strdup(buf));
  609. });
  610. result = smartlist_join_strings(strings, ",", 0, NULL);
  611. SMARTLIST_FOREACH(strings, char *, cp, tor_free(cp));
  612. SMARTLIST_FOREACH(entries, c_hist_t *, ent, tor_free(ent));
  613. smartlist_free(strings);
  614. smartlist_free(entries);
  615. return result;
  616. }
  /** Append the current directory-request geoip statistics to the
   * "geoip-stats" file in the data directory: per-country client IPs,
   * per-country request counts, and (when available) our mean share of v2
   * and v3 requests.  Compiled to a no-op unless ENABLE_GEOIP_STATS is
   * defined.  Write failures abort the file update silently. */
  static void
  dump_geoip_stats(void)
  {
  #ifdef ENABLE_GEOIP_STATS
    const time_t now = time(NULL);
    time_t first_period;
    char *fname = get_datadir_fname("geoip-stats");
    char *v2_text = NULL;
    char *v3_text = NULL;
    char started[ISO_TIME_LEN+1];
    char stamp[ISO_TIME_LEN+1];
    open_file_t *handle = NULL;
    double share_v2 = 0.0;
    double share_v3 = 0.0;
    FILE *fp;

    /* Section 1: which client IPs asked us for network statuses. */
    v2_text = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
    v3_text = geoip_get_client_history(now, GEOIP_CLIENT_NETWORKSTATUS);
    format_iso_time(started, geoip_get_history_start());
    format_iso_time(stamp, now);
    fp = start_writing_to_stdio_file(fname, OPEN_FLAGS_APPEND,
                                     0600, &handle);
    if (!fp)
      goto done;
    if (fprintf(fp, "written %s\nstarted-at %s\nns-ips %s\nns-v2-ips %s\n",
                stamp, started,
                v3_text ? v3_text : "", v2_text ? v2_text : "") < 0)
      goto done;
    tor_free(v2_text);
    tor_free(v3_text);

    /* Section 2: how many requests we saw, counted from the oldest
     * remembered period. */
    first_period = current_request_period_starts -
      (n_old_request_periods * REQUEST_HIST_PERIOD);
    format_iso_time(started, first_period);
    v2_text = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS_V2);
    v3_text = geoip_get_request_history(now, GEOIP_CLIENT_NETWORKSTATUS);
    if (fprintf(fp, "requests-start %s\nn-ns-reqs %s\nn-v2-ns-reqs %s\n",
                started,
                v3_text ? v3_text : "", v2_text ? v2_text : "") < 0)
      goto done;

    /* Section 3: our mean share of requests, when one can be computed. */
    if (geoip_get_mean_shares(now, &share_v2, &share_v3) == 0) {
      if (fprintf(fp, "v2-ns-share %0.2lf%%\n", share_v2*100) < 0)
        goto done;
      if (fprintf(fp, "v3-ns-share %0.2lf%%\n", share_v3*100) < 0)
        goto done;
    }

    finish_writing_to_file(handle);
    handle = NULL;
   done:
    /* A non-NULL handle here means we bailed out early. */
    if (handle)
      abort_writing_to_file(handle);
    tor_free(fname);
    tor_free(v2_text);
    tor_free(v3_text);
  #endif
  }
  /** Append the per-country statistics about connecting clients to the
   * "entry-stats" file in the data directory.  Compiled to a no-op unless
   * ENABLE_ENTRY_STATS is defined.  Write failures abort the file update
   * silently. */
  static void
  dump_entry_stats(void)
  {
  #ifdef ENABLE_ENTRY_STATS
    const time_t now = time(NULL);
    char *fname = get_datadir_fname("entry-stats");
    char *summary = NULL;
    char started[ISO_TIME_LEN+1];
    char stamp[ISO_TIME_LEN+1];
    open_file_t *handle = NULL;
    FILE *fp;

    summary = geoip_get_client_history(now, GEOIP_CLIENT_CONNECT);
    format_iso_time(started, geoip_get_history_start());
    format_iso_time(stamp, now);

    fp = start_writing_to_stdio_file(fname, OPEN_FLAGS_APPEND,
                                     0600, &handle);
    if (!fp)
      goto done;
    if (fprintf(fp, "written %s\nstarted-at %s\nips %s\n",
                stamp, started, summary ? summary : "") < 0)
      goto done;

    finish_writing_to_file(handle);
    handle = NULL;
   done:
    /* A non-NULL handle here means we bailed out early. */
    if (handle)
      abort_writing_to_file(handle);
    tor_free(fname);
    tor_free(summary);
  #endif
  }
  700. /** Helper used to implement GETINFO ip-to-country/... controller command. */
  701. int
  702. getinfo_helper_geoip(control_connection_t *control_conn,
  703. const char *question, char **answer)
  704. {
  705. (void)control_conn;
  706. if (geoip_is_loaded() && !strcmpstart(question, "ip-to-country/")) {
  707. int c;
  708. uint32_t ip;
  709. struct in_addr in;
  710. question += strlen("ip-to-country/");
  711. if (tor_inet_aton(question, &in) != 0) {
  712. ip = ntohl(in.s_addr);
  713. c = geoip_get_country_by_ip(ip);
  714. *answer = tor_strdup(geoip_get_country_name(c));
  715. }
  716. }
  717. return 0;
  718. }
  719. /** Release all storage held by the GeoIP database. */
  720. static void
  721. clear_geoip_db(void)
  722. {
  723. if (geoip_countries) {
  724. SMARTLIST_FOREACH(geoip_countries, geoip_country_t *, c, tor_free(c));
  725. smartlist_free(geoip_countries);
  726. }
  727. if (country_idxplus1_by_lc_code)
  728. strmap_free(country_idxplus1_by_lc_code, NULL);
  729. if (geoip_entries) {
  730. SMARTLIST_FOREACH(geoip_entries, geoip_entry_t *, ent, tor_free(ent));
  731. smartlist_free(geoip_entries);
  732. }
  733. geoip_countries = NULL;
  734. country_idxplus1_by_lc_code = NULL;
  735. geoip_entries = NULL;
  736. }
  737. /** Release all storage held in this file. */
  738. void
  739. geoip_free_all(void)
  740. {
  741. clientmap_entry_t **ent, **next, *this;
  742. for (ent = HT_START(clientmap, &client_history); ent != NULL; ent = next) {
  743. this = *ent;
  744. next = HT_NEXT_RMV(clientmap, &client_history, ent);
  745. tor_free(this);
  746. }
  747. HT_CLEAR(clientmap, &client_history);
  748. clear_geoip_db();
  749. }