microdesc.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487
  1. /* Copyright (c) 2009-2011, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. #include "or.h"
  4. #include "config.h"
  5. #include "microdesc.h"
  6. #include "routerparse.h"
  7. /** A data structure to hold a bunch of cached microdescriptors. There are
  8. * two active files in the cache: a "cache file" that we mmap, and a "journal
  9. * file" that we append to. Periodically, we rebuild the cache file to hold
  10. * only the microdescriptors that we want to keep */
  11. struct microdesc_cache_t {
  12. /** Map from sha256-digest to microdesc_t for every microdesc_t in the
  13. * cache. */
  14. HT_HEAD(microdesc_map, microdesc_t) map;
  15. /** Name of the cache file. */
  16. char *cache_fname;
  17. /** Name of the journal file. */
  18. char *journal_fname;
  19. /** Mmap'd contents of the cache file, or NULL if there is none. */
  20. tor_mmap_t *cache_content;
  21. /** Number of bytes used in the journal file. */
  22. size_t journal_len;
  23. /** Number of bytes in descriptors removed as too old. */
  24. size_t bytes_dropped;
  25. /** Total bytes of microdescriptor bodies we have added to this cache */
  26. uint64_t total_len_seen;
  27. /** Total number of microdescriptors we have added to this cache */
  28. unsigned n_seen;
  29. };
  30. /** Helper: computes a hash of <b>md</b> to place it in a hash table. */
  31. static INLINE unsigned int
  32. _microdesc_hash(microdesc_t *md)
  33. {
  34. unsigned *d = (unsigned*)md->digest;
  35. #if SIZEOF_INT == 4
  36. return d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[7];
  37. #else
  38. return d[0] ^ d[1] ^ d[2] ^ d[3];
  39. #endif
  40. }
  41. /** Helper: compares <b>a</b> and </b> for equality for hash-table purposes. */
  42. static INLINE int
  43. _microdesc_eq(microdesc_t *a, microdesc_t *b)
  44. {
  45. return !memcmp(a->digest, b->digest, DIGEST256_LEN);
  46. }
  47. HT_PROTOTYPE(microdesc_map, microdesc_t, node,
  48. _microdesc_hash, _microdesc_eq);
  49. HT_GENERATE(microdesc_map, microdesc_t, node,
  50. _microdesc_hash, _microdesc_eq, 0.6,
  51. malloc, realloc, free);
  52. /** Write the body of <b>md</b> into <b>f</b>, with appropriate annotations.
  53. * On success, return the total number of bytes written, and set
  54. * *<b>annotation_len_out</b> to the number of bytes written as
  55. * annotations. */
  56. static ssize_t
  57. dump_microdescriptor(FILE *f, microdesc_t *md, size_t *annotation_len_out)
  58. {
  59. ssize_t r = 0;
  60. size_t written;
  61. /* XXXX drops unkown annotations. */
  62. if (md->last_listed) {
  63. char buf[ISO_TIME_LEN+1];
  64. char annotation[ISO_TIME_LEN+32];
  65. format_iso_time(buf, md->last_listed);
  66. tor_snprintf(annotation, sizeof(annotation), "@last-listed %s\n", buf);
  67. fputs(annotation, f);
  68. r += strlen(annotation);
  69. *annotation_len_out = r;
  70. } else {
  71. *annotation_len_out = 0;
  72. }
  73. md->off = (off_t) ftell(f);
  74. written = fwrite(md->body, 1, md->bodylen, f);
  75. if (written != md->bodylen) {
  76. log_warn(LD_DIR,
  77. "Couldn't dump microdescriptor (wrote %lu out of %lu): %s",
  78. (unsigned long)written, (unsigned long)md->bodylen,
  79. strerror(ferror(f)));
  80. return -1;
  81. }
  82. r += md->bodylen;
  83. return r;
  84. }
  85. /** Holds a pointer to the current microdesc_cache_t object, or NULL if no
  86. * such object has been allocated. */
  87. static microdesc_cache_t *the_microdesc_cache = NULL;
  88. /** Return a pointer to the microdescriptor cache, loading it if necessary. */
  89. microdesc_cache_t *
  90. get_microdesc_cache(void)
  91. {
  92. if (PREDICT_UNLIKELY(the_microdesc_cache==NULL)) {
  93. microdesc_cache_t *cache = tor_malloc_zero(sizeof(microdesc_cache_t));
  94. HT_INIT(microdesc_map, &cache->map);
  95. cache->cache_fname = get_datadir_fname("cached-microdescs");
  96. cache->journal_fname = get_datadir_fname("cached-microdescs.new");
  97. microdesc_cache_reload(cache);
  98. the_microdesc_cache = cache;
  99. }
  100. return the_microdesc_cache;
  101. }
  102. /* There are three sources of microdescriptors:
  103. 1) Generated by us while acting as a directory authority.
  104. 2) Loaded from the cache on disk.
  105. 3) Downloaded.
  106. */
  107. /** Decode the microdescriptors from the string starting at <b>s</b> and
  108. * ending at <b>eos</b>, and store them in <b>cache</b>. If <b>no-save</b>,
  109. * mark them as non-writable to disk. If <b>where</b> is SAVED_IN_CACHE,
  110. * leave their bodies as pointers to the mmap'd cache. If where is
  111. * <b>SAVED_NOWHERE</b>, do not allow annotations. Return a list of the added
  112. * microdescriptors. */
  113. smartlist_t *
  114. microdescs_add_to_cache(microdesc_cache_t *cache,
  115. const char *s, const char *eos, saved_location_t where,
  116. int no_save)
  117. {
  118. /*XXXX need an argument that sets last_listed as appropriate. */
  119. smartlist_t *descriptors, *added;
  120. const int allow_annotations = (where != SAVED_NOWHERE);
  121. const int copy_body = (where != SAVED_IN_CACHE);
  122. descriptors = microdescs_parse_from_string(s, eos,
  123. allow_annotations,
  124. copy_body);
  125. added = microdescs_add_list_to_cache(cache, descriptors, where, no_save);
  126. smartlist_free(descriptors);
  127. return added;
  128. }
  129. /* As microdescs_add_to_cache, but takes a list of micrdescriptors instead of
  130. * a string to encode. Frees any members of <b>descriptors</b> that it does
  131. * not add. */
  132. smartlist_t *
  133. microdescs_add_list_to_cache(microdesc_cache_t *cache,
  134. smartlist_t *descriptors, saved_location_t where,
  135. int no_save)
  136. {
  137. smartlist_t *added;
  138. open_file_t *open_file = NULL;
  139. FILE *f = NULL;
  140. // int n_added = 0;
  141. ssize_t size = 0;
  142. if (where == SAVED_NOWHERE && !no_save) {
  143. f = start_writing_to_stdio_file(cache->journal_fname,
  144. OPEN_FLAGS_APPEND|O_BINARY,
  145. 0600, &open_file);
  146. if (!f) {
  147. log_warn(LD_DIR, "Couldn't append to journal in %s: %s",
  148. cache->journal_fname, strerror(errno));
  149. return NULL;
  150. }
  151. }
  152. added = smartlist_create();
  153. SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) {
  154. microdesc_t *md2;
  155. md2 = HT_FIND(microdesc_map, &cache->map, md);
  156. if (md2) {
  157. /* We already had this one. */
  158. if (md2->last_listed < md->last_listed)
  159. md2->last_listed = md->last_listed;
  160. microdesc_free(md);
  161. if (where != SAVED_NOWHERE)
  162. cache->bytes_dropped += size;
  163. continue;
  164. }
  165. /* Okay, it's a new one. */
  166. if (f) {
  167. size_t annotation_len;
  168. size = dump_microdescriptor(f, md, &annotation_len);
  169. if (size < 0) {
  170. /* XXX handle errors from dump_microdescriptor() */
  171. /* log? return -1? die? coredump the universe? */
  172. continue;
  173. }
  174. md->saved_location = SAVED_IN_JOURNAL;
  175. cache->journal_len += size;
  176. } else {
  177. md->saved_location = where;
  178. }
  179. md->no_save = no_save;
  180. HT_INSERT(microdesc_map, &cache->map, md);
  181. smartlist_add(added, md);
  182. ++cache->n_seen;
  183. cache->total_len_seen += md->bodylen;
  184. } SMARTLIST_FOREACH_END(md);
  185. if (f)
  186. finish_writing_to_file(open_file); /*XXX Check me.*/
  187. microdesc_cache_rebuild(cache, 0/* only as needed */);
  188. return added;
  189. }
  190. /** Remove every microdescriptor in <b>cache</b>. */
  191. void
  192. microdesc_cache_clear(microdesc_cache_t *cache)
  193. {
  194. microdesc_t **entry, **next;
  195. for (entry = HT_START(microdesc_map, &cache->map); entry; entry = next) {
  196. microdesc_t *md = *entry;
  197. next = HT_NEXT_RMV(microdesc_map, &cache->map, entry);
  198. microdesc_free(md);
  199. }
  200. HT_CLEAR(microdesc_map, &cache->map);
  201. if (cache->cache_content) {
  202. tor_munmap_file(cache->cache_content);
  203. cache->cache_content = NULL;
  204. }
  205. cache->total_len_seen = 0;
  206. cache->n_seen = 0;
  207. }
  208. /** Reload the contents of <b>cache</b> from disk. If it is empty, load it
  209. * for the first time. Return 0 on success, -1 on failure. */
  210. int
  211. microdesc_cache_reload(microdesc_cache_t *cache)
  212. {
  213. struct stat st;
  214. char *journal_content;
  215. smartlist_t *added;
  216. tor_mmap_t *mm;
  217. int total = 0;
  218. microdesc_cache_clear(cache);
  219. mm = cache->cache_content = tor_mmap_file(cache->cache_fname);
  220. if (mm) {
  221. added = microdescs_add_to_cache(cache, mm->data, mm->data+mm->size,
  222. SAVED_IN_CACHE, 0);
  223. if (added) {
  224. total += smartlist_len(added);
  225. smartlist_free(added);
  226. }
  227. }
  228. journal_content = read_file_to_str(cache->journal_fname,
  229. RFTS_IGNORE_MISSING, &st);
  230. if (journal_content) {
  231. cache->journal_len = (size_t) st.st_size;
  232. added = microdescs_add_to_cache(cache, journal_content,
  233. journal_content+st.st_size,
  234. SAVED_IN_JOURNAL, 0);
  235. if (added) {
  236. total += smartlist_len(added);
  237. smartlist_free(added);
  238. }
  239. tor_free(journal_content);
  240. }
  241. log_notice(LD_DIR, "Reloaded microdescriptor cache. Found %d descriptors.",
  242. total);
  243. microdesc_cache_clean(cache, 0, 0);
  244. return 0;
  245. }
  246. /** By default, we remove any microdescriptors that have gone at least this
  247. * long without appearing in a current consensus. */
  248. #define TOLERATE_MICRODESC_AGE (7*24*60*60)
  249. /** Remove all microdescriptors from <b>cache</b> that haven't been listed for
  250. * a long time. Does not rebuild the cache on disk. If <b>cutoff</b> is
  251. * positive, specifically remove microdescriptors that have been unlisted
  252. * since <b>cutoff</b>. If <b>force</b> is true, remove microdescriptors even
  253. * if we have no current live microdescriptor consensus.
  254. */
  255. void
  256. microdesc_cache_clean(microdesc_cache_t *cache, time_t cutoff, int force)
  257. {
  258. microdesc_t **mdp, *victim;
  259. int dropped=0, kept=0;
  260. size_t bytes_dropped = 0;
  261. time_t now = time(NULL);
  262. (void) force;
  263. /* In 0.2.2, we let this proceed unconditionally: only authorities have
  264. * microdesc caches. */
  265. if (cutoff <= 0)
  266. cutoff = now - TOLERATE_MICRODESC_AGE;
  267. for (mdp = HT_START(microdesc_map, &cache->map); mdp != NULL; ) {
  268. if ((*mdp)->last_listed < cutoff) {
  269. ++dropped;
  270. victim = *mdp;
  271. mdp = HT_NEXT_RMV(microdesc_map, &cache->map, mdp);
  272. bytes_dropped += victim->bodylen;
  273. microdesc_free(victim);
  274. } else {
  275. ++kept;
  276. mdp = HT_NEXT(microdesc_map, &cache->map, mdp);
  277. }
  278. }
  279. if (dropped) {
  280. log_notice(LD_DIR, "Removed %d/%d microdescriptors as old.",
  281. dropped,dropped+kept);
  282. cache->bytes_dropped += bytes_dropped;
  283. }
  284. }
  285. static int
  286. should_rebuild_md_cache(microdesc_cache_t *cache)
  287. {
  288. const size_t old_len =
  289. cache->cache_content ? cache->cache_content->size : 0;
  290. const size_t journal_len = cache->journal_len;
  291. const size_t dropped = cache->bytes_dropped;
  292. if (journal_len < 16384)
  293. return 0; /* Don't bother, not enough has happened yet. */
  294. if (dropped > (journal_len + old_len) / 3)
  295. return 1; /* We could save 1/3 or more of the currently used space. */
  296. if (journal_len > old_len / 2)
  297. return 1; /* We should append to the regular file */
  298. return 0;
  299. }
  300. /** Regenerate the main cache file for <b>cache</b>, clear the journal file,
  301. * and update every microdesc_t in the cache with pointers to its new
  302. * location. If <b>force</b> is true, do this unconditionally. If
  303. * <b>force</b> is false, do it only if we expect to save space on disk. */
  304. int
  305. microdesc_cache_rebuild(microdesc_cache_t *cache, int force)
  306. {
  307. open_file_t *open_file;
  308. FILE *f;
  309. microdesc_t **mdp;
  310. smartlist_t *wrote;
  311. ssize_t size;
  312. off_t off = 0;
  313. int orig_size, new_size;
  314. /* Remove dead descriptors */
  315. microdesc_cache_clean(cache, 0/*cutoff*/, 0/*force*/);
  316. if (!force && !should_rebuild_md_cache(cache))
  317. return 0;
  318. log_info(LD_DIR, "Rebuilding the microdescriptor cache...");
  319. orig_size = (int)(cache->cache_content ? cache->cache_content->size : 0);
  320. orig_size += (int)cache->journal_len;
  321. f = start_writing_to_stdio_file(cache->cache_fname,
  322. OPEN_FLAGS_REPLACE|O_BINARY,
  323. 0600, &open_file);
  324. if (!f)
  325. return -1;
  326. wrote = smartlist_create();
  327. HT_FOREACH(mdp, microdesc_map, &cache->map) {
  328. microdesc_t *md = *mdp;
  329. size_t annotation_len;
  330. if (md->no_save)
  331. continue;
  332. size = dump_microdescriptor(f, md, &annotation_len);
  333. if (size < 0) {
  334. /* XXX handle errors from dump_microdescriptor() */
  335. /* log? return -1? die? coredump the universe? */
  336. continue;
  337. }
  338. md->off = off + annotation_len;
  339. off += size;
  340. if (md->saved_location != SAVED_IN_CACHE) {
  341. tor_free(md->body);
  342. md->saved_location = SAVED_IN_CACHE;
  343. }
  344. smartlist_add(wrote, md);
  345. }
  346. finish_writing_to_file(open_file); /*XXX Check me.*/
  347. if (cache->cache_content)
  348. tor_munmap_file(cache->cache_content);
  349. cache->cache_content = tor_mmap_file(cache->cache_fname);
  350. if (!cache->cache_content && smartlist_len(wrote)) {
  351. log_err(LD_DIR, "Couldn't map file that we just wrote to %s!",
  352. cache->cache_fname);
  353. smartlist_free(wrote);
  354. return -1;
  355. }
  356. SMARTLIST_FOREACH_BEGIN(wrote, microdesc_t *, md) {
  357. tor_assert(md->saved_location == SAVED_IN_CACHE);
  358. md->body = (char*)cache->cache_content->data + md->off;
  359. tor_assert(!memcmp(md->body, "onion-key", 9));
  360. } SMARTLIST_FOREACH_END(md);
  361. smartlist_free(wrote);
  362. write_str_to_file(cache->journal_fname, "", 1);
  363. cache->journal_len = 0;
  364. cache->bytes_dropped = 0;
  365. new_size = (int)cache->cache_content->size;
  366. log_info(LD_DIR, "Done rebuilding microdesc cache. "
  367. "Saved %d bytes; %d still used.",
  368. orig_size-new_size, new_size);
  369. return 0;
  370. }
  371. /** Deallocate a single microdescriptor. Note: the microdescriptor MUST have
  372. * previously been removed from the cache if it had ever been inserted. */
  373. void
  374. microdesc_free(microdesc_t *md)
  375. {
  376. if (!md)
  377. return;
  378. /* Must be removed from hash table! */
  379. if (md->onion_pkey)
  380. crypto_free_pk_env(md->onion_pkey);
  381. if (md->body && md->saved_location != SAVED_IN_CACHE)
  382. tor_free(md->body);
  383. if (md->family) {
  384. SMARTLIST_FOREACH(md->family, char *, cp, tor_free(cp));
  385. smartlist_free(md->family);
  386. }
  387. tor_free(md->exitsummary);
  388. tor_free(md);
  389. }
  390. /** Free all storage held in the microdesc.c module. */
  391. void
  392. microdesc_free_all(void)
  393. {
  394. if (the_microdesc_cache) {
  395. microdesc_cache_clear(the_microdesc_cache);
  396. tor_free(the_microdesc_cache->cache_fname);
  397. tor_free(the_microdesc_cache->journal_fname);
  398. tor_free(the_microdesc_cache);
  399. }
  400. }
  401. /** If there is a microdescriptor in <b>cache</b> whose sha256 digest is
  402. * <b>d</b>, return it. Otherwise return NULL. */
  403. microdesc_t *
  404. microdesc_cache_lookup_by_digest256(microdesc_cache_t *cache, const char *d)
  405. {
  406. microdesc_t *md, search;
  407. if (!cache)
  408. cache = get_microdesc_cache();
  409. memcpy(search.digest, d, DIGEST256_LEN);
  410. md = HT_FIND(microdesc_map, &cache->map, &search);
  411. return md;
  412. }
  413. /** Return the mean size of decriptors added to <b>cache</b> since it was last
  414. * cleared. Used to estimate the size of large downloads. */
  415. size_t
  416. microdesc_average_size(microdesc_cache_t *cache)
  417. {
  418. if (!cache)
  419. cache = get_microdesc_cache();
  420. if (!cache->n_seen)
  421. return 512;
  422. return (size_t)(cache->total_len_seen / cache->n_seen);
  423. }