microdesc.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. /* Copyright (c) 2009, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. #include "or.h"
  4. /** A data structure to hold a bunch of cached microdescriptors. There are
  5. * two active files in the cache: a "cache file" that we mmap, and a "journal
  6. * file" that we append to. Periodically, we rebuild the cache file to hold
  7. * only the microdescriptors that we want to keep */
  8. struct microdesc_cache_t {
  9. /** Map from sha256-digest to microdesc_t for every microdesc_t in the
  10. * cache. */
  11. HT_HEAD(microdesc_map, microdesc_t) map;
  12. /** Name of the cache file. */
  13. char *cache_fname;
  14. /** Name of the journal file. */
  15. char *journal_fname;
  16. /** Mmap'd contents of the cache file, or NULL if there is none. */
  17. tor_mmap_t *cache_content;
  18. /** Number of bytes used in the journal file. */
  19. size_t journal_len;
  20. /** Total bytes of microdescriptor bodies we have added to this cache */
  21. uint64_t total_len_seen;
  22. /** Total number of microdescriptors we have added to this cache */
  23. unsigned n_seen;
  24. };
  25. /** Helper: computes a hash of <b>md</b> to place it in a hash table. */
  26. static INLINE unsigned int
  27. _microdesc_hash(microdesc_t *md)
  28. {
  29. unsigned *d = (unsigned*)md->digest;
  30. #if SIZEOF_INT == 4
  31. return d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[7];
  32. #else
  33. return d[0] ^ d[1] ^ d[2] ^ d[3];
  34. #endif
  35. }
  36. /** Helper: compares <b>a</b> and </b> for equality for hash-table purposes. */
  37. static INLINE int
  38. _microdesc_eq(microdesc_t *a, microdesc_t *b)
  39. {
  40. return !memcmp(a->digest, b->digest, DIGEST256_LEN);
  41. }
  42. HT_PROTOTYPE(microdesc_map, microdesc_t, node,
  43. _microdesc_hash, _microdesc_eq);
  44. HT_GENERATE(microdesc_map, microdesc_t, node,
  45. _microdesc_hash, _microdesc_eq, 0.6,
  46. _tor_malloc, _tor_realloc, _tor_free);
  47. /** Write the body of <b>md</b> into <b>f</b>, with appropriate annotations.
  48. * On success, return the total number of bytes written, and set
  49. * *<b>annotation_len_out</b> to the number of bytes written as
  50. * annotations. */
  51. static size_t
  52. dump_microdescriptor(FILE *f, microdesc_t *md, size_t *annotation_len_out)
  53. {
  54. size_t r = 0;
  55. /* XXXX drops unkown annotations. */
  56. if (md->last_listed) {
  57. char buf[ISO_TIME_LEN+1];
  58. char annotation[ISO_TIME_LEN+32];
  59. format_iso_time(buf, md->last_listed);
  60. tor_snprintf(annotation, sizeof(annotation), "@last-listed %s\n", buf);
  61. fputs(annotation, f);
  62. r += strlen(annotation);
  63. *annotation_len_out = r;
  64. } else {
  65. *annotation_len_out = 0;
  66. }
  67. md->off = (off_t) ftell(f);
  68. fwrite(md->body, 1, md->bodylen, f);
  69. r += md->bodylen;
  70. return r;
  71. }
  72. /** Holds a pointer to the current microdesc_cache_t object, or NULL if no
  73. * such object has been allocated. */
  74. static microdesc_cache_t *the_microdesc_cache = NULL;
  75. /** Return a pointer to the microdescriptor cache, loading it if necessary. */
  76. microdesc_cache_t *
  77. get_microdesc_cache(void)
  78. {
  79. if (PREDICT_UNLIKELY(the_microdesc_cache==NULL)) {
  80. microdesc_cache_t *cache = tor_malloc_zero(sizeof(microdesc_cache_t));
  81. HT_INIT(microdesc_map, &cache->map);
  82. cache->cache_fname = get_datadir_fname("cached-microdescs");
  83. cache->journal_fname = get_datadir_fname("cached-microdescs.new");
  84. microdesc_cache_reload(cache);
  85. the_microdesc_cache = cache;
  86. }
  87. return the_microdesc_cache;
  88. }
  89. /* There are three sources of microdescriptors:
  90. 1) Generated by us while acting as a directory authority.
  91. 2) Loaded from the cache on disk.
  92. 3) Downloaded.
  93. */
  94. /** Decode the microdescriptors from the string starting at <b>s</b> and
  95. * ending at <b>eos</b>, and store them in <b>cache</b>. If <b>no-save</b>,
  96. * mark them as non-writable to disk. If <b>where</b> is SAVED_IN_CACHE,
  97. * leave their bodies as pointers to the mmap'd cache. If where is
  98. * <b>SAVED_NOWHERE</b>, do not allow annotations. Return a list of the added
  99. * microdescriptors. */
  100. smartlist_t *
  101. microdescs_add_to_cache(microdesc_cache_t *cache,
  102. const char *s, const char *eos, saved_location_t where,
  103. int no_save)
  104. {
  105. /*XXXX need an argument that sets last_listed as appropriate. */
  106. smartlist_t *descriptors, *added;
  107. const int allow_annotations = (where != SAVED_NOWHERE);
  108. const int copy_body = (where != SAVED_IN_CACHE);
  109. descriptors = microdescs_parse_from_string(s, eos,
  110. allow_annotations,
  111. copy_body);
  112. added = microdescs_add_list_to_cache(cache, descriptors, where, no_save);
  113. smartlist_free(descriptors);
  114. return added;
  115. }
  116. /* As microdescs_add_to_cache, but takes a list of micrdescriptors instead of
  117. * a string to encode. Frees any members of <b>descriptors</b> that it does
  118. * not add. */
  119. smartlist_t *
  120. microdescs_add_list_to_cache(microdesc_cache_t *cache,
  121. smartlist_t *descriptors, saved_location_t where,
  122. int no_save)
  123. {
  124. smartlist_t *added;
  125. open_file_t *open_file = NULL;
  126. FILE *f = NULL;
  127. // int n_added = 0;
  128. size_t size = 0;
  129. if (where == SAVED_NOWHERE && !no_save) {
  130. f = start_writing_to_stdio_file(cache->journal_fname,
  131. OPEN_FLAGS_APPEND|O_BINARY,
  132. 0600, &open_file);
  133. if (!f) {
  134. log_warn(LD_DIR, "Couldn't append to journal in %s: %s",
  135. cache->journal_fname, strerror(errno));
  136. return NULL;
  137. }
  138. }
  139. added = smartlist_create();
  140. SMARTLIST_FOREACH_BEGIN(descriptors, microdesc_t *, md) {
  141. microdesc_t *md2;
  142. md2 = HT_FIND(microdesc_map, &cache->map, md);
  143. if (md2) {
  144. /* We already had this one. */
  145. if (md2->last_listed < md->last_listed)
  146. md2->last_listed = md->last_listed;
  147. microdesc_free(md);
  148. continue;
  149. }
  150. /* Okay, it's a new one. */
  151. if (f) {
  152. size_t annotation_len;
  153. size = dump_microdescriptor(f, md, &annotation_len);
  154. md->saved_location = SAVED_IN_JOURNAL;
  155. cache->journal_len += size;
  156. } else {
  157. md->saved_location = where;
  158. }
  159. md->no_save = no_save;
  160. HT_INSERT(microdesc_map, &cache->map, md);
  161. smartlist_add(added, md);
  162. ++cache->n_seen;
  163. cache->total_len_seen += md->bodylen;
  164. } SMARTLIST_FOREACH_END(md);
  165. if (f)
  166. finish_writing_to_file(open_file); /*XXX Check me.*/
  167. {
  168. size_t old_content_len =
  169. cache->cache_content ? cache->cache_content->size : 0;
  170. if (cache->journal_len > 16384 + old_content_len &&
  171. cache->journal_len > old_content_len * 2) {
  172. microdesc_cache_rebuild(cache);
  173. }
  174. }
  175. return added;
  176. }
  177. /** Remove every microdescriptor in <b>cache</b>. */
  178. void
  179. microdesc_cache_clear(microdesc_cache_t *cache)
  180. {
  181. microdesc_t **entry, **next;
  182. for (entry = HT_START(microdesc_map, &cache->map); entry; entry = next) {
  183. microdesc_t *md = *entry;
  184. next = HT_NEXT_RMV(microdesc_map, &cache->map, entry);
  185. microdesc_free(md);
  186. }
  187. HT_CLEAR(microdesc_map, &cache->map);
  188. if (cache->cache_content) {
  189. tor_munmap_file(cache->cache_content);
  190. cache->cache_content = NULL;
  191. }
  192. cache->total_len_seen = 0;
  193. cache->n_seen = 0;
  194. }
  195. /** Reload the contents of <b>cache</b> from disk. If it is empty, load it
  196. * for the first time. Return 0 on success, -1 on failure. */
  197. int
  198. microdesc_cache_reload(microdesc_cache_t *cache)
  199. {
  200. struct stat st;
  201. char *journal_content;
  202. smartlist_t *added;
  203. tor_mmap_t *mm;
  204. int total = 0;
  205. microdesc_cache_clear(cache);
  206. mm = cache->cache_content = tor_mmap_file(cache->cache_fname);
  207. if (mm) {
  208. added = microdescs_add_to_cache(cache, mm->data, mm->data+mm->size,
  209. SAVED_IN_CACHE, 0);
  210. if (added) {
  211. total += smartlist_len(added);
  212. smartlist_free(added);
  213. }
  214. }
  215. journal_content = read_file_to_str(cache->journal_fname,
  216. RFTS_IGNORE_MISSING, &st);
  217. if (journal_content) {
  218. added = microdescs_add_to_cache(cache, journal_content,
  219. journal_content+st.st_size,
  220. SAVED_IN_JOURNAL, 0);
  221. if (added) {
  222. total += smartlist_len(added);
  223. smartlist_free(added);
  224. }
  225. tor_free(journal_content);
  226. }
  227. log_notice(LD_DIR, "Reloaded microdescriptor cache. Found %d descriptors.",
  228. total);
  229. return 0;
  230. }
  231. /** Regenerate the main cache file for <b>cache</b>, clear the journal file,
  232. * and update every microdesc_t in the cache with pointers to its new
  233. * location. */
  234. int
  235. microdesc_cache_rebuild(microdesc_cache_t *cache)
  236. {
  237. open_file_t *open_file;
  238. FILE *f;
  239. microdesc_t **mdp;
  240. smartlist_t *wrote;
  241. size_t size;
  242. off_t off = 0;
  243. int orig_size, new_size;
  244. log_info(LD_DIR, "Rebuilding the microdescriptor cache...");
  245. orig_size = (int)(cache->cache_content ? cache->cache_content->size : 0);
  246. orig_size += (int)cache->journal_len;
  247. f = start_writing_to_stdio_file(cache->cache_fname,
  248. OPEN_FLAGS_REPLACE|O_BINARY,
  249. 0600, &open_file);
  250. if (!f)
  251. return -1;
  252. wrote = smartlist_create();
  253. HT_FOREACH(mdp, microdesc_map, &cache->map) {
  254. microdesc_t *md = *mdp;
  255. size_t annotation_len;
  256. if (md->no_save)
  257. continue;
  258. size = dump_microdescriptor(f, md, &annotation_len);
  259. md->off = off + annotation_len;
  260. off += size;
  261. if (md->saved_location != SAVED_IN_CACHE) {
  262. tor_free(md->body);
  263. md->saved_location = SAVED_IN_CACHE;
  264. }
  265. smartlist_add(wrote, md);
  266. }
  267. finish_writing_to_file(open_file); /*XXX Check me.*/
  268. if (cache->cache_content)
  269. tor_munmap_file(cache->cache_content);
  270. cache->cache_content = tor_mmap_file(cache->cache_fname);
  271. if (!cache->cache_content && smartlist_len(wrote)) {
  272. log_err(LD_DIR, "Couldn't map file that we just wrote to %s!",
  273. cache->cache_fname);
  274. smartlist_free(wrote);
  275. return -1;
  276. }
  277. SMARTLIST_FOREACH_BEGIN(wrote, microdesc_t *, md) {
  278. tor_assert(md->saved_location == SAVED_IN_CACHE);
  279. md->body = (char*)cache->cache_content->data + md->off;
  280. tor_assert(!memcmp(md->body, "onion-key", 9));
  281. } SMARTLIST_FOREACH_END(md);
  282. smartlist_free(wrote);
  283. write_str_to_file(cache->journal_fname, "", 1);
  284. cache->journal_len = 0;
  285. new_size = (int)cache->cache_content->size;
  286. log_info(LD_DIR, "Done rebuilding microdesc cache. "
  287. "Saved %d bytes; %d still used.",
  288. orig_size-new_size, new_size);
  289. return 0;
  290. }
  291. /** Deallocate a single microdescriptor. Note: the microdescriptor MUST have
  292. * previously been removed from the cache if it had ever been inserted. */
  293. void
  294. microdesc_free(microdesc_t *md)
  295. {
  296. if (!md)
  297. return;
  298. /* Must be removed from hash table! */
  299. if (md->onion_pkey)
  300. crypto_free_pk_env(md->onion_pkey);
  301. if (md->body && md->saved_location != SAVED_IN_CACHE)
  302. tor_free(md->body);
  303. if (md->family) {
  304. SMARTLIST_FOREACH(md->family, char *, cp, tor_free(cp));
  305. smartlist_free(md->family);
  306. }
  307. tor_free(md->exitsummary);
  308. tor_free(md);
  309. }
  310. /** Free all storage held in the microdesc.c module. */
  311. void
  312. microdesc_free_all(void)
  313. {
  314. if (the_microdesc_cache) {
  315. microdesc_cache_clear(the_microdesc_cache);
  316. tor_free(the_microdesc_cache->cache_fname);
  317. tor_free(the_microdesc_cache->journal_fname);
  318. tor_free(the_microdesc_cache);
  319. }
  320. }
  321. /** If there is a microdescriptor in <b>cache</b> whose sha256 digest is
  322. * <b>d</b>, return it. Otherwise return NULL. */
  323. microdesc_t *
  324. microdesc_cache_lookup_by_digest256(microdesc_cache_t *cache, const char *d)
  325. {
  326. microdesc_t *md, search;
  327. if (!cache)
  328. cache = get_microdesc_cache();
  329. memcpy(search.digest, d, DIGEST256_LEN);
  330. md = HT_FIND(microdesc_map, &cache->map, &search);
  331. return md;
  332. }
  333. /** Return the mean size of decriptors added to <b>cache</b> since it was last
  334. * cleared. Used to estimate the size of large downloads. */
  335. size_t
  336. microdesc_average_size(microdesc_cache_t *cache)
  337. {
  338. if (!cache)
  339. cache = get_microdesc_cache();
  340. if (!cache->n_seen)
  341. return 512;
  342. return (size_t)(cache->total_len_seen / cache->n_seen);
  343. }