storagedir.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. /* Copyright (c) 2017-2018, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. #include "lib/fs/storagedir.h"
  4. #include "lib/container/smartlist.h"
  5. #include "lib/encoding/confline.h"
  6. #include "lib/fs/dir.h"
  7. #include "lib/fs/files.h"
  8. #include "lib/fs/mmap.h"
  9. #include "lib/log/escape.h"
  10. #include "lib/log/torlog.h"
  11. #include "lib/log/util_bug.h"
  12. #include "lib/malloc/util_malloc.h"
  13. #include "lib/memarea/memarea.h"
  14. #include "lib/sandbox/sandbox.h"
  15. #include "lib/string/printf.h"
  16. #include "lib/string/util_string.h"
  17. #ifdef HAVE_SYS_TYPES_H
  18. #include <sys/types.h>
  19. #endif
  20. #ifdef HAVE_SYS_STAT_H
  21. #include <sys/stat.h>
  22. #endif
  23. #ifdef HAVE_UNISTD_H
  24. #include <unistd.h>
  25. #endif
  26. #include <stdlib.h>
  27. #include <errno.h>
  28. #include <string.h>
  /** Lowest number used as a filename inside a storage directory.  Candidate
   * filenames run from this value up to FNAME_MIN_NUM + max_files - 1. */
  29. #define FNAME_MIN_NUM 1000
  30. /** A storage_dir_t represents a directory full of similar cached
  31. * files. Filenames are decimal integers. Files can be cleaned as needed
  32. * to limit total disk usage. */
  33. struct storage_dir_t {
  34. /** Path of the directory holding the files for this storagedir. */
  35. char *directory;
  36. /** Either NULL, or a directory listing of the directory (as a smartlist
  37. * of strings).  Filled in by storage_dir_rescan(). */
  38. smartlist_t *contents;
  39. /** The largest number of non-temporary files we'll place in the
  40. * directory. */
  41. int max_files;
  42. /** If true, then 'usage' has been computed. */
  43. int usage_known;
  44. /** The total number of bytes used in this directory.  Only meaningful
   * while usage_known is true. */
  45. uint64_t usage;
  46. };
  47. /** Create or open a new storage directory at <b>dirname</b>, with
  48. * capacity for up to <b>max_files</b> files.
  49. */
  50. storage_dir_t *
  51. storage_dir_new(const char *dirname, int max_files)
  52. {
  53. if (check_private_dir(dirname, CPD_CREATE, NULL) < 0)
  54. return NULL;
  55. storage_dir_t *d = tor_malloc_zero(sizeof(storage_dir_t));
  56. d->directory = tor_strdup(dirname);
  57. d->max_files = max_files;
  58. return d;
  59. }
  60. /**
  61. * Drop all in-RAM storage for <b>d</b>. Does not delete any files.
  62. */
  63. void
  64. storage_dir_free_(storage_dir_t *d)
  65. {
  66. if (d == NULL)
  67. return;
  68. tor_free(d->directory);
  69. if (d->contents) {
  70. SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
  71. smartlist_free(d->contents);
  72. }
  73. tor_free(d);
  74. }
  75. /**
  76. * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
  77. * operations that <b>d</b> will need.
  78. *
  79. * The presence of this function is why we need an upper limit on the
  80. * number of files in a storage_dir_t: we need to approve file operations
  81. * one by one.
  82. */
  83. int
  84. storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg)
  85. {
  86. int problems = 0;
  87. int idx;
  88. for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) {
  89. char *path = NULL, *tmppath = NULL;
  90. tor_asprintf(&path, "%s/%d", d->directory, idx);
  91. tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx);
  92. problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(path));
  93. problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(tmppath));
  94. problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(path));
  95. problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(tmppath));
  96. problems += sandbox_cfg_allow_rename(cfg,
  97. tor_strdup(tmppath), tor_strdup(path));
  98. tor_free(path);
  99. tor_free(tmppath);
  100. }
  101. return problems ? -1 : 0;
  102. }
  103. /**
  104. * Remove all files in <b>d</b> whose names end with ".tmp".
  105. *
  106. * Requires that the contents field of <b>d</b> is set.
  107. */
  108. static void
  109. storage_dir_clean_tmpfiles(storage_dir_t *d)
  110. {
  111. if (!d->contents)
  112. return;
  113. SMARTLIST_FOREACH_BEGIN(d->contents, char *, fname) {
  114. if (strcmpend(fname, ".tmp"))
  115. continue;
  116. char *path = NULL;
  117. tor_asprintf(&path, "%s/%s", d->directory, fname);
  118. if (unlink(sandbox_intern_string(path))) {
  119. log_warn(LD_FS, "Unable to unlink %s while cleaning "
  120. "temporary files: %s", escaped(path), strerror(errno));
  121. tor_free(path);
  122. continue;
  123. }
  124. tor_free(path);
  125. SMARTLIST_DEL_CURRENT(d->contents, fname);
  126. tor_free(fname);
  127. } SMARTLIST_FOREACH_END(fname);
  128. d->usage_known = 0;
  129. }
  130. /**
  131. * Re-scan the directory <b>d</b> to learn its contents.
  132. */
  133. static int
  134. storage_dir_rescan(storage_dir_t *d)
  135. {
  136. if (d->contents) {
  137. SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
  138. smartlist_free(d->contents);
  139. }
  140. d->usage = 0;
  141. d->usage_known = 0;
  142. if (NULL == (d->contents = tor_listdir(d->directory))) {
  143. return -1;
  144. }
  145. storage_dir_clean_tmpfiles(d);
  146. return 0;
  147. }
  148. /**
  149. * Return a smartlist containing the filenames within <b>d</b>.
  150. */
  151. const smartlist_t *
  152. storage_dir_list(storage_dir_t *d)
  153. {
  154. if (! d->contents)
  155. storage_dir_rescan(d);
  156. return d->contents;
  157. }
  158. /**
  159. * Return the total number of bytes used for storage in <b>d</b>.
  160. */
  161. uint64_t
  162. storage_dir_get_usage(storage_dir_t *d)
  163. {
  164. if (d->usage_known)
  165. return d->usage;
  166. uint64_t total = 0;
  167. SMARTLIST_FOREACH_BEGIN(storage_dir_list(d), const char *, cp) {
  168. char *path = NULL;
  169. struct stat st;
  170. tor_asprintf(&path, "%s/%s", d->directory, cp);
  171. if (stat(sandbox_intern_string(path), &st) == 0) {
  172. total += st.st_size;
  173. }
  174. tor_free(path);
  175. } SMARTLIST_FOREACH_END(cp);
  176. d->usage = total;
  177. d->usage_known = 1;
  178. return d->usage;
  179. }
  180. /** Mmap a specified file within <b>d</b>.
  181. *
  182. * On failure, return NULL and set errno as for tor_mmap_file(). */
  183. tor_mmap_t *
  184. storage_dir_map(storage_dir_t *d, const char *fname)
  185. {
  186. char *path = NULL;
  187. tor_asprintf(&path, "%s/%s", d->directory, fname);
  188. tor_mmap_t *result = tor_mmap_file(path);
  189. int errval = errno;
  190. tor_free(path);
  191. if (result == NULL)
  192. errno = errval;
  193. return result;
  194. }
  195. /** Read a file within <b>d</b> into a newly allocated buffer. Set
  196. * *<b>sz_out</b> to its size. */
  197. uint8_t *
  198. storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
  199. {
  200. const int flags = bin ? RFTS_BIN : 0;
  201. char *path = NULL;
  202. tor_asprintf(&path, "%s/%s", d->directory, fname);
  203. struct stat st;
  204. char *contents = read_file_to_str(path, flags, &st);
  205. if (contents && sz_out) {
  206. // it fits in RAM, so we know its size is less than SIZE_MAX
  207. #if UINT64_MAX > SIZE_MAX
  208. tor_assert((uint64_t)st.st_size <= SIZE_MAX);
  209. #endif
  210. *sz_out = (size_t) st.st_size;
  211. }
  212. tor_free(path);
  213. return (uint8_t *) contents;
  214. }
  215. /** Helper: Find an unused filename within the directory */
  216. static char *
  217. find_unused_fname(storage_dir_t *d)
  218. {
  219. if (!d->contents) {
  220. if (storage_dir_rescan(d) < 0)
  221. return NULL;
  222. }
  223. char buf[16];
  224. int i;
  225. /* Yuck; this is quadratic. Fortunately, that shouldn't matter much,
  226. * since disk writes are more expensive by a lot. */
  227. for (i = FNAME_MIN_NUM; i < FNAME_MIN_NUM + d->max_files; ++i) {
  228. tor_snprintf(buf, sizeof(buf), "%d", i);
  229. if (!smartlist_contains_string(d->contents, buf)) {
  230. return tor_strdup(buf);
  231. }
  232. }
  233. return NULL;
  234. }
  235. /** Helper: As storage_dir_save_bytes_to_file, but store a smartlist of
  236. * sized_chunk_t rather than a single byte array. */
  237. static int
  238. storage_dir_save_chunks_to_file(storage_dir_t *d,
  239. const smartlist_t *chunks,
  240. int binary,
  241. char **fname_out)
  242. {
  243. uint64_t total_length = 0;
  244. char *fname = find_unused_fname(d);
  245. if (!fname)
  246. return -1;
  247. SMARTLIST_FOREACH(chunks, const sized_chunk_t *, ch,
  248. total_length += ch->len);
  249. char *path = NULL;
  250. tor_asprintf(&path, "%s/%s", d->directory, fname);
  251. int r = write_chunks_to_file(path, chunks, binary, 0);
  252. if (r == 0) {
  253. if (d->usage_known)
  254. d->usage += total_length;
  255. if (fname_out) {
  256. *fname_out = tor_strdup(fname);
  257. }
  258. if (d->contents)
  259. smartlist_add(d->contents, tor_strdup(fname));
  260. }
  261. tor_free(fname);
  262. tor_free(path);
  263. return r;
  264. }
  265. /** Try to write the <b>length</b> bytes at <b>data</b> into a new file
  266. * in <b>d</b>. On success, return 0 and set *<b>fname_out</b> to a
  267. * newly allocated string containing the filename. On failure, return
  268. * -1. */
  269. int
  270. storage_dir_save_bytes_to_file(storage_dir_t *d,
  271. const uint8_t *data,
  272. size_t length,
  273. int binary,
  274. char **fname_out)
  275. {
  276. smartlist_t *chunks = smartlist_new();
  277. sized_chunk_t chunk = { (const char *)data, length };
  278. smartlist_add(chunks, &chunk);
  279. int r = storage_dir_save_chunks_to_file(d, chunks, binary, fname_out);
  280. smartlist_free(chunks);
  281. return r;
  282. }
  283. /**
  284. * As storage_dir_save_bytes_to_file, but saves a NUL-terminated string
  285. * <b>str</b>.
  286. */
  287. int
  288. storage_dir_save_string_to_file(storage_dir_t *d,
  289. const char *str,
  290. int binary,
  291. char **fname_out)
  292. {
  293. return storage_dir_save_bytes_to_file(d,
  294. (const uint8_t*)str, strlen(str), binary, fname_out);
  295. }
  296. /**
  297. * As storage_dir_save_bytes_to_file, but associates the data with the
  298. * key-value pairs in <b>labels</b>. Files stored in this format can be
  299. * recovered with storage_dir_map_labeled() or storage_dir_read_labeled().
  300. */
  301. int
  302. storage_dir_save_labeled_to_file(storage_dir_t *d,
  303. const config_line_t *labels,
  304. const uint8_t *data,
  305. size_t length,
  306. char **fname_out)
  307. {
  308. /*
  309. * The storage format is to prefix the data with the key-value pairs in
  310. * <b>labels</b>, and a single NUL separator. But code outside this module
  311. * MUST NOT rely on that format.
  312. */
  313. smartlist_t *chunks = smartlist_new();
  314. memarea_t *area = memarea_new();
  315. const config_line_t *line;
  316. for (line = labels; line; line = line->next) {
  317. sized_chunk_t *sz = memarea_alloc(area, sizeof(sized_chunk_t));
  318. sz->len = strlen(line->key) + 1 + strlen(line->value) + 1;
  319. const size_t allocated = sz->len + 1;
  320. char *bytes = memarea_alloc(area, allocated);
  321. tor_snprintf(bytes, allocated, "%s %s\n", line->key, line->value);
  322. sz->bytes = bytes;
  323. smartlist_add(chunks, sz);
  324. }
  325. sized_chunk_t *nul = memarea_alloc(area, sizeof(sized_chunk_t));
  326. nul->len = 1;
  327. nul->bytes = "\0";
  328. smartlist_add(chunks, nul);
  329. sized_chunk_t *datachunk = memarea_alloc(area, sizeof(sized_chunk_t));
  330. datachunk->bytes = (const char *)data;
  331. datachunk->len = length;
  332. smartlist_add(chunks, datachunk);
  333. int r = storage_dir_save_chunks_to_file(d, chunks, 1, fname_out);
  334. smartlist_free(chunks);
  335. memarea_drop_all(area);
  336. return r;
  337. }
  338. /**
  339. * Map a file that was created with storage_dir_save_labeled_to_file(). On
  340. * failure, return NULL. On success, write a set of newly allocated labels
  341. * into *<b>labels_out</b>, a pointer to the data into *<b>data_out</b>, and
  342. * the data's size into *<b>sz_out</b>. On success, also return a tor_mmap_t
  343. * object whose contents should not be used -- it needs to be kept around,
  344. * though, for as long as <b>data_out</b> is going to be valid.
  345. *
  346. * On failure, set errno as for tor_mmap_file() if the file was missing or
  347. * empty, and set errno to EINVAL if the file was not in the labeled
  348. * format expected.
  349. */
  350. tor_mmap_t *
  351. storage_dir_map_labeled(storage_dir_t *dir,
  352. const char *fname,
  353. config_line_t **labels_out,
  354. const uint8_t **data_out,
  355. size_t *sz_out)
  356. {
  357. tor_mmap_t *m = storage_dir_map(dir, fname);
  358. int errval;
  359. if (! m) {
  360. errval = errno;
  361. goto err;
  362. }
  363. const char *nulp = memchr(m->data, '\0', m->size);
  364. if (! nulp) {
  365. errval = EINVAL;
  366. goto err;
  367. }
  368. if (labels_out && config_get_lines(m->data, labels_out, 0) < 0) {
  369. errval = EINVAL;
  370. goto err;
  371. }
  372. size_t offset = nulp - m->data + 1;
  373. tor_assert(offset <= m->size);
  374. *data_out = (const uint8_t *)(m->data + offset);
  375. *sz_out = m->size - offset;
  376. return m;
  377. err:
  378. tor_munmap_file(m);
  379. errno = errval;
  380. return NULL;
  381. }
  382. /** As storage_dir_map_labeled, but return a new byte array containing the
  383. * data. */
  384. uint8_t *
  385. storage_dir_read_labeled(storage_dir_t *dir,
  386. const char *fname,
  387. config_line_t **labels_out,
  388. size_t *sz_out)
  389. {
  390. const uint8_t *data = NULL;
  391. tor_mmap_t *m = storage_dir_map_labeled(dir, fname, labels_out,
  392. &data, sz_out);
  393. if (m == NULL)
  394. return NULL;
  395. uint8_t *result = tor_memdup(data, *sz_out);
  396. tor_munmap_file(m);
  397. return result;
  398. }
  399. /* Reduce the cached usage amount in <b>d</b> by <b>removed_file_size</b>.
  400. * This function is a no-op if <b>d->usage_known</b> is 0. */
  401. static void
  402. storage_dir_reduce_usage(storage_dir_t *d, uint64_t removed_file_size)
  403. {
  404. if (d->usage_known) {
  405. if (! BUG(d->usage < removed_file_size)) {
  406. /* This bug can also be triggered if an external process resized a file
  407. * between the call to storage_dir_get_usage() that last checked
  408. * actual usage (rather than relaying on cached usage), and the call to
  409. * this function. */
  410. d->usage -= removed_file_size;
  411. } else {
  412. /* If we underflowed the cached directory size, re-check the sizes of all
  413. * the files in the directory. This makes storage_dir_shrink() quadratic,
  414. * but only if a process is continually changing file sizes in the
  415. * storage directory (in which case, we have bigger issues).
  416. *
  417. * We can't just reset usage_known, because storage_dir_shrink() relies
  418. * on knowing the usage. */
  419. storage_dir_rescan(d);
  420. (void)storage_dir_get_usage(d);
  421. }
  422. }
  423. }
  424. /**
  425. * Remove the file called <b>fname</b> from <b>d</b>.
  426. */
  427. void
  428. storage_dir_remove_file(storage_dir_t *d,
  429. const char *fname)
  430. {
  431. char *path = NULL;
  432. tor_asprintf(&path, "%s/%s", d->directory, fname);
  433. const char *ipath = sandbox_intern_string(path);
  434. uint64_t size = 0;
  435. if (d->usage_known) {
  436. struct stat st;
  437. if (stat(ipath, &st) == 0) {
  438. size = st.st_size;
  439. }
  440. }
  441. if (unlink(ipath) == 0) {
  442. storage_dir_reduce_usage(d, size);
  443. } else {
  444. log_warn(LD_FS, "Unable to unlink %s while removing file: %s",
  445. escaped(path), strerror(errno));
  446. tor_free(path);
  447. return;
  448. }
  449. if (d->contents) {
  450. smartlist_string_remove(d->contents, fname);
  451. }
  452. tor_free(path);
  453. }
  /** Helper type: one file in a storage directory, as recorded while
   * deciding what to delete: its modification time, its size in bytes, and
   * its path. */
  typedef struct shrinking_dir_entry_t {
    time_t mtime;
    uint64_t size;
    char *path;
  } shrinking_dir_entry_t;

  /** Helper: qsort() comparison callback that orders shrinking_dir_entry_t
   * structs by ascending mtime.  Returns -1, 0, or 1. */
  static int
  shrinking_dir_entry_compare(const void *a_, const void *b_)
  {
    const shrinking_dir_entry_t *lhs = a_;
    const shrinking_dir_entry_t *rhs = b_;

    /* Each comparison yields 0 or 1, so the difference is -1, 0, or 1. */
    return (lhs->mtime > rhs->mtime) - (lhs->mtime < rhs->mtime);
  }
  473. /**
  474. * Try to free space by removing the oldest files in <b>d</b>. Delete
  475. * until no more than <b>target_size</b> bytes are left, and at least
  476. * <b>min_to_remove</b> files have been removed... or until there is
  477. * nothing left to remove.
  478. *
  479. * Return 0 on success; -1 on failure.
  480. */
  481. int
  482. storage_dir_shrink(storage_dir_t *d,
  483. uint64_t target_size,
  484. int min_to_remove)
  485. {
  486. if (d->usage_known && d->usage <= target_size && !min_to_remove) {
  487. /* Already small enough. */
  488. return 0;
  489. }
  490. if (storage_dir_rescan(d) < 0)
  491. return -1;
  492. const uint64_t orig_usage = storage_dir_get_usage(d);
  493. if (orig_usage <= target_size && !min_to_remove) {
  494. /* Okay, small enough after rescan! */
  495. return 0;
  496. }
  497. const int n = smartlist_len(d->contents);
  498. shrinking_dir_entry_t *ents = tor_calloc(n, sizeof(shrinking_dir_entry_t));
  499. SMARTLIST_FOREACH_BEGIN(d->contents, const char *, fname) {
  500. shrinking_dir_entry_t *ent = &ents[fname_sl_idx];
  501. struct stat st;
  502. tor_asprintf(&ent->path, "%s/%s", d->directory, fname);
  503. if (stat(sandbox_intern_string(ent->path), &st) == 0) {
  504. ent->mtime = st.st_mtime;
  505. ent->size = st.st_size;
  506. }
  507. } SMARTLIST_FOREACH_END(fname);
  508. qsort(ents, n, sizeof(shrinking_dir_entry_t), shrinking_dir_entry_compare);
  509. int idx = 0;
  510. while ((d->usage > target_size || min_to_remove > 0) && idx < n) {
  511. if (unlink(sandbox_intern_string(ents[idx].path)) == 0) {
  512. storage_dir_reduce_usage(d, ents[idx].size);
  513. --min_to_remove;
  514. }
  515. ++idx;
  516. }
  517. for (idx = 0; idx < n; ++idx) {
  518. tor_free(ents[idx].path);
  519. }
  520. tor_free(ents);
  521. storage_dir_rescan(d);
  522. return 0;
  523. }
  524. /** Remove all files in <b>d</b>. */
  525. int
  526. storage_dir_remove_all(storage_dir_t *d)
  527. {
  528. return storage_dir_shrink(d, 0, d->max_files);
  529. }
  530. /**
  531. * Return the largest number of non-temporary files we're willing to
  532. * store in <b>d</b>.
  533. */
  534. int
  535. storage_dir_get_max_files(storage_dir_t *d)
  536. {
  537. return d->max_files;
  538. }