storagedir.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. /* Copyright (c) 2017, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. #include "container.h"
  4. #include "compat.h"
  5. #include "confline.h"
  6. #include "memarea.h"
  7. #include "sandbox.h"
  8. #include "storagedir.h"
  9. #include "torlog.h"
  10. #include "util.h"
  11. #ifdef HAVE_SYS_TYPES_H
  12. #include <sys/types.h>
  13. #endif
  14. #ifdef HAVE_SYS_STAT_H
  15. #include <sys/stat.h>
  16. #endif
  17. #ifdef HAVE_UNISTD_H
  18. #include <unistd.h>
  19. #endif
/** Lowest number used as a filename in a storage directory: the loops in
 * this file that enumerate candidate filenames run from FNAME_MIN_NUM up to
 * (but not including) FNAME_MIN_NUM + max_files. */
#define FNAME_MIN_NUM 1000
/** A storage_dir_t represents a directory full of similar cached
 * files. Filenames are decimal integers. Files can be cleaned as needed
 * to limit total disk usage. */
struct storage_dir_t {
  /** Directory holding the files for this storagedir. */
  char *directory;
  /** Either NULL, or a directory listing of the directory (as a smartlist
   * of strings). */
  smartlist_t *contents;
  /** The largest number of non-temporary files we'll place in the
   * directory. */
  int max_files;
  /** If true, then 'usage' has been computed. */
  int usage_known;
  /** The total number of bytes used in this directory. Only meaningful
   * while 'usage_known' is true. */
  uint64_t usage;
};
  38. /** Create or open a new storage directory at <b>dirname</b>, with
  39. * capacity for up to <b>max_files</b> files.
  40. */
  41. storage_dir_t *
  42. storage_dir_new(const char *dirname, int max_files)
  43. {
  44. if (check_private_dir(dirname, CPD_CREATE, NULL) < 0)
  45. return NULL;
  46. storage_dir_t *d = tor_malloc_zero(sizeof(storage_dir_t));
  47. d->directory = tor_strdup(dirname);
  48. d->max_files = max_files;
  49. return d;
  50. }
  51. /**
  52. * Drop all in-RAM storage for <b>d</b>. Does not delete any files.
  53. */
  54. void
  55. storage_dir_free(storage_dir_t *d)
  56. {
  57. if (d == NULL)
  58. return;
  59. tor_free(d->directory);
  60. if (d->contents) {
  61. SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
  62. smartlist_free(d->contents);
  63. }
  64. tor_free(d);
  65. }
  66. /**
  67. * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
  68. * operations that <b>d</b> will need.
  69. *
  70. * The presence of this function is why we need an upper limit on the
  71. * number of filers in a storage_dir_t: we need to approve file
  72. * operaitons one by one.
  73. */
  74. int
  75. storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg)
  76. {
  77. int problems = 0;
  78. int idx;
  79. for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) {
  80. char *path = NULL, *tmppath = NULL;
  81. tor_asprintf(&path, "%s/%d", d->directory, idx);
  82. tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx);
  83. problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(path));
  84. problems += sandbox_cfg_allow_open_filename(cfg, tor_strdup(tmppath));
  85. problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(path));
  86. problems += sandbox_cfg_allow_stat_filename(cfg, tor_strdup(tmppath));
  87. problems += sandbox_cfg_allow_rename(cfg,
  88. tor_strdup(tmppath), tor_strdup(path));
  89. tor_free(path);
  90. tor_free(tmppath);
  91. }
  92. return problems ? -1 : 0;
  93. }
  94. /**
  95. * Remove all files in <b>d</b> whose names end with ".tmp".
  96. *
  97. * Requires that the contents field of <b>d</b> is set.
  98. */
  99. static void
  100. storage_dir_clean_tmpfiles(storage_dir_t *d)
  101. {
  102. if (!d->contents)
  103. return;
  104. SMARTLIST_FOREACH_BEGIN(d->contents, char *, fname) {
  105. if (strcmpend(fname, ".tmp"))
  106. continue;
  107. char *path = NULL;
  108. tor_asprintf(&path, "%s/%s", d->directory, fname);
  109. if (unlink(sandbox_intern_string(path))) {
  110. log_warn(LD_FS, "Unable to unlink %s", escaped(path));
  111. tor_free(path);
  112. continue;
  113. }
  114. tor_free(path);
  115. SMARTLIST_DEL_CURRENT(d->contents, fname);
  116. tor_free(fname);
  117. } SMARTLIST_FOREACH_END(fname);
  118. d->usage_known = 0;
  119. }
  120. /**
  121. * Re-scan the directory <b>d</b> to learn its contents.
  122. */
  123. static int
  124. storage_dir_rescan(storage_dir_t *d)
  125. {
  126. if (d->contents) {
  127. SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
  128. smartlist_free(d->contents);
  129. }
  130. d->usage = 0;
  131. d->usage_known = 0;
  132. if (NULL == (d->contents = tor_listdir(d->directory))) {
  133. return -1;
  134. }
  135. storage_dir_clean_tmpfiles(d);
  136. return 0;
  137. }
  138. /**
  139. * Return a smartlist containing the filenames within <b>d</b>.
  140. */
  141. const smartlist_t *
  142. storage_dir_list(storage_dir_t *d)
  143. {
  144. if (! d->contents)
  145. storage_dir_rescan(d);
  146. return d->contents;
  147. }
  148. /**
  149. * Return the total number of bytes used for storage in <b>d</b>.
  150. */
  151. uint64_t
  152. storage_dir_get_usage(storage_dir_t *d)
  153. {
  154. if (d->usage_known)
  155. return d->usage;
  156. uint64_t total = 0;
  157. SMARTLIST_FOREACH_BEGIN(storage_dir_list(d), const char *, cp) {
  158. char *path = NULL;
  159. struct stat st;
  160. tor_asprintf(&path, "%s/%s", d->directory, cp);
  161. if (stat(sandbox_intern_string(path), &st) == 0) {
  162. total += st.st_size;
  163. }
  164. tor_free(path);
  165. } SMARTLIST_FOREACH_END(cp);
  166. d->usage = total;
  167. d->usage_known = 1;
  168. return d->usage;
  169. }
  170. /** Mmap a specified file within <b>d</b>. */
  171. tor_mmap_t *
  172. storage_dir_map(storage_dir_t *d, const char *fname)
  173. {
  174. char *path = NULL;
  175. tor_asprintf(&path, "%s/%s", d->directory, fname);
  176. tor_mmap_t *result = tor_mmap_file(path);
  177. tor_free(path);
  178. return result;
  179. }
  180. /** Read a file within <b>d</b> into a newly allocated buffer. Set
  181. * *<b>sz_out</b> to its size. */
  182. uint8_t *
  183. storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
  184. {
  185. const int flags = bin ? RFTS_BIN : 0;
  186. char *path = NULL;
  187. tor_asprintf(&path, "%s/%s", d->directory, fname);
  188. struct stat st;
  189. char *contents = read_file_to_str(path, flags, &st);
  190. if (contents && sz_out) {
  191. // it fits in RAM, so we know its size is less than SIZE_MAX
  192. tor_assert((uint64_t)st.st_size <= SIZE_MAX);
  193. *sz_out = (size_t) st.st_size;
  194. }
  195. tor_free(path);
  196. return (uint8_t *) contents;
  197. }
  198. /** Helper: Find an unused filename within the directory */
  199. static char *
  200. find_unused_fname(storage_dir_t *d)
  201. {
  202. if (!d->contents) {
  203. if (storage_dir_rescan(d) < 0)
  204. return NULL;
  205. }
  206. char buf[16];
  207. int i;
  208. /* Yuck; this is quadratic. Fortunately, that shouldn't matter much,
  209. * since disk writes are more expensive by a lot. */
  210. for (i = FNAME_MIN_NUM; i < FNAME_MIN_NUM + d->max_files; ++i) {
  211. tor_snprintf(buf, sizeof(buf), "%d", i);
  212. if (!smartlist_contains_string(d->contents, buf)) {
  213. return tor_strdup(buf);
  214. }
  215. }
  216. return NULL;
  217. }
  218. /** Helper: As storage_dir_save_bytes_to_file, but store a smartlist of
  219. * sized_chunk_t rather than a single byte array. */
  220. static int
  221. storage_dir_save_chunks_to_file(storage_dir_t *d,
  222. const smartlist_t *chunks,
  223. int binary,
  224. char **fname_out)
  225. {
  226. uint64_t total_length = 0;
  227. char *fname = find_unused_fname(d);
  228. if (!fname)
  229. return -1;
  230. SMARTLIST_FOREACH(chunks, const sized_chunk_t *, ch,
  231. total_length += ch->len);
  232. char *path = NULL;
  233. tor_asprintf(&path, "%s/%s", d->directory, fname);
  234. int r = write_chunks_to_file(path, chunks, binary, 0);
  235. if (r == 0) {
  236. if (d->usage_known)
  237. d->usage += total_length;
  238. if (fname_out) {
  239. *fname_out = tor_strdup(fname);
  240. }
  241. if (d->contents)
  242. smartlist_add(d->contents, tor_strdup(fname));
  243. }
  244. tor_free(fname);
  245. tor_free(path);
  246. return r;
  247. }
  248. /** Try to write the <b>length</b> bytes at <b>data</b> into a new file
  249. * in <b>d</b>. On success, return 0 and set *<b>fname_out</b> to a
  250. * newly allocated string containing the filename. On failure, return
  251. * -1. */
  252. int
  253. storage_dir_save_bytes_to_file(storage_dir_t *d,
  254. const uint8_t *data,
  255. size_t length,
  256. int binary,
  257. char **fname_out)
  258. {
  259. smartlist_t *chunks = smartlist_new();
  260. sized_chunk_t chunk = { (const char *)data, length };
  261. smartlist_add(chunks, &chunk);
  262. int r = storage_dir_save_chunks_to_file(d, chunks, binary, fname_out);
  263. smartlist_free(chunks);
  264. return r;
  265. }
  266. /**
  267. * As storage_dir_save_bytes_to_file, but saves a NUL-terminated string
  268. * <b>str</b>.
  269. */
  270. int
  271. storage_dir_save_string_to_file(storage_dir_t *d,
  272. const char *str,
  273. int binary,
  274. char **fname_out)
  275. {
  276. return storage_dir_save_bytes_to_file(d,
  277. (const uint8_t*)str, strlen(str), binary, fname_out);
  278. }
  279. /**
  280. * As storage_dir_save_bytes_to_file, but associates the data with the
  281. * key-value pairs in <b>labels</b>. Files
  282. * stored in this format can be recovered with storage_dir_map_labeled
  283. * or storage_dir_read_labeled().
  284. */
  285. int
  286. storage_dir_save_labeled_to_file(storage_dir_t *d,
  287. const config_line_t *labels,
  288. const uint8_t *data,
  289. size_t length,
  290. char **fname_out)
  291. {
  292. /*
  293. * The storage format is to prefix the data with the key-value pairs in
  294. * <b>labels</b>, and a single NUL separator. But code outside this module
  295. * MUST NOT rely on that format.
  296. */
  297. smartlist_t *chunks = smartlist_new();
  298. memarea_t *area = memarea_new();
  299. const config_line_t *line;
  300. for (line = labels; line; line = line->next) {
  301. sized_chunk_t *sz = memarea_alloc(area, sizeof(sized_chunk_t));
  302. sz->len = strlen(line->key) + 1 + strlen(line->value) + 1;
  303. const size_t allocated = sz->len + 1;
  304. char *bytes = memarea_alloc(area, allocated);
  305. tor_snprintf(bytes, allocated, "%s %s\n", line->key, line->value);
  306. sz->bytes = bytes;
  307. smartlist_add(chunks, sz);
  308. }
  309. sized_chunk_t *nul = memarea_alloc(area, sizeof(sized_chunk_t));
  310. nul->len = 1;
  311. nul->bytes = "\0";
  312. smartlist_add(chunks, nul);
  313. sized_chunk_t *datachunk = memarea_alloc(area, sizeof(sized_chunk_t));
  314. datachunk->bytes = (const char *)data;
  315. datachunk->len = length;
  316. smartlist_add(chunks, datachunk);
  317. int r = storage_dir_save_chunks_to_file(d, chunks, 1, fname_out);
  318. smartlist_free(chunks);
  319. memarea_drop_all(area);
  320. return r;
  321. }
  322. /**
  323. * Map a file that was created with storage_dir_save_labeled(). On failure,
  324. * return NULL. On success, write a set of newly allocated labels into to
  325. * *<b>labels_out</b>, a pointer to the into *<b>data_out</b>, and the data's
  326. * into *<b>sz_out</b>. On success, also return a tor_mmap_t object whose
  327. * contents should not be used -- it needs to be kept around, though, for as
  328. * long as <b>data_out</b> is going to be valid.
  329. */
  330. tor_mmap_t *
  331. storage_dir_map_labeled(storage_dir_t *dir,
  332. const char *fname,
  333. config_line_t **labels_out,
  334. const uint8_t **data_out,
  335. size_t *sz_out)
  336. {
  337. tor_mmap_t *m = storage_dir_map(dir, fname);
  338. if (! m)
  339. goto err;
  340. const char *nulp = memchr(m->data, '\0', m->size);
  341. if (! nulp)
  342. goto err;
  343. if (labels_out && config_get_lines(m->data, labels_out, 0) < 0)
  344. goto err;
  345. size_t offset = nulp - m->data + 1;
  346. tor_assert(offset <= m->size);
  347. *data_out = (const uint8_t *)(m->data + offset);
  348. *sz_out = m->size - offset;
  349. return m;
  350. err:
  351. tor_munmap_file(m);
  352. return NULL;
  353. }
  354. /** As storage_dir_map_labeled, but return a new byte array containing the
  355. * data. */
  356. uint8_t *
  357. storage_dir_read_labeled(storage_dir_t *dir,
  358. const char *fname,
  359. config_line_t **labels_out,
  360. size_t *sz_out)
  361. {
  362. const uint8_t *data = NULL;
  363. tor_mmap_t *m = storage_dir_map_labeled(dir, fname, labels_out,
  364. &data, sz_out);
  365. if (m == NULL)
  366. return NULL;
  367. uint8_t *result = tor_memdup(data, *sz_out);
  368. tor_munmap_file(m);
  369. return result;
  370. }
  371. /**
  372. * Remove the file called <b>fname</b> from <b>d</b>.
  373. */
  374. void
  375. storage_dir_remove_file(storage_dir_t *d,
  376. const char *fname)
  377. {
  378. char *path = NULL;
  379. tor_asprintf(&path, "%s/%s", d->directory, fname);
  380. const char *ipath = sandbox_intern_string(path);
  381. uint64_t size = 0;
  382. if (d->usage_known) {
  383. struct stat st;
  384. if (stat(ipath, &st) == 0) {
  385. size = st.st_size;
  386. }
  387. }
  388. if (unlink(ipath) == 0) {
  389. d->usage -= size;
  390. } else {
  391. log_warn(LD_FS, "Unable to unlink %s", escaped(path));
  392. tor_free(path);
  393. return;
  394. }
  395. if (d->contents) {
  396. smartlist_string_remove(d->contents, fname);
  397. }
  398. tor_free(path);
  399. }
  400. /** Helper type: used to sort the members of storage directory by mtime. */
  401. typedef struct shrinking_dir_entry_t {
  402. time_t mtime;
  403. uint64_t size;
  404. char *path;
  405. } shrinking_dir_entry_t;
  406. /** Helper: use with qsort to sort shrinking_dir_entry_t structs. */
  407. static int
  408. shrinking_dir_entry_compare(const void *a_, const void *b_)
  409. {
  410. const shrinking_dir_entry_t *a = a_;
  411. const shrinking_dir_entry_t *b = b_;
  412. if (a->mtime < b->mtime)
  413. return -1;
  414. else if (a->mtime > b->mtime)
  415. return 1;
  416. else
  417. return 0;
  418. }
  419. /**
  420. * Try to free space by removing the oldest files in <b>d</b>. Delete
  421. * until no more than <b>target_size</b> bytes are left, and at least
  422. * <b>min_to_remove</b> files have been removed... or until there is
  423. * nothing left to remove.
  424. *
  425. * Return 0 on success; -1 on failure.
  426. */
  427. int
  428. storage_dir_shrink(storage_dir_t *d,
  429. uint64_t target_size,
  430. int min_to_remove)
  431. {
  432. if (d->usage_known && d->usage <= target_size && !min_to_remove) {
  433. /* Already small enough. */
  434. return 0;
  435. }
  436. if (storage_dir_rescan(d) < 0)
  437. return -1;
  438. const uint64_t orig_usage = storage_dir_get_usage(d);
  439. if (orig_usage <= target_size && !min_to_remove) {
  440. /* Okay, small enough after rescan! */
  441. return 0;
  442. }
  443. const int n = smartlist_len(d->contents);
  444. shrinking_dir_entry_t *ents = tor_calloc(n, sizeof(shrinking_dir_entry_t));
  445. SMARTLIST_FOREACH_BEGIN(d->contents, const char *, fname) {
  446. shrinking_dir_entry_t *ent = &ents[fname_sl_idx];
  447. struct stat st;
  448. tor_asprintf(&ent->path, "%s/%s", d->directory, fname);
  449. if (stat(sandbox_intern_string(ent->path), &st) == 0) {
  450. ent->mtime = st.st_mtime;
  451. ent->size = st.st_size;
  452. }
  453. } SMARTLIST_FOREACH_END(fname);
  454. qsort(ents, n, sizeof(shrinking_dir_entry_t), shrinking_dir_entry_compare);
  455. int idx = 0;
  456. while ((d->usage > target_size || min_to_remove > 0) && idx < n) {
  457. if (unlink(sandbox_intern_string(ents[idx].path)) == 0) {
  458. if (! BUG(d->usage < ents[idx].size)) {
  459. d->usage -= ents[idx].size;
  460. }
  461. --min_to_remove;
  462. }
  463. ++idx;
  464. }
  465. for (idx = 0; idx < n; ++idx) {
  466. tor_free(ents[idx].path);
  467. }
  468. tor_free(ents);
  469. storage_dir_rescan(d);
  470. return 0;
  471. }
  472. /** Remove all files in <b>d</b>. */
  473. int
  474. storage_dir_remove_all(storage_dir_t *d)
  475. {
  476. return storage_dir_shrink(d, 0, d->max_files);
  477. }
  478. /**
  479. * Return the largest number of non-temporary files we're willing to
  480. * store in <b>d</b>.
  481. */
  482. int
  483. storage_dir_get_max_files(storage_dir_t *d)
  484. {
  485. return d->max_files;
  486. }