storagedir.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. /* Copyright (c) 2017, The Tor Project, Inc. */
  2. /* See LICENSE for licensing information */
  3. #include "container.h"
  4. #include "compat.h"
  5. #include "sandbox.h"
  6. #include "storagedir.h"
  7. #include "torlog.h"
  8. #include "util.h"
  9. #ifdef HAVE_SYS_TYPES_H
  10. #include <sys/types.h>
  11. #endif
  12. #ifdef HAVE_SYS_STAT_H
  13. #include <sys/stat.h>
  14. #endif
  15. #ifdef HAVE_UNISTD_H
  16. #include <unistd.h>
  17. #endif
/** Lowest number used as a filename inside a storage directory: the loops
 * in this file that generate candidate filenames count upward from this
 * value. */
#define FNAME_MIN_NUM 1000
/** A storage_dir_t represents a directory full of similar cached
 * files. Filenames are decimal integers. Files can be cleaned as needed
 * to limit total disk usage. */
struct storage_dir_t {
  /** Directory holding the files for this storagedir. */
  char *directory;
  /** Either NULL, or a directory listing of the directory (as a smartlist
   * of strings). */
  smartlist_t *contents;
  /** The largest number of non-temporary files we'll place in the
   * directory. */
  int max_files;
  /** If true, then 'usage' has been computed. */
  int usage_known;
  /** The total number of bytes used in this directory. Only meaningful
   * while 'usage_known' is true. */
  uint64_t usage;
};
  36. /** Create or open a new storage directory at <b>dirname</b>, with
  37. * capacity for up to <b>max_files</b> files.
  38. */
  39. storage_dir_t *
  40. storage_dir_new(const char *dirname, int max_files)
  41. {
  42. if (check_private_dir(dirname, CPD_CREATE, NULL) < 0)
  43. return NULL;
  44. storage_dir_t *d = tor_malloc_zero(sizeof(storage_dir_t));
  45. d->directory = tor_strdup(dirname);
  46. d->max_files = max_files;
  47. return d;
  48. }
  49. /**
  50. * Drop all in-RAM storage for <b>d</b>. Does not delete any files.
  51. */
  52. void
  53. storage_dir_free(storage_dir_t *d)
  54. {
  55. if (d == NULL)
  56. return;
  57. tor_free(d->directory);
  58. if (d->contents) {
  59. SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
  60. smartlist_free(d->contents);
  61. }
  62. tor_free(d);
  63. }
  64. /**
  65. * Tell the sandbox (if any) configured by <b>cfg</b> to allow the
  66. * operations that <b>d</b> will need.
  67. *
  68. * The presence of this function is why we need an upper limit on the
  69. * number of filers in a storage_dir_t: we need to approve file
  70. * operaitons one by one.
  71. */
  72. int
  73. storage_dir_register_with_sandbox(storage_dir_t *d, sandbox_cfg_t **cfg)
  74. {
  75. int problems = 0;
  76. int idx;
  77. for (idx = FNAME_MIN_NUM; idx < FNAME_MIN_NUM + d->max_files; ++idx) {
  78. char *path = NULL, *tmppath = NULL;
  79. tor_asprintf(&path, "%s/%d", d->directory, idx);
  80. tor_asprintf(&tmppath, "%s/%d.tmp", d->directory, idx);
  81. problems += sandbox_cfg_allow_open_filename(cfg, path);
  82. problems += sandbox_cfg_allow_open_filename(cfg, tmppath);
  83. problems += sandbox_cfg_allow_stat_filename(cfg, path);
  84. problems += sandbox_cfg_allow_stat_filename(cfg, tmppath);
  85. problems += sandbox_cfg_allow_rename(cfg, tmppath, path);
  86. tor_free(path);
  87. tor_free(tmppath);
  88. }
  89. return problems ? -1 : 0;
  90. }
  91. /**
  92. * Remove all files in <b>d</b> whose names end with ".tmp".
  93. *
  94. * Requires that the contents field of <b>d</b> is set.
  95. */
  96. static void
  97. storage_dir_clean_tmpfiles(storage_dir_t *d)
  98. {
  99. if (!d->contents)
  100. return;
  101. SMARTLIST_FOREACH_BEGIN(d->contents, char *, fname) {
  102. if (strcmpend(fname, ".tmp"))
  103. continue;
  104. char *path = NULL;
  105. tor_asprintf(&path, "%s/%s", d->directory, fname);
  106. if (unlink(sandbox_intern_string(path))) {
  107. log_warn(LD_FS, "Unable to unlink %s", escaped(path));
  108. tor_free(path);
  109. continue;
  110. }
  111. tor_free(path);
  112. SMARTLIST_DEL_CURRENT(d->contents, fname);
  113. tor_free(fname);
  114. } SMARTLIST_FOREACH_END(fname);
  115. d->usage_known = 0;
  116. }
  117. /**
  118. * Re-scan the directory <b>d</b> to learn its contents.
  119. */
  120. static int
  121. storage_dir_rescan(storage_dir_t *d)
  122. {
  123. if (d->contents) {
  124. SMARTLIST_FOREACH(d->contents, char *, cp, tor_free(cp));
  125. smartlist_free(d->contents);
  126. }
  127. d->usage = 0;
  128. d->usage_known = 0;
  129. if (NULL == (d->contents = tor_listdir(d->directory))) {
  130. return -1;
  131. }
  132. storage_dir_clean_tmpfiles(d);
  133. return 0;
  134. }
  135. /**
  136. * Return a smartlist containing the filenames within <b>d</b>.
  137. */
  138. const smartlist_t *
  139. storage_dir_list(storage_dir_t *d)
  140. {
  141. if (! d->contents)
  142. storage_dir_rescan(d);
  143. return d->contents;
  144. }
  145. /**
  146. * Return the total number of bytes used for storage in <b>d</b>.
  147. */
  148. uint64_t
  149. storage_dir_get_usage(storage_dir_t *d)
  150. {
  151. if (d->usage_known)
  152. return d->usage;
  153. uint64_t total = 0;
  154. SMARTLIST_FOREACH_BEGIN(storage_dir_list(d), const char *, cp) {
  155. char *path = NULL;
  156. struct stat st;
  157. tor_asprintf(&path, "%s/%s", d->directory, cp);
  158. if (stat(sandbox_intern_string(path), &st) == 0) {
  159. total += st.st_size;
  160. }
  161. tor_free(path);
  162. } SMARTLIST_FOREACH_END(cp);
  163. d->usage = total;
  164. d->usage_known = 1;
  165. return d->usage;
  166. }
  167. /** Mmap a specified file within <b>d</b>. */
  168. tor_mmap_t *
  169. storage_dir_map(storage_dir_t *d, const char *fname)
  170. {
  171. char *path = NULL;
  172. tor_asprintf(&path, "%s/%s", d->directory, fname);
  173. tor_mmap_t *result = tor_mmap_file(path);
  174. tor_free(path);
  175. return result;
  176. }
  177. /** Read a file within <b>d</b> into a newly allocated buffer. Set
  178. * *<b>sz_out</b> to its size. */
  179. uint8_t *
  180. storage_dir_read(storage_dir_t *d, const char *fname, int bin, size_t *sz_out)
  181. {
  182. const int flags = bin ? RFTS_BIN : 0;
  183. char *path = NULL;
  184. tor_asprintf(&path, "%s/%s", d->directory, fname);
  185. struct stat st;
  186. char *contents = read_file_to_str(path, flags, &st);
  187. if (contents && sz_out) {
  188. // it fits in RAM, so we know its size is less than SIZE_MAX
  189. tor_assert((uint64_t)st.st_size <= SIZE_MAX);
  190. *sz_out = (size_t) st.st_size;
  191. }
  192. tor_free(path);
  193. return (uint8_t *) contents;
  194. }
  195. /** Helper: Find an unused filename within the directory */
  196. static char *
  197. find_unused_fname(storage_dir_t *d)
  198. {
  199. if (!d->contents) {
  200. if (storage_dir_rescan(d) < 0)
  201. return NULL;
  202. }
  203. char buf[16];
  204. int i;
  205. /* Yuck; this is quadratic. Fortunately, that shouldn't matter much,
  206. * since disk writes are more expensive by a lot. */
  207. for (i = FNAME_MIN_NUM; i < FNAME_MIN_NUM + d->max_files; ++i) {
  208. tor_snprintf(buf, sizeof(buf), "%d", i);
  209. if (!smartlist_contains_string(d->contents, buf)) {
  210. return tor_strdup(buf);
  211. }
  212. }
  213. return NULL;
  214. }
  215. /** Try to write the <b>length</b> bytes at <b>data</b> into a new file
  216. * in <b>d</b>. On success, return 0 and set *<b>fname_out</b> to a
  217. * newly allocated string containing the filename. On failure, return
  218. * -1. */
  219. int
  220. storage_dir_save_bytes_to_file(storage_dir_t *d,
  221. const uint8_t *data,
  222. size_t length,
  223. int binary,
  224. char **fname_out)
  225. {
  226. char *fname = find_unused_fname(d);
  227. if (!fname)
  228. return -1;
  229. char *path = NULL;
  230. tor_asprintf(&path, "%s/%s", d->directory, fname);
  231. int r = write_bytes_to_file(path, (const char *)data, length, binary);
  232. if (r == 0) {
  233. if (d->usage_known)
  234. d->usage += length;
  235. if (fname_out) {
  236. *fname_out = tor_strdup(fname);
  237. }
  238. if (d->contents)
  239. smartlist_add(d->contents, tor_strdup(fname));
  240. }
  241. tor_free(fname);
  242. tor_free(path);
  243. return r;
  244. }
  245. /**
  246. * As storage_dir_save_bytes_to_file, but saves a NUL-terminated string
  247. * <b>str</b>.
  248. */
  249. int
  250. storage_dir_save_string_to_file(storage_dir_t *d,
  251. const char *str,
  252. int binary,
  253. char **fname_out)
  254. {
  255. return storage_dir_save_bytes_to_file(d,
  256. (const uint8_t*)str, strlen(str), binary, fname_out);
  257. }
  258. /**
  259. * Remove the file called <b>fname</b> from <b>d</b>.
  260. */
  261. void
  262. storage_dir_remove_file(storage_dir_t *d,
  263. const char *fname)
  264. {
  265. char *path = NULL;
  266. tor_asprintf(&path, "%s/%s", d->directory, fname);
  267. const char *ipath = sandbox_intern_string(path);
  268. uint64_t size = 0;
  269. if (d->usage_known) {
  270. struct stat st;
  271. if (stat(ipath, &st) == 0) {
  272. size = st.st_size;
  273. }
  274. }
  275. if (unlink(ipath) == 0) {
  276. d->usage -= size;
  277. } else {
  278. log_warn(LD_FS, "Unable to unlink %s", escaped(path));
  279. tor_free(path);
  280. return;
  281. }
  282. if (d->contents) {
  283. smartlist_string_remove(d->contents, fname);
  284. }
  285. tor_free(path);
  286. }
  287. /** Helper type: used to sort the members of storage directory by mtime. */
  288. typedef struct shrinking_dir_entry_t {
  289. time_t mtime;
  290. uint64_t size;
  291. char *path;
  292. } shrinking_dir_entry_t;
  293. /** Helper: use with qsort to sort shrinking_dir_entry_t structs. */
  294. static int
  295. shrinking_dir_entry_compare(const void *a_, const void *b_)
  296. {
  297. const shrinking_dir_entry_t *a = a_;
  298. const shrinking_dir_entry_t *b = b_;
  299. if (a->mtime < b->mtime)
  300. return -1;
  301. else if (a->mtime > b->mtime)
  302. return 1;
  303. else
  304. return 0;
  305. }
  306. /**
  307. * Try to free space by removing the oldest files in <b>d</b>. Delete
  308. * until no more than <b>target_size</b> bytes are left, and at least
  309. * <b>min_to_remove</b> files have been removed... or until there is
  310. * nothing left to remove.
  311. *
  312. * Return 0 on success; -1 on failure.
  313. */
  314. int
  315. storage_dir_shrink(storage_dir_t *d,
  316. uint64_t target_size,
  317. int min_to_remove)
  318. {
  319. if (d->usage_known && d->usage <= target_size && !min_to_remove) {
  320. /* Already small enough. */
  321. return 0;
  322. }
  323. if (storage_dir_rescan(d) < 0)
  324. return -1;
  325. const uint64_t orig_usage = storage_dir_get_usage(d);
  326. if (orig_usage <= target_size && !min_to_remove) {
  327. /* Okay, small enough after rescan! */
  328. return 0;
  329. }
  330. const int n = smartlist_len(d->contents);
  331. shrinking_dir_entry_t *ents = tor_calloc(n, sizeof(shrinking_dir_entry_t));
  332. SMARTLIST_FOREACH_BEGIN(d->contents, const char *, fname) {
  333. shrinking_dir_entry_t *ent = &ents[fname_sl_idx];
  334. struct stat st;
  335. tor_asprintf(&ent->path, "%s/%s", d->directory, fname);
  336. if (stat(sandbox_intern_string(ent->path), &st) == 0) {
  337. ent->mtime = st.st_mtime;
  338. ent->size = st.st_size;
  339. }
  340. } SMARTLIST_FOREACH_END(fname);
  341. qsort(ents, n, sizeof(shrinking_dir_entry_t), shrinking_dir_entry_compare);
  342. int idx = 0;
  343. while ((d->usage > target_size || min_to_remove > 0) && idx < n) {
  344. if (unlink(sandbox_intern_string(ents[idx].path)) == 0) {
  345. if (! BUG(d->usage < ents[idx].size)) {
  346. d->usage -= ents[idx].size;
  347. }
  348. --min_to_remove;
  349. }
  350. ++idx;
  351. }
  352. for (idx = 0; idx < n; ++idx) {
  353. tor_free(ents[idx].path);
  354. }
  355. tor_free(ents);
  356. storage_dir_rescan(d);
  357. return 0;
  358. }
  359. /** Remove all files in <b>d</b>. */
  360. int
  361. storage_dir_remove_all(storage_dir_t *d)
  362. {
  363. return storage_dir_shrink(d, 0, d->max_files);
  364. }