unparseable.c 18 KB


  1. /* Copyright (c) 2001 Matej Pfajfar.
  2. * Copyright (c) 2001-2004, Roger Dingledine.
  3. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  4. * Copyright (c) 2007-2018, The Tor Project, Inc. */
  5. /* See LICENSE for licensing information */
  6. #define UNPARSEABLE_PRIVATE
  7. #include "core/or/or.h"
  8. #include "app/config/config.h"
  9. #include "feature/dirparse/unparseable.h"
  10. #include "lib/sandbox/sandbox.h"
  11. #ifdef HAVE_SYS_STAT_H
  12. #include <sys/stat.h>
  13. #endif
  14. /* Dump mechanism for unparseable descriptors */
  15. /** List of dumped_desc_t entries for FIFO cleanup purposes; the oldest
   * entry is at index 0 and new entries are appended at the end. */
  16. STATIC smartlist_t *descs_dumped = NULL;
  17. /** Total size in bytes of the dumps tracked in descs_dumped */
  18. STATIC uint64_t len_descs_dumped = 0;
  19. /** Status flags for the dump directory: have_dump_desc_dir is set once
   * the directory is known to exist; problem_with_dump_desc_dir is set when
   * checking or creating it failed, and no dumps are written while it is
   * set. */
  20. static int have_dump_desc_dir = 0;
  21. static int problem_with_dump_desc_dir = 0;
  /* Subdirectory of the data directory that holds the dumps, and the filename
   * prefix of each dump; a "." and the hex SHA-256 digest of the dump are
   * appended to the prefix. */
  22. #define DESC_DUMP_DATADIR_SUBDIR "unparseable-descs"
  23. #define DESC_DUMP_BASE_FILENAME "unparseable-desc"
  24. /** Find the dump directory and check if we'll be able to create it */
  25. void
  26. dump_desc_init(void)
  27. {
  28. char *dump_desc_dir;
  29. dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
  30. /*
  31. * We just check for it, don't create it at this point; we'll
  32. * create it when we need it if it isn't already there.
  33. */
  34. if (check_private_dir(dump_desc_dir, CPD_CHECK, get_options()->User) < 0) {
  35. /* Error, log and flag it as having a problem */
  36. log_notice(LD_DIR,
  37. "Doesn't look like we'll be able to create descriptor dump "
  38. "directory %s; dumps will be disabled.",
  39. dump_desc_dir);
  40. problem_with_dump_desc_dir = 1;
  41. tor_free(dump_desc_dir);
  42. return;
  43. }
  44. /* Check if it exists */
  45. switch (file_status(dump_desc_dir)) {
  46. case FN_DIR:
  47. /* We already have a directory */
  48. have_dump_desc_dir = 1;
  49. break;
  50. case FN_NOENT:
  51. /* Nothing, we'll need to create it later */
  52. have_dump_desc_dir = 0;
  53. break;
  54. case FN_ERROR:
  55. /* Log and flag having a problem */
  56. log_notice(LD_DIR,
  57. "Couldn't check whether descriptor dump directory %s already"
  58. " exists: %s",
  59. dump_desc_dir, strerror(errno));
  60. problem_with_dump_desc_dir = 1;
  61. break;
  62. case FN_FILE:
  63. case FN_EMPTY:
  64. default:
  65. /* Something else was here! */
  66. log_notice(LD_DIR,
  67. "Descriptor dump directory %s already exists and isn't a "
  68. "directory",
  69. dump_desc_dir);
  70. problem_with_dump_desc_dir = 1;
  71. }
  72. if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
  73. dump_desc_populate_fifo_from_directory(dump_desc_dir);
  74. }
  75. tor_free(dump_desc_dir);
  76. }
  77. /** Create the dump directory if needed and possible */
  78. static void
  79. dump_desc_create_dir(void)
  80. {
  81. char *dump_desc_dir;
  82. /* If the problem flag is set, skip it */
  83. if (problem_with_dump_desc_dir) return;
  84. /* Do we need it? */
  85. if (!have_dump_desc_dir) {
  86. dump_desc_dir = get_datadir_fname(DESC_DUMP_DATADIR_SUBDIR);
  87. if (check_private_dir(dump_desc_dir, CPD_CREATE,
  88. get_options()->User) < 0) {
  89. log_notice(LD_DIR,
  90. "Failed to create descriptor dump directory %s",
  91. dump_desc_dir);
  92. problem_with_dump_desc_dir = 1;
  93. }
  94. /* Okay, we created it */
  95. have_dump_desc_dir = 1;
  96. tor_free(dump_desc_dir);
  97. }
  98. }
  99. /** Dump desc FIFO/cleanup; take ownership of the given filename, add it to
  100. * the FIFO, and clean up the oldest entries to the extent they exceed the
  101. * configured cap. If any old entries with a matching hash existed, they
  102. * just got overwritten right before this was called and we should adjust
  103. * the total size counter without deleting them.
  104. */
  105. static void
  106. dump_desc_fifo_add_and_clean(char *filename, const uint8_t *digest_sha256,
  107. size_t len)
  108. {
  109. dumped_desc_t *ent = NULL, *tmp;
  110. uint64_t max_len;
  111. tor_assert(filename != NULL);
  112. tor_assert(digest_sha256 != NULL);
  113. if (descs_dumped == NULL) {
  114. /* We better have no length, then */
  115. tor_assert(len_descs_dumped == 0);
  116. /* Make a smartlist */
  117. descs_dumped = smartlist_new();
  118. }
  119. /* Make a new entry to put this one in */
  120. ent = tor_malloc_zero(sizeof(*ent));
  121. ent->filename = filename;
  122. ent->len = len;
  123. ent->when = time(NULL);
  124. memcpy(ent->digest_sha256, digest_sha256, DIGEST256_LEN);
  125. /* Do we need to do some cleanup? */
  126. max_len = get_options()->MaxUnparseableDescSizeToLog;
  127. /* Iterate over the list until we've freed enough space */
  128. while (len > max_len - len_descs_dumped &&
  129. smartlist_len(descs_dumped) > 0) {
  130. /* Get the oldest thing on the list */
  131. tmp = (dumped_desc_t *)(smartlist_get(descs_dumped, 0));
  132. /*
  133. * Check if it matches the filename we just added, so we don't delete
  134. * something we just emitted if we get repeated identical descriptors.
  135. */
  136. if (strcmp(tmp->filename, filename) != 0) {
  137. /* Delete it and adjust the length counter */
  138. tor_unlink(tmp->filename);
  139. tor_assert(len_descs_dumped >= tmp->len);
  140. len_descs_dumped -= tmp->len;
  141. log_info(LD_DIR,
  142. "Deleting old unparseable descriptor dump %s due to "
  143. "space limits",
  144. tmp->filename);
  145. } else {
  146. /*
  147. * Don't delete, but do adjust the counter since we will bump it
  148. * later
  149. */
  150. tor_assert(len_descs_dumped >= tmp->len);
  151. len_descs_dumped -= tmp->len;
  152. log_info(LD_DIR,
  153. "Replacing old descriptor dump %s with new identical one",
  154. tmp->filename);
  155. }
  156. /* Free it and remove it from the list */
  157. smartlist_del_keeporder(descs_dumped, 0);
  158. tor_free(tmp->filename);
  159. tor_free(tmp);
  160. }
  161. /* Append our entry to the end of the list and bump the counter */
  162. smartlist_add(descs_dumped, ent);
  163. len_descs_dumped += len;
  164. }
  165. /** Check if we already have a descriptor for this hash and move it to the
  166. * head of the queue if so. Return 1 if one existed and 0 otherwise.
  167. */
  168. static int
  169. dump_desc_fifo_bump_hash(const uint8_t *digest_sha256)
  170. {
  171. dumped_desc_t *match = NULL;
  172. tor_assert(digest_sha256);
  173. if (descs_dumped) {
  174. /* Find a match if one exists */
  175. SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
  176. if (ent &&
  177. tor_memeq(ent->digest_sha256, digest_sha256, DIGEST256_LEN)) {
  178. /*
  179. * Save a pointer to the match and remove it from its current
  180. * position.
  181. */
  182. match = ent;
  183. SMARTLIST_DEL_CURRENT_KEEPORDER(descs_dumped, ent);
  184. break;
  185. }
  186. } SMARTLIST_FOREACH_END(ent);
  187. if (match) {
  188. /* Update the timestamp */
  189. match->when = time(NULL);
  190. /* Add it back at the end of the list */
  191. smartlist_add(descs_dumped, match);
  192. /* Indicate we found one */
  193. return 1;
  194. }
  195. }
  196. return 0;
  197. }
  198. /** Clean up on exit; just memory, leave the dumps behind
  199. */
  200. void
  201. dump_desc_fifo_cleanup(void)
  202. {
  203. if (descs_dumped) {
  204. /* Free each descriptor */
  205. SMARTLIST_FOREACH_BEGIN(descs_dumped, dumped_desc_t *, ent) {
  206. tor_assert(ent);
  207. tor_free(ent->filename);
  208. tor_free(ent);
  209. } SMARTLIST_FOREACH_END(ent);
  210. /* Free the list */
  211. smartlist_free(descs_dumped);
  212. descs_dumped = NULL;
  213. len_descs_dumped = 0;
  214. }
  215. }
  216. /** Handle one file for dump_desc_populate_fifo_from_directory(); make sure
  217. * the filename is sensibly formed and matches the file content, and either
  218. * return a dumped_desc_t for it or remove the file and return NULL.
  219. */
  220. MOCK_IMPL(STATIC dumped_desc_t *,
  221. dump_desc_populate_one_file, (const char *dirname, const char *f))
  222. {
  223. dumped_desc_t *ent = NULL;
  224. char *path = NULL, *desc = NULL;
  225. const char *digest_str;
  226. char digest[DIGEST256_LEN], content_digest[DIGEST256_LEN];
  227. /* Expected prefix before digest in filenames */
  228. const char *f_pfx = DESC_DUMP_BASE_FILENAME ".";
  229. /*
  230. * Stat while reading; this is important in case the file
  231. * contains a NUL character.
  232. */
  233. struct stat st;
  234. /* Sanity-check args */
  235. tor_assert(dirname != NULL);
  236. tor_assert(f != NULL);
  237. /* Form the full path */
  238. tor_asprintf(&path, "%s" PATH_SEPARATOR "%s", dirname, f);
  239. /* Check that f has the form DESC_DUMP_BASE_FILENAME.<digest256> */
  240. if (!strcmpstart(f, f_pfx)) {
  241. /* It matches the form, but is the digest parseable as such? */
  242. digest_str = f + strlen(f_pfx);
  243. if (base16_decode(digest, DIGEST256_LEN,
  244. digest_str, strlen(digest_str)) != DIGEST256_LEN) {
  245. /* We failed to decode it */
  246. digest_str = NULL;
  247. }
  248. } else {
  249. /* No match */
  250. digest_str = NULL;
  251. }
  252. if (!digest_str) {
  253. /* We couldn't get a sensible digest */
  254. log_notice(LD_DIR,
  255. "Removing unrecognized filename %s from unparseable "
  256. "descriptors directory", f);
  257. tor_unlink(path);
  258. /* We're done */
  259. goto done;
  260. }
  261. /*
  262. * The filename has the form DESC_DUMP_BASE_FILENAME "." <digest256> and
  263. * we've decoded the digest. Next, check that we can read it and the
  264. * content matches this digest. We are relying on the fact that if the
  265. * file contains a '\0', read_file_to_str() will allocate space for and
  266. * read the entire file and return the correct size in st.
  267. */
  268. desc = read_file_to_str(path, RFTS_IGNORE_MISSING|RFTS_BIN, &st);
  269. if (!desc) {
  270. /* We couldn't read it */
  271. log_notice(LD_DIR,
  272. "Failed to read %s from unparseable descriptors directory; "
  273. "attempting to remove it.", f);
  274. tor_unlink(path);
  275. /* We're done */
  276. goto done;
  277. }
  278. #if SIZE_MAX > UINT64_MAX
  279. if (BUG((uint64_t)st.st_size > (uint64_t)SIZE_MAX)) {
  280. /* LCOV_EXCL_START
  281. * Should be impossible since RFTS above should have failed to read the
  282. * huge file into RAM. */
  283. goto done;
  284. /* LCOV_EXCL_STOP */
  285. }
  286. #endif /* SIZE_MAX > UINT64_MAX */
  287. if (BUG(st.st_size < 0)) {
  288. /* LCOV_EXCL_START
  289. * Should be impossible, since the OS isn't supposed to be b0rken. */
  290. goto done;
  291. /* LCOV_EXCL_STOP */
  292. }
  293. /* (Now we can be sure that st.st_size is safe to cast to a size_t.) */
  294. /*
  295. * We got one; now compute its digest and check that it matches the
  296. * filename.
  297. */
  298. if (crypto_digest256((char *)content_digest, desc, (size_t) st.st_size,
  299. DIGEST_SHA256) < 0) {
  300. /* Weird, but okay */
  301. log_info(LD_DIR,
  302. "Unable to hash content of %s from unparseable descriptors "
  303. "directory", f);
  304. tor_unlink(path);
  305. /* We're done */
  306. goto done;
  307. }
  308. /* Compare the digests */
  309. if (tor_memneq(digest, content_digest, DIGEST256_LEN)) {
  310. /* No match */
  311. log_info(LD_DIR,
  312. "Hash of %s from unparseable descriptors directory didn't "
  313. "match its filename; removing it", f);
  314. tor_unlink(path);
  315. /* We're done */
  316. goto done;
  317. }
  318. /* Okay, it's a match, we should prepare ent */
  319. ent = tor_malloc_zero(sizeof(dumped_desc_t));
  320. ent->filename = path;
  321. memcpy(ent->digest_sha256, digest, DIGEST256_LEN);
  322. ent->len = (size_t) st.st_size;
  323. ent->when = st.st_mtime;
  324. /* Null out path so we don't free it out from under ent */
  325. path = NULL;
  326. done:
  327. /* Free allocations if we had them */
  328. tor_free(desc);
  329. tor_free(path);
  330. return ent;
  331. }
  332. /** Sort helper for dump_desc_populate_fifo_from_directory(); compares
  333. * the when field of dumped_desc_ts in a smartlist to put the FIFO in
  334. * the correct order after reconstructing it from the directory.
  335. */
  336. static int
  337. dump_desc_compare_fifo_entries(const void **a_v, const void **b_v)
  338. {
  339. const dumped_desc_t **a = (const dumped_desc_t **)a_v;
  340. const dumped_desc_t **b = (const dumped_desc_t **)b_v;
  341. if ((a != NULL) && (*a != NULL)) {
  342. if ((b != NULL) && (*b != NULL)) {
  343. /* We have sensible dumped_desc_ts to compare */
  344. if ((*a)->when < (*b)->when) {
  345. return -1;
  346. } else if ((*a)->when == (*b)->when) {
  347. return 0;
  348. } else {
  349. return 1;
  350. }
  351. } else {
  352. /*
  353. * We shouldn't see this, but what the hell, NULLs precede everythin
  354. * else
  355. */
  356. return 1;
  357. }
  358. } else {
  359. return -1;
  360. }
  361. }
  362. /** Scan the contents of the directory, and update FIFO/counters; this will
  363. * consistency-check descriptor dump filenames against hashes of descriptor
  364. * dump file content, and remove any inconsistent/unreadable dumps, and then
  365. * reconstruct the dump FIFO as closely as possible for the last time the
  366. * tor process shut down. If a previous dump was repeated more than once and
  367. * moved ahead in the FIFO, the mtime will not have been updated and the
  368. * reconstructed order will be wrong, but will always be a permutation of
  369. * the original.
  370. */
  371. STATIC void
  372. dump_desc_populate_fifo_from_directory(const char *dirname)
  373. {
  374. smartlist_t *files = NULL;
  375. dumped_desc_t *ent = NULL;
  376. tor_assert(dirname != NULL);
  377. /* Get a list of files */
  378. files = tor_listdir(dirname);
  379. if (!files) {
  380. log_notice(LD_DIR,
  381. "Unable to get contents of unparseable descriptor dump "
  382. "directory %s",
  383. dirname);
  384. return;
  385. }
  386. /*
  387. * Iterate through the list and decide which files should go in the
  388. * FIFO and which should be purged.
  389. */
  390. SMARTLIST_FOREACH_BEGIN(files, char *, f) {
  391. /* Try to get a FIFO entry */
  392. ent = dump_desc_populate_one_file(dirname, f);
  393. if (ent) {
  394. /*
  395. * We got one; add it to the FIFO. No need for duplicate checking
  396. * here since we just verified the name and digest match.
  397. */
  398. /* Make sure we have a list to add it to */
  399. if (!descs_dumped) {
  400. descs_dumped = smartlist_new();
  401. len_descs_dumped = 0;
  402. }
  403. /* Add it and adjust the counter */
  404. smartlist_add(descs_dumped, ent);
  405. len_descs_dumped += ent->len;
  406. }
  407. /*
  408. * If we didn't, we will have unlinked the file if necessary and
  409. * possible, and emitted a log message about it, so just go on to
  410. * the next.
  411. */
  412. } SMARTLIST_FOREACH_END(f);
  413. /* Did we get anything? */
  414. if (descs_dumped != NULL) {
  415. /* Sort the FIFO in order of increasing timestamp */
  416. smartlist_sort(descs_dumped, dump_desc_compare_fifo_entries);
  417. /* Log some stats */
  418. log_info(LD_DIR,
  419. "Reloaded unparseable descriptor dump FIFO with %d dump(s) "
  420. "totaling %"PRIu64 " bytes",
  421. smartlist_len(descs_dumped), (len_descs_dumped));
  422. }
  423. /* Free the original list */
  424. SMARTLIST_FOREACH(files, char *, f, tor_free(f));
  425. smartlist_free(files);
  426. }
  427. /** For debugging purposes, dump unparseable descriptor *<b>desc</b> of
  428. * type *<b>type</b> to file $DATADIR/unparseable-desc. Do not write more
  429. * than one descriptor to disk per minute. If there is already such a
  430. * file in the data directory, overwrite it. */
  431. MOCK_IMPL(void,
  432. dump_desc,(const char *desc, const char *type))
  433. {
  434. tor_assert(desc);
  435. tor_assert(type);
  436. size_t len;
  437. /* The SHA256 of the string */
  438. uint8_t digest_sha256[DIGEST256_LEN];
  439. char digest_sha256_hex[HEX_DIGEST256_LEN+1];
  440. /* Filename to log it to */
  441. char *debugfile, *debugfile_base;
  442. /* Get the hash for logging purposes anyway */
  443. len = strlen(desc);
  444. if (crypto_digest256((char *)digest_sha256, desc, len,
  445. DIGEST_SHA256) < 0) {
  446. log_info(LD_DIR,
  447. "Unable to parse descriptor of type %s, and unable to even hash"
  448. " it!", type);
  449. goto err;
  450. }
  451. base16_encode(digest_sha256_hex, sizeof(digest_sha256_hex),
  452. (const char *)digest_sha256, sizeof(digest_sha256));
  453. /*
  454. * We mention type and hash in the main log; don't clutter up the files
  455. * with anything but the exact dump.
  456. */
  457. tor_asprintf(&debugfile_base,
  458. DESC_DUMP_BASE_FILENAME ".%s", digest_sha256_hex);
  459. debugfile = get_datadir_fname2(DESC_DUMP_DATADIR_SUBDIR, debugfile_base);
  460. /*
  461. * Check if the sandbox is active or will become active; see comment
  462. * below at the log message for why.
  463. */
  464. if (!(sandbox_is_active() || get_options()->Sandbox)) {
  465. if (len <= get_options()->MaxUnparseableDescSizeToLog) {
  466. if (!dump_desc_fifo_bump_hash(digest_sha256)) {
  467. /* Create the directory if needed */
  468. dump_desc_create_dir();
  469. /* Make sure we've got it */
  470. if (have_dump_desc_dir && !problem_with_dump_desc_dir) {
  471. /* Write it, and tell the main log about it */
  472. write_str_to_file(debugfile, desc, 1);
  473. log_info(LD_DIR,
  474. "Unable to parse descriptor of type %s with hash %s and "
  475. "length %lu. See file %s in data directory for details.",
  476. type, digest_sha256_hex, (unsigned long)len,
  477. debugfile_base);
  478. dump_desc_fifo_add_and_clean(debugfile, digest_sha256, len);
  479. /* Since we handed ownership over, don't free debugfile later */
  480. debugfile = NULL;
  481. } else {
  482. /* Problem with the subdirectory */
  483. log_info(LD_DIR,
  484. "Unable to parse descriptor of type %s with hash %s and "
  485. "length %lu. Descriptor not dumped because we had a "
  486. "problem creating the " DESC_DUMP_DATADIR_SUBDIR
  487. " subdirectory",
  488. type, digest_sha256_hex, (unsigned long)len);
  489. /* We do have to free debugfile in this case */
  490. }
  491. } else {
  492. /* We already had one with this hash dumped */
  493. log_info(LD_DIR,
  494. "Unable to parse descriptor of type %s with hash %s and "
  495. "length %lu. Descriptor not dumped because one with that "
  496. "hash has already been dumped.",
  497. type, digest_sha256_hex, (unsigned long)len);
  498. /* We do have to free debugfile in this case */
  499. }
  500. } else {
  501. /* Just log that it happened without dumping */
  502. log_info(LD_DIR,
  503. "Unable to parse descriptor of type %s with hash %s and "
  504. "length %lu. Descriptor not dumped because it exceeds maximum"
  505. " log size all by itself.",
  506. type, digest_sha256_hex, (unsigned long)len);
  507. /* We do have to free debugfile in this case */
  508. }
  509. } else {
  510. /*
  511. * Not logging because the sandbox is active and seccomp2 apparently
  512. * doesn't have a sensible way to allow filenames according to a pattern
  513. * match. (If we ever figure out how to say "allow writes to /regex/",
  514. * remove this checK).
  515. */
  516. log_info(LD_DIR,
  517. "Unable to parse descriptor of type %s with hash %s and "
  518. "length %lu. Descriptor not dumped because the sandbox is "
  519. "configured",
  520. type, digest_sha256_hex, (unsigned long)len);
  521. }
  522. tor_free(debugfile_base);
  523. tor_free(debugfile);
  524. err:
  525. return;
  526. }