directory.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654
  1. /* Copyright (c) 2001-2004, Roger Dingledine.
  2. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  3. * Copyright (c) 2007-2019, The Tor Project, Inc. */
  4. /* See LICENSE for licensing information */
  5. #include "core/or/or.h"
  6. #include "app/config/config.h"
  7. #include "core/mainloop/connection.h"
  8. #include "feature/dircache/dircache.h"
  9. #include "feature/dircache/dirserv.h"
  10. #include "feature/dirclient/dirclient.h"
  11. #include "feature/dircommon/directory.h"
  12. #include "feature/dircommon/fp_pair.h"
  13. #include "feature/stats/geoip_stats.h"
  14. #include "lib/compress/compress.h"
  15. #include "feature/dircommon/dir_connection_st.h"
  16. #include "feature/nodelist/routerinfo_st.h"
  17. #include "feature/hs/hs_pirprocess.h"
  18. /**
  19. * \file directory.c
  20. * \brief Code to send and fetch information from directory authorities and
  21. * caches via HTTP.
  22. *
  23. * Directory caches and authorities use dirserv.c to generate the results of a
  24. * query and stream them to the connection; clients use routerparse.c to parse
  25. * them.
  26. *
  27. * Every directory request has a dir_connection_t on the client side and on
  28. * the server side. In most cases, the dir_connection_t object is a linked
  29. * connection, tunneled through an edge_connection_t so that it can be a
  30. * stream on the Tor network. The only non-tunneled connections are those
  31. * that are used to upload material (descriptors and votes) to authorities.
  32. * Among tunneled connections, some use one-hop circuits, and others use
  33. * multi-hop circuits for anonymity.
  34. *
  35. * Directory requests are launched by calling
  36. * directory_initiate_request(). This will
  37. * launch the connection, construct an HTTP request with
  38. * directory_send_command(), send it, and wait for a response. The client
  39. * later handles the response with connection_dir_client_reached_eof(),
  40. * which passes the information received to another part of Tor.
  41. *
  42. * On the server side, requests are read in directory_handle_command(),
  43. * which dispatches first on the request type (GET or POST), and then on
  44. * the URL requested. GET requests are processed with a table-based
  45. * dispatcher in url_table[]. The process of handling larger GET requests
  46. * is complicated because we need to avoid allocating a copy of all the
  47. * data to be sent to the client in one huge buffer. Instead, we spool the
  48. * data into the buffer using logic in connection_dirserv_flushed_some() in
  49. * dirserv.c. (TODO: If we extended buf.c to have a zero-copy
  50. * reference-based buffer type, we could remove most of that code, at the
  51. * cost of a bit more reference counting.)
  52. **/
  53. /* In-points to directory.c:
  54. *
  55. * - directory_post_to_dirservers(), called from
  56. * router_upload_dir_desc_to_dirservers() in router.c
  57. * upload_service_descriptor() in rendservice.c
  58. * - directory_get_from_dirserver(), called from
  59. * rend_client_refetch_renddesc() in rendclient.c
  60. * run_scheduled_events() in main.c
  61. * do_hup() in main.c
  62. * - connection_dir_process_inbuf(), called from
  63. * connection_process_inbuf() in connection.c
  64. * - connection_dir_finished_flushing(), called from
  65. * connection_finished_flushing() in connection.c
  66. * - connection_dir_finished_connecting(), called from
  67. * connection_finished_connecting() in connection.c
  68. */
  69. /** Convert a connection_t* to a dir_connection_t*; assert if the cast is
  70. * invalid. */
  71. dir_connection_t *
  72. TO_DIR_CONN(connection_t *c)
  73. {
  74. tor_assert(c->magic == DIR_CONNECTION_MAGIC);
  75. return DOWNCAST(dir_connection_t, c);
  76. }
  77. /** Return false if the directory purpose <b>dir_purpose</b>
  78. * does not require an anonymous (three-hop) connection.
  79. *
  80. * Return true 1) by default, 2) if all directory actions have
  81. * specifically been configured to be over an anonymous connection,
  82. * or 3) if the router is a bridge */
  83. int
  84. purpose_needs_anonymity(uint8_t dir_purpose, uint8_t router_purpose,
  85. const char *resource)
  86. {
  87. if (get_options()->AllDirActionsPrivate)
  88. return 1;
  89. if (router_purpose == ROUTER_PURPOSE_BRIDGE) {
  90. if (dir_purpose == DIR_PURPOSE_FETCH_SERVERDESC
  91. && resource && !strcmp(resource, "authority.z")) {
  92. /* We are asking a bridge for its own descriptor. That doesn't need
  93. anonymity. */
  94. return 0;
  95. }
  96. /* Assume all other bridge stuff needs anonymity. */
  97. return 1; /* if no circuits yet, this might break bootstrapping, but it's
  98. * needed to be safe. */
  99. }
  100. switch (dir_purpose)
  101. {
  102. case DIR_PURPOSE_UPLOAD_DIR:
  103. case DIR_PURPOSE_UPLOAD_VOTE:
  104. case DIR_PURPOSE_UPLOAD_SIGNATURES:
  105. case DIR_PURPOSE_FETCH_STATUS_VOTE:
  106. case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES:
  107. case DIR_PURPOSE_FETCH_CONSENSUS:
  108. case DIR_PURPOSE_FETCH_CERTIFICATE:
  109. case DIR_PURPOSE_FETCH_SERVERDESC:
  110. case DIR_PURPOSE_FETCH_EXTRAINFO:
  111. case DIR_PURPOSE_FETCH_MICRODESC:
  112. return 0;
  113. case DIR_PURPOSE_HAS_FETCHED_HSDESC:
  114. case DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2:
  115. case DIR_PURPOSE_UPLOAD_RENDDESC_V2:
  116. case DIR_PURPOSE_FETCH_RENDDESC_V2:
  117. case DIR_PURPOSE_FETCH_HSDESC:
  118. case DIR_PURPOSE_UPLOAD_HSDESC:
  119. return 1;
  120. case DIR_PURPOSE_SERVER:
  121. default:
  122. log_warn(LD_BUG, "Called with dir_purpose=%d, router_purpose=%d",
  123. dir_purpose, router_purpose);
  124. tor_assert_nonfatal_unreached();
  125. return 1; /* Assume it needs anonymity; better safe than sorry. */
  126. }
  127. }
  128. /** Return a newly allocated string describing <b>auth</b>. Only describes
  129. * authority features. */
  130. char *
  131. authdir_type_to_string(dirinfo_type_t auth)
  132. {
  133. char *result;
  134. smartlist_t *lst = smartlist_new();
  135. if (auth & V3_DIRINFO)
  136. smartlist_add(lst, (void*)"V3");
  137. if (auth & BRIDGE_DIRINFO)
  138. smartlist_add(lst, (void*)"Bridge");
  139. if (smartlist_len(lst)) {
  140. result = smartlist_join_strings(lst, ", ", 0, NULL);
  141. } else {
  142. result = tor_strdup("[Not an authority]");
  143. }
  144. smartlist_free(lst);
  145. return result;
  146. }
  147. /** Return true iff anything we say on <b>conn</b> is being encrypted before
  148. * we send it to the client/server. */
  149. int
  150. connection_dir_is_encrypted(const dir_connection_t *conn)
  151. {
  152. /* Right now it's sufficient to see if conn is or has been linked, since
  153. * the only thing it could be linked to is an edge connection on a
  154. * circuit, and the only way it could have been unlinked is at the edge
  155. * connection getting closed.
  156. */
  157. return TO_CONN(conn)->linked;
  158. }
  159. /** Parse an HTTP request line at the start of a headers string. On failure,
  160. * return -1. On success, set *<b>command_out</b> to a copy of the HTTP
  161. * command ("get", "post", etc), set *<b>url_out</b> to a copy of the URL, and
  162. * return 0. */
  163. int
  164. parse_http_command(const char *headers, char **command_out, char **url_out)
  165. {
  166. const char *command, *end_of_command;
  167. char *s, *start, *tmp;
  168. s = (char *)eat_whitespace_no_nl(headers);
  169. if (!*s) return -1;
  170. command = s;
  171. s = (char *)find_whitespace(s); /* get past GET/POST */
  172. if (!*s) return -1;
  173. end_of_command = s;
  174. s = (char *)eat_whitespace_no_nl(s);
  175. if (!*s) return -1;
  176. start = s; /* this is the URL, assuming it's valid */
  177. s = (char *)find_whitespace(start);
  178. if (!*s) return -1;
  179. /* tolerate the http[s] proxy style of putting the hostname in the url */
  180. if (s-start >= 4 && !strcmpstart(start,"http")) {
  181. tmp = start + 4;
  182. if (*tmp == 's')
  183. tmp++;
  184. if (s-tmp >= 3 && !strcmpstart(tmp,"://")) {
  185. tmp = strchr(tmp+3, '/');
  186. if (tmp && tmp < s) {
  187. log_debug(LD_DIR,"Skipping over 'http[s]://hostname/' string");
  188. start = tmp;
  189. }
  190. }
  191. }
  192. /* Check if the header is well formed (next sequence
  193. * should be HTTP/1.X\r\n). Assumes we're supporting 1.0? */
  194. {
  195. unsigned minor_ver;
  196. char ch;
  197. char *e = (char *)eat_whitespace_no_nl(s);
  198. if (2 != tor_sscanf(e, "HTTP/1.%u%c", &minor_ver, &ch)) {
  199. return -1;
  200. }
  201. if (ch != '\r')
  202. return -1;
  203. }
  204. *url_out = tor_memdup_nulterm(start, s-start);
  205. *command_out = tor_memdup_nulterm(command, end_of_command - command);
  206. return 0;
  207. }
  /** Look through <b>headers</b> line by line for the first line that begins
   * (case-insensitively) with <b>which</b>, and return a newly allocated copy
   * of that line's value.
   *
   * <b>which</b> should include the trailing colon and space (for example
   * "Content-Length: ").  The value is everything after the key up to, but
   * not including, the next carriage return; if there is no carriage return
   * the rest of the string is copied.  Return NULL if no line matches. */
  char *
  http_get_header(const char *headers, const char *which)
  {
    const char *line;

    for (line = headers; line != NULL; ) {
      if (strcasecmpstart(line, which) == 0) {
        const char *value = line + strlen(which);
        const char *cr = strchr(value, '\r');

        if (cr == NULL)
          return tor_strdup(value);
        return tor_strndup(value, cr - value);
      }

      /* No match: move to the character after the next newline, if any. */
      line = strchr(line, '\n');
      if (line != NULL)
        ++line;
    }

    return NULL;
  }
  231. /** Parse an HTTP response string <b>headers</b> of the form
  232. * \verbatim
  233. * "HTTP/1.\%d \%d\%s\r\n...".
  234. * \endverbatim
  235. *
  236. * If it's well-formed, assign the status code to *<b>code</b> and
  237. * return 0. Otherwise, return -1.
  238. *
  239. * On success: If <b>date</b> is provided, set *date to the Date
  240. * header in the http headers, or 0 if no such header is found. If
  241. * <b>compression</b> is provided, set *<b>compression</b> to the
  242. * compression method given in the Content-Encoding header, or 0 if no
  243. * such header is found, or -1 if the value of the header is not
  244. * recognized. If <b>reason</b> is provided, strdup the reason string
  245. * into it.
  246. */
  247. int
  248. parse_http_response(const char *headers, int *code, time_t *date,
  249. compress_method_t *compression, char **reason)
  250. {
  251. unsigned n1, n2;
  252. char datestr[RFC1123_TIME_LEN+1];
  253. smartlist_t *parsed_headers;
  254. tor_assert(headers);
  255. tor_assert(code);
  256. while (TOR_ISSPACE(*headers)) headers++; /* tolerate leading whitespace */
  257. if (tor_sscanf(headers, "HTTP/1.%u %u", &n1, &n2) < 2 ||
  258. (n1 != 0 && n1 != 1) ||
  259. (n2 < 100 || n2 >= 600)) {
  260. log_warn(LD_HTTP,"Failed to parse header %s",escaped(headers));
  261. return -1;
  262. }
  263. *code = n2;
  264. parsed_headers = smartlist_new();
  265. smartlist_split_string(parsed_headers, headers, "\n",
  266. SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, -1);
  267. if (reason) {
  268. smartlist_t *status_line_elements = smartlist_new();
  269. tor_assert(smartlist_len(parsed_headers));
  270. smartlist_split_string(status_line_elements,
  271. smartlist_get(parsed_headers, 0),
  272. " ", SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3);
  273. tor_assert(smartlist_len(status_line_elements) <= 3);
  274. if (smartlist_len(status_line_elements) == 3) {
  275. *reason = smartlist_get(status_line_elements, 2);
  276. smartlist_set(status_line_elements, 2, NULL); /* Prevent free */
  277. }
  278. SMARTLIST_FOREACH(status_line_elements, char *, cp, tor_free(cp));
  279. smartlist_free(status_line_elements);
  280. }
  281. if (date) {
  282. *date = 0;
  283. SMARTLIST_FOREACH(parsed_headers, const char *, s,
  284. if (!strcmpstart(s, "Date: ")) {
  285. strlcpy(datestr, s+6, sizeof(datestr));
  286. /* This will do nothing on failure, so we don't need to check
  287. the result. We shouldn't warn, since there are many other valid
  288. date formats besides the one we use. */
  289. parse_rfc1123_time(datestr, date);
  290. break;
  291. });
  292. }
  293. if (compression) {
  294. const char *enc = NULL;
  295. SMARTLIST_FOREACH(parsed_headers, const char *, s,
  296. if (!strcmpstart(s, "Content-Encoding: ")) {
  297. enc = s+18; break;
  298. });
  299. if (enc == NULL)
  300. *compression = NO_METHOD;
  301. else {
  302. *compression = compression_method_get_by_name(enc);
  303. if (*compression == UNKNOWN_METHOD)
  304. log_info(LD_HTTP, "Unrecognized content encoding: %s. Trying to deal.",
  305. escaped(enc));
  306. }
  307. }
  308. SMARTLIST_FOREACH(parsed_headers, char *, s, tor_free(s));
  309. smartlist_free(parsed_headers);
  310. return 0;
  311. }
  312. /** If any directory object is arriving, and it's over 10MB large, we're
  313. * getting DoS'd. (As of 0.1.2.x, raw directories are about 1MB, and we never
  314. * ask for more than 96 router descriptors at a time.)
  315. */
  316. #define MAX_DIRECTORY_OBJECT_SIZE (10*(1<<20))
  317. #define MAX_VOTE_DL_SIZE (MAX_DIRECTORY_OBJECT_SIZE * 5)
  318. /** Read handler for directory connections. (That's connections <em>to</em>
  319. * directory servers and connections <em>at</em> directory servers.)
  320. */
  321. int
  322. connection_dir_process_inbuf(dir_connection_t *conn)
  323. {
  324. size_t max_size;
  325. tor_assert(conn);
  326. tor_assert(conn->base_.type == CONN_TYPE_DIR);
  327. /* Directory clients write, then read data until they receive EOF;
  328. * directory servers read data until they get an HTTP command, then
  329. * write their response (when it's finished flushing, they mark for
  330. * close).
  331. */
  332. /* If we're on the dirserver side, look for a command. */
  333. if (conn->base_.state == DIR_CONN_STATE_SERVER_COMMAND_WAIT) {
  334. if (directory_handle_command(conn) < 0) {
  335. connection_mark_for_close(TO_CONN(conn));
  336. return -1;
  337. }
  338. return 0;
  339. }
  340. max_size =
  341. (TO_CONN(conn)->purpose == DIR_PURPOSE_FETCH_STATUS_VOTE) ?
  342. MAX_VOTE_DL_SIZE : MAX_DIRECTORY_OBJECT_SIZE;
  343. if (connection_get_inbuf_len(TO_CONN(conn)) > max_size) {
  344. log_warn(LD_HTTP,
  345. "Too much data received from directory connection (%s): "
  346. "denial of service attempt, or you need to upgrade?",
  347. conn->base_.address);
  348. connection_mark_for_close(TO_CONN(conn));
  349. return -1;
  350. }
  351. if (!conn->base_.inbuf_reached_eof)
  352. log_debug(LD_HTTP,"Got data, not eof. Leaving on inbuf.");
  353. return 0;
  354. }
  355. /** Called when we're about to finally unlink and free a directory connection:
  356. * perform necessary accounting and cleanup */
  357. void
  358. connection_dir_about_to_close(dir_connection_t *dir_conn)
  359. {
  360. connection_t *conn = TO_CONN(dir_conn);
  361. hs_pirprocess_dealloc_reqid(dir_conn);
  362. if (conn->state < DIR_CONN_STATE_CLIENT_FINISHED) {
  363. /* It's a directory connection and connecting or fetching
  364. * failed: forget about this router, and maybe try again. */
  365. connection_dir_client_request_failed(dir_conn);
  366. }
  367. connection_dir_client_refetch_hsdesc_if_needed(dir_conn);
  368. }
  369. /** Write handler for directory connections; called when all data has
  370. * been flushed. Close the connection or wait for a response as
  371. * appropriate.
  372. */
  373. int
  374. connection_dir_finished_flushing(dir_connection_t *conn)
  375. {
  376. tor_assert(conn);
  377. tor_assert(conn->base_.type == CONN_TYPE_DIR);
  378. if (conn->base_.marked_for_close)
  379. return 0;
  380. /* Note that we have finished writing the directory response. For direct
  381. * connections this means we're done; for tunneled connections it's only
  382. * an intermediate step. */
  383. if (conn->dirreq_id)
  384. geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED,
  385. DIRREQ_FLUSHING_DIR_CONN_FINISHED);
  386. else
  387. geoip_change_dirreq_state(TO_CONN(conn)->global_identifier,
  388. DIRREQ_DIRECT,
  389. DIRREQ_FLUSHING_DIR_CONN_FINISHED);
  390. switch (conn->base_.state) {
  391. case DIR_CONN_STATE_CONNECTING:
  392. case DIR_CONN_STATE_CLIENT_SENDING:
  393. log_debug(LD_DIR,"client finished sending command.");
  394. conn->base_.state = DIR_CONN_STATE_CLIENT_READING;
  395. return 0;
  396. case DIR_CONN_STATE_SERVER_WRITING:
  397. if (conn->spool) {
  398. log_warn(LD_BUG, "Emptied a dirserv buffer, but it's still spooling!");
  399. connection_mark_for_close(TO_CONN(conn));
  400. } else {
  401. log_debug(LD_DIRSERV, "Finished writing server response. Closing.");
  402. connection_mark_for_close(TO_CONN(conn));
  403. }
  404. return 0;
  405. default:
  406. log_warn(LD_BUG,"called in unexpected state %d.",
  407. conn->base_.state);
  408. tor_fragile_assert();
  409. return -1;
  410. }
  411. return 0;
  412. }
  413. /** Connected handler for directory connections: begin sending data to the
  414. * server, and return 0.
  415. * Only used when connections don't immediately connect. */
  416. int
  417. connection_dir_finished_connecting(dir_connection_t *conn)
  418. {
  419. tor_assert(conn);
  420. tor_assert(conn->base_.type == CONN_TYPE_DIR);
  421. tor_assert(conn->base_.state == DIR_CONN_STATE_CONNECTING);
  422. log_debug(LD_HTTP,"Dir connection to router %s:%u established.",
  423. conn->base_.address,conn->base_.port);
  424. /* start flushing conn */
  425. conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING;
  426. return 0;
  427. }
  428. /** Helper. Compare two fp_pair_t objects, and return negative, 0, or
  429. * positive as appropriate. */
  430. static int
  431. compare_pairs_(const void **a, const void **b)
  432. {
  433. const fp_pair_t *fp1 = *a, *fp2 = *b;
  434. int r;
  435. if ((r = fast_memcmp(fp1->first, fp2->first, DIGEST_LEN)))
  436. return r;
  437. else
  438. return fast_memcmp(fp1->second, fp2->second, DIGEST_LEN);
  439. }
  440. /** Divide a string <b>res</b> of the form FP1-FP2+FP3-FP4...[.z], where each
  441. * FP is a hex-encoded fingerprint, into a sequence of distinct sorted
  442. * fp_pair_t. Skip malformed pairs. On success, return 0 and add those
  443. * fp_pair_t into <b>pairs_out</b>. On failure, return -1. */
  444. int
  445. dir_split_resource_into_fingerprint_pairs(const char *res,
  446. smartlist_t *pairs_out)
  447. {
  448. smartlist_t *pairs_tmp = smartlist_new();
  449. smartlist_t *pairs_result = smartlist_new();
  450. smartlist_split_string(pairs_tmp, res, "+", 0, 0);
  451. if (smartlist_len(pairs_tmp)) {
  452. char *last = smartlist_get(pairs_tmp,smartlist_len(pairs_tmp)-1);
  453. size_t last_len = strlen(last);
  454. if (last_len > 2 && !strcmp(last+last_len-2, ".z")) {
  455. last[last_len-2] = '\0';
  456. }
  457. }
  458. SMARTLIST_FOREACH_BEGIN(pairs_tmp, char *, cp) {
  459. if (strlen(cp) != HEX_DIGEST_LEN*2+1) {
  460. log_info(LD_DIR,
  461. "Skipping digest pair %s with non-standard length.", escaped(cp));
  462. } else if (cp[HEX_DIGEST_LEN] != '-') {
  463. log_info(LD_DIR,
  464. "Skipping digest pair %s with missing dash.", escaped(cp));
  465. } else {
  466. fp_pair_t pair;
  467. if (base16_decode(pair.first, DIGEST_LEN,
  468. cp, HEX_DIGEST_LEN) != DIGEST_LEN ||
  469. base16_decode(pair.second,DIGEST_LEN,
  470. cp+HEX_DIGEST_LEN+1, HEX_DIGEST_LEN) != DIGEST_LEN) {
  471. log_info(LD_DIR, "Skipping non-decodable digest pair %s", escaped(cp));
  472. } else {
  473. smartlist_add(pairs_result, tor_memdup(&pair, sizeof(pair)));
  474. }
  475. }
  476. tor_free(cp);
  477. } SMARTLIST_FOREACH_END(cp);
  478. smartlist_free(pairs_tmp);
  479. /* Uniq-and-sort */
  480. smartlist_sort(pairs_result, compare_pairs_);
  481. smartlist_uniq(pairs_result, compare_pairs_, tor_free_);
  482. smartlist_add_all(pairs_out, pairs_result);
  483. smartlist_free(pairs_result);
  484. return 0;
  485. }
  486. /** Given a directory <b>resource</b> request, containing zero
  487. * or more strings separated by plus signs, followed optionally by ".z", store
  488. * the strings, in order, into <b>fp_out</b>. If <b>compressed_out</b> is
  489. * non-NULL, set it to 1 if the resource ends in ".z", else set it to 0.
  490. *
  491. * If (flags & DSR_HEX), then delete all elements that aren't hex digests, and
  492. * decode the rest. If (flags & DSR_BASE64), then use "-" rather than "+" as
  493. * a separator, delete all the elements that aren't base64-encoded digests,
  494. * and decode the rest. If (flags & DSR_DIGEST256), these digests should be
  495. * 256 bits long; else they should be 160.
  496. *
  497. * If (flags & DSR_SORT_UNIQ), then sort the list and remove all duplicates.
  498. */
  499. int
  500. dir_split_resource_into_fingerprints(const char *resource,
  501. smartlist_t *fp_out, int *compressed_out,
  502. int flags)
  503. {
  504. const int decode_hex = flags & DSR_HEX;
  505. const int decode_base64 = flags & DSR_BASE64;
  506. const int digests_are_256 = flags & DSR_DIGEST256;
  507. const int sort_uniq = flags & DSR_SORT_UNIQ;
  508. const int digest_len = digests_are_256 ? DIGEST256_LEN : DIGEST_LEN;
  509. const int hex_digest_len = digests_are_256 ?
  510. HEX_DIGEST256_LEN : HEX_DIGEST_LEN;
  511. const int base64_digest_len = digests_are_256 ?
  512. BASE64_DIGEST256_LEN : BASE64_DIGEST_LEN;
  513. smartlist_t *fp_tmp = smartlist_new();
  514. tor_assert(!(decode_hex && decode_base64));
  515. tor_assert(fp_out);
  516. smartlist_split_string(fp_tmp, resource, decode_base64?"-":"+", 0, 0);
  517. if (compressed_out)
  518. *compressed_out = 0;
  519. if (smartlist_len(fp_tmp)) {
  520. char *last = smartlist_get(fp_tmp,smartlist_len(fp_tmp)-1);
  521. size_t last_len = strlen(last);
  522. if (last_len > 2 && !strcmp(last+last_len-2, ".z")) {
  523. last[last_len-2] = '\0';
  524. if (compressed_out)
  525. *compressed_out = 1;
  526. }
  527. }
  528. if (decode_hex || decode_base64) {
  529. const size_t encoded_len = decode_hex ? hex_digest_len : base64_digest_len;
  530. int i;
  531. char *cp, *d = NULL;
  532. for (i = 0; i < smartlist_len(fp_tmp); ++i) {
  533. cp = smartlist_get(fp_tmp, i);
  534. if (strlen(cp) != encoded_len) {
  535. log_info(LD_DIR,
  536. "Skipping digest %s with non-standard length.", escaped(cp));
  537. smartlist_del_keeporder(fp_tmp, i--);
  538. goto again;
  539. }
  540. d = tor_malloc_zero(digest_len);
  541. if (decode_hex ?
  542. (base16_decode(d, digest_len, cp, hex_digest_len) != digest_len) :
  543. (base64_decode(d, digest_len, cp, base64_digest_len)
  544. != digest_len)) {
  545. log_info(LD_DIR, "Skipping non-decodable digest %s", escaped(cp));
  546. smartlist_del_keeporder(fp_tmp, i--);
  547. goto again;
  548. }
  549. smartlist_set(fp_tmp, i, d);
  550. d = NULL;
  551. again:
  552. tor_free(cp);
  553. tor_free(d);
  554. }
  555. }
  556. if (sort_uniq) {
  557. if (decode_hex || decode_base64) {
  558. if (digests_are_256) {
  559. smartlist_sort_digests256(fp_tmp);
  560. smartlist_uniq_digests256(fp_tmp);
  561. } else {
  562. smartlist_sort_digests(fp_tmp);
  563. smartlist_uniq_digests(fp_tmp);
  564. }
  565. } else {
  566. smartlist_sort_strings(fp_tmp);
  567. smartlist_uniq_strings(fp_tmp);
  568. }
  569. }
  570. smartlist_add_all(fp_out, fp_tmp);
  571. smartlist_free(fp_tmp);
  572. return 0;
  573. }
  574. /** As dir_split_resource_into_fingerprints, but instead fills
  575. * <b>spool_out</b> with a list of spoolable_resource_t for the resource
  576. * identified through <b>source</b>. */
  577. int
  578. dir_split_resource_into_spoolable(const char *resource,
  579. dir_spool_source_t source,
  580. smartlist_t *spool_out,
  581. int *compressed_out,
  582. int flags)
  583. {
  584. smartlist_t *fingerprints = smartlist_new();
  585. tor_assert(flags & (DSR_HEX|DSR_BASE64));
  586. const size_t digest_len =
  587. (flags & DSR_DIGEST256) ? DIGEST256_LEN : DIGEST_LEN;
  588. int r = dir_split_resource_into_fingerprints(resource, fingerprints,
  589. compressed_out, flags);
  590. /* This is not a very efficient implementation XXXX */
  591. SMARTLIST_FOREACH_BEGIN(fingerprints, uint8_t *, digest) {
  592. spooled_resource_t *spooled =
  593. spooled_resource_new(source, digest, digest_len);
  594. if (spooled)
  595. smartlist_add(spool_out, spooled);
  596. tor_free(digest);
  597. } SMARTLIST_FOREACH_END(digest);
  598. smartlist_free(fingerprints);
  599. return r;
  600. }