directory.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. /* Copyright (c) 2001-2004, Roger Dingledine.
  2. * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
  3. * Copyright (c) 2007-2018, The Tor Project, Inc. */
  4. /* See LICENSE for licensing information */
  5. #include "core/or/or.h"
  6. #include "app/config/config.h"
  7. #include "core/mainloop/connection.h"
  8. #include "feature/dircache/dircache.h"
  9. #include "feature/dircache/dirserv.h"
  10. #include "feature/dirclient/dirclient.h"
  11. #include "feature/dircommon/directory.h"
  12. #include "feature/dircommon/fp_pair.h"
  13. #include "feature/stats/geoip_stats.h"
  14. #include "lib/compress/compress.h"
  15. #include "feature/dircommon/dir_connection_st.h"
  16. #include "feature/nodelist/routerinfo_st.h"
  17. /**
  18. * \file directory.c
  19. * \brief Code to send and fetch information from directory authorities and
  20. * caches via HTTP.
  21. *
  22. * Directory caches and authorities use dirserv.c to generate the results of a
  23. * query and stream them to the connection; clients use routerparse.c to parse
  24. * them.
  25. *
  26. * Every directory request has a dir_connection_t on the client side and on
  27. * the server side. In most cases, the dir_connection_t object is a linked
  28. * connection, tunneled through an edge_connection_t so that it can be a
  29. * stream on the Tor network. The only non-tunneled connections are those
  30. * that are used to upload material (descriptors and votes) to authorities.
  31. * Among tunneled connections, some use one-hop circuits, and others use
  32. * multi-hop circuits for anonymity.
  33. *
  34. * Directory requests are launched by calling
  35. * directory_initiate_request(). This function will
  36. * launch the connection, construct an HTTP request with
  37. * directory_send_command(), send it, and wait for a response. The client
  38. * later handles the response with connection_dir_client_reached_eof(),
  39. * which passes the information received to another part of Tor.
  40. *
  41. * On the server side, requests are read in directory_handle_command(),
  42. * which dispatches first on the request type (GET or POST), and then on
  43. * the URL requested. GET requests are processed with a table-based
  44. * dispatcher in url_table[]. The process of handling larger GET requests
  45. * is complicated because we need to avoid allocating a copy of all the
  46. * data to be sent to the client in one huge buffer. Instead, we spool the
  47. * data into the buffer using logic in connection_dirserv_flushed_some() in
  48. * dirserv.c. (TODO: If we extended buf.c to have a zero-copy
  49. * reference-based buffer type, we could remove most of that code, at the
  50. * cost of a bit more reference counting.)
  51. **/
  52. /* In-points to directory.c:
  53. *
  54. * - directory_post_to_dirservers(), called from
  55. * router_upload_dir_desc_to_dirservers() in router.c
  56. * upload_service_descriptor() in rendservice.c
  57. * - directory_get_from_dirserver(), called from
  58. * rend_client_refetch_renddesc() in rendclient.c
  59. * run_scheduled_events() in main.c
  60. * do_hup() in main.c
  61. * - connection_dir_process_inbuf(), called from
  62. * connection_process_inbuf() in connection.c
  63. * - connection_dir_finished_flushing(), called from
  64. * connection_finished_flushing() in connection.c
  65. * - connection_dir_finished_connecting(), called from
  66. * connection_finished_connecting() in connection.c
  67. */
  68. /** Convert a connection_t* to a dir_connection_t*; assert if the cast is
  69. * invalid. */
  70. dir_connection_t *
  71. TO_DIR_CONN(connection_t *c)
  72. {
  73. tor_assert(c->magic == DIR_CONNECTION_MAGIC);
  74. return DOWNCAST(dir_connection_t, c);
  75. }
  76. /** Return false if the directory purpose <b>dir_purpose</b>
  77. * does not require an anonymous (three-hop) connection.
  78. *
  79. * Return true 1) by default, 2) if all directory actions have
  80. * specifically been configured to be over an anonymous connection,
  81. * or 3) if the router is a bridge */
  82. int
  83. purpose_needs_anonymity(uint8_t dir_purpose, uint8_t router_purpose,
  84. const char *resource)
  85. {
  86. if (get_options()->AllDirActionsPrivate)
  87. return 1;
  88. if (router_purpose == ROUTER_PURPOSE_BRIDGE) {
  89. if (dir_purpose == DIR_PURPOSE_FETCH_SERVERDESC
  90. && resource && !strcmp(resource, "authority.z")) {
  91. /* We are asking a bridge for its own descriptor. That doesn't need
  92. anonymity. */
  93. return 0;
  94. }
  95. /* Assume all other bridge stuff needs anonymity. */
  96. return 1; /* if no circuits yet, this might break bootstrapping, but it's
  97. * needed to be safe. */
  98. }
  99. switch (dir_purpose)
  100. {
  101. case DIR_PURPOSE_UPLOAD_DIR:
  102. case DIR_PURPOSE_UPLOAD_VOTE:
  103. case DIR_PURPOSE_UPLOAD_SIGNATURES:
  104. case DIR_PURPOSE_FETCH_STATUS_VOTE:
  105. case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES:
  106. case DIR_PURPOSE_FETCH_CONSENSUS:
  107. case DIR_PURPOSE_FETCH_CERTIFICATE:
  108. case DIR_PURPOSE_FETCH_SERVERDESC:
  109. case DIR_PURPOSE_FETCH_EXTRAINFO:
  110. case DIR_PURPOSE_FETCH_MICRODESC:
  111. return 0;
  112. case DIR_PURPOSE_HAS_FETCHED_HSDESC:
  113. case DIR_PURPOSE_HAS_FETCHED_RENDDESC_V2:
  114. case DIR_PURPOSE_UPLOAD_RENDDESC_V2:
  115. case DIR_PURPOSE_FETCH_RENDDESC_V2:
  116. case DIR_PURPOSE_FETCH_HSDESC:
  117. case DIR_PURPOSE_UPLOAD_HSDESC:
  118. return 1;
  119. case DIR_PURPOSE_SERVER:
  120. default:
  121. log_warn(LD_BUG, "Called with dir_purpose=%d, router_purpose=%d",
  122. dir_purpose, router_purpose);
  123. tor_assert_nonfatal_unreached();
  124. return 1; /* Assume it needs anonymity; better safe than sorry. */
  125. }
  126. }
  127. /** Return a newly allocated string describing <b>auth</b>. Only describes
  128. * authority features. */
  129. char *
  130. authdir_type_to_string(dirinfo_type_t auth)
  131. {
  132. char *result;
  133. smartlist_t *lst = smartlist_new();
  134. if (auth & V3_DIRINFO)
  135. smartlist_add(lst, (void*)"V3");
  136. if (auth & BRIDGE_DIRINFO)
  137. smartlist_add(lst, (void*)"Bridge");
  138. if (smartlist_len(lst)) {
  139. result = smartlist_join_strings(lst, ", ", 0, NULL);
  140. } else {
  141. result = tor_strdup("[Not an authority]");
  142. }
  143. smartlist_free(lst);
  144. return result;
  145. }
  146. /** Return true iff anything we say on <b>conn</b> is being encrypted before
  147. * we send it to the client/server. */
  148. int
  149. connection_dir_is_encrypted(const dir_connection_t *conn)
  150. {
  151. /* Right now it's sufficient to see if conn is or has been linked, since
  152. * the only thing it could be linked to is an edge connection on a
  153. * circuit, and the only way it could have been unlinked is at the edge
  154. * connection getting closed.
  155. */
  156. return TO_CONN(conn)->linked;
  157. }
  158. /** Parse an HTTP request line at the start of a headers string. On failure,
  159. * return -1. On success, set *<b>command_out</b> to a copy of the HTTP
  160. * command ("get", "post", etc), set *<b>url_out</b> to a copy of the URL, and
  161. * return 0. */
  162. int
  163. parse_http_command(const char *headers, char **command_out, char **url_out)
  164. {
  165. const char *command, *end_of_command;
  166. char *s, *start, *tmp;
  167. s = (char *)eat_whitespace_no_nl(headers);
  168. if (!*s) return -1;
  169. command = s;
  170. s = (char *)find_whitespace(s); /* get past GET/POST */
  171. if (!*s) return -1;
  172. end_of_command = s;
  173. s = (char *)eat_whitespace_no_nl(s);
  174. if (!*s) return -1;
  175. start = s; /* this is the URL, assuming it's valid */
  176. s = (char *)find_whitespace(start);
  177. if (!*s) return -1;
  178. /* tolerate the http[s] proxy style of putting the hostname in the url */
  179. if (s-start >= 4 && !strcmpstart(start,"http")) {
  180. tmp = start + 4;
  181. if (*tmp == 's')
  182. tmp++;
  183. if (s-tmp >= 3 && !strcmpstart(tmp,"://")) {
  184. tmp = strchr(tmp+3, '/');
  185. if (tmp && tmp < s) {
  186. log_debug(LD_DIR,"Skipping over 'http[s]://hostname/' string");
  187. start = tmp;
  188. }
  189. }
  190. }
  191. /* Check if the header is well formed (next sequence
  192. * should be HTTP/1.X\r\n). Assumes we're supporting 1.0? */
  193. {
  194. unsigned minor_ver;
  195. char ch;
  196. char *e = (char *)eat_whitespace_no_nl(s);
  197. if (2 != tor_sscanf(e, "HTTP/1.%u%c", &minor_ver, &ch)) {
  198. return -1;
  199. }
  200. if (ch != '\r')
  201. return -1;
  202. }
  203. *url_out = tor_memdup_nulterm(start, s-start);
  204. *command_out = tor_memdup_nulterm(command, end_of_command - command);
  205. return 0;
  206. }
  /** Look through <b>headers</b> line by line for the first line that starts
   * (case-insensitively) with <b>which</b>, and return a copy of the rest of
   * that line.  <b>which</b> should include the terminating colon and space.
   * The copy stops just before the next \\r, or at the end of the string if
   * there is none.  Return NULL if no line matches or <b>headers</b> is
   * NULL. */
  char *
  http_get_header(const char *headers, const char *which)
  {
    const char *line = headers;

    while (line != NULL) {
      if (strcasecmpstart(line, which) == 0) {
        const char *value = line + strlen(which);
        const char *cr = strchr(value, '\r');

        if (cr == NULL)
          return tor_strdup(value);
        return tor_strndup(value, cr - value);
      }

      /* Move to the character after the next newline, if there is one. */
      line = strchr(line, '\n');
      if (line != NULL)
        ++line;
    }

    return NULL;
  }
  230. /** Parse an HTTP response string <b>headers</b> of the form
  231. * \verbatim
  232. * "HTTP/1.\%d \%d\%s\r\n...".
  233. * \endverbatim
  234. *
  235. * If it's well-formed, assign the status code to *<b>code</b> and
  236. * return 0. Otherwise, return -1.
  237. *
  238. * On success: If <b>date</b> is provided, set *date to the Date
  239. * header in the http headers, or 0 if no such header is found. If
  240. * <b>compression</b> is provided, set *<b>compression</b> to the
  241. * compression method given in the Content-Encoding header, or 0 if no
  242. * such header is found, or -1 if the value of the header is not
  243. * recognized. If <b>reason</b> is provided, strdup the reason string
  244. * into it.
  245. */
  246. int
  247. parse_http_response(const char *headers, int *code, time_t *date,
  248. compress_method_t *compression, char **reason)
  249. {
  250. unsigned n1, n2;
  251. char datestr[RFC1123_TIME_LEN+1];
  252. smartlist_t *parsed_headers;
  253. tor_assert(headers);
  254. tor_assert(code);
  255. while (TOR_ISSPACE(*headers)) headers++; /* tolerate leading whitespace */
  256. if (tor_sscanf(headers, "HTTP/1.%u %u", &n1, &n2) < 2 ||
  257. (n1 != 0 && n1 != 1) ||
  258. (n2 < 100 || n2 >= 600)) {
  259. log_warn(LD_HTTP,"Failed to parse header %s",escaped(headers));
  260. return -1;
  261. }
  262. *code = n2;
  263. parsed_headers = smartlist_new();
  264. smartlist_split_string(parsed_headers, headers, "\n",
  265. SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, -1);
  266. if (reason) {
  267. smartlist_t *status_line_elements = smartlist_new();
  268. tor_assert(smartlist_len(parsed_headers));
  269. smartlist_split_string(status_line_elements,
  270. smartlist_get(parsed_headers, 0),
  271. " ", SPLIT_SKIP_SPACE|SPLIT_IGNORE_BLANK, 3);
  272. tor_assert(smartlist_len(status_line_elements) <= 3);
  273. if (smartlist_len(status_line_elements) == 3) {
  274. *reason = smartlist_get(status_line_elements, 2);
  275. smartlist_set(status_line_elements, 2, NULL); /* Prevent free */
  276. }
  277. SMARTLIST_FOREACH(status_line_elements, char *, cp, tor_free(cp));
  278. smartlist_free(status_line_elements);
  279. }
  280. if (date) {
  281. *date = 0;
  282. SMARTLIST_FOREACH(parsed_headers, const char *, s,
  283. if (!strcmpstart(s, "Date: ")) {
  284. strlcpy(datestr, s+6, sizeof(datestr));
  285. /* This will do nothing on failure, so we don't need to check
  286. the result. We shouldn't warn, since there are many other valid
  287. date formats besides the one we use. */
  288. parse_rfc1123_time(datestr, date);
  289. break;
  290. });
  291. }
  292. if (compression) {
  293. const char *enc = NULL;
  294. SMARTLIST_FOREACH(parsed_headers, const char *, s,
  295. if (!strcmpstart(s, "Content-Encoding: ")) {
  296. enc = s+18; break;
  297. });
  298. if (enc == NULL)
  299. *compression = NO_METHOD;
  300. else {
  301. *compression = compression_method_get_by_name(enc);
  302. if (*compression == UNKNOWN_METHOD)
  303. log_info(LD_HTTP, "Unrecognized content encoding: %s. Trying to deal.",
  304. escaped(enc));
  305. }
  306. }
  307. SMARTLIST_FOREACH(parsed_headers, char *, s, tor_free(s));
  308. smartlist_free(parsed_headers);
  309. return 0;
  310. }
  /** Largest directory object (in bytes) we are willing to receive: 10 MB.
   * If anything bigger is arriving, we're getting DoS'd.  (As of 0.1.2.x,
   * raw directories are about 1MB, and we never ask for more than 96 router
   * descriptors at a time.) */
  #define MAX_DIRECTORY_OBJECT_SIZE (10 << 20)
  /** Larger limit (in bytes) allowed for vote downloads: five times the
   * ordinary directory object limit. */
  #define MAX_VOTE_DL_SIZE (5 * MAX_DIRECTORY_OBJECT_SIZE)
  317. /** Read handler for directory connections. (That's connections <em>to</em>
  318. * directory servers and connections <em>at</em> directory servers.)
  319. */
  320. int
  321. connection_dir_process_inbuf(dir_connection_t *conn)
  322. {
  323. size_t max_size;
  324. tor_assert(conn);
  325. tor_assert(conn->base_.type == CONN_TYPE_DIR);
  326. /* Directory clients write, then read data until they receive EOF;
  327. * directory servers read data until they get an HTTP command, then
  328. * write their response (when it's finished flushing, they mark for
  329. * close).
  330. */
  331. /* If we're on the dirserver side, look for a command. */
  332. if (conn->base_.state == DIR_CONN_STATE_SERVER_COMMAND_WAIT) {
  333. if (directory_handle_command(conn) < 0) {
  334. connection_mark_for_close(TO_CONN(conn));
  335. return -1;
  336. }
  337. return 0;
  338. }
  339. max_size =
  340. (TO_CONN(conn)->purpose == DIR_PURPOSE_FETCH_STATUS_VOTE) ?
  341. MAX_VOTE_DL_SIZE : MAX_DIRECTORY_OBJECT_SIZE;
  342. if (connection_get_inbuf_len(TO_CONN(conn)) > max_size) {
  343. log_warn(LD_HTTP,
  344. "Too much data received from directory connection (%s): "
  345. "denial of service attempt, or you need to upgrade?",
  346. conn->base_.address);
  347. connection_mark_for_close(TO_CONN(conn));
  348. return -1;
  349. }
  350. if (!conn->base_.inbuf_reached_eof)
  351. log_debug(LD_HTTP,"Got data, not eof. Leaving on inbuf.");
  352. return 0;
  353. }
  354. /** Called when we're about to finally unlink and free a directory connection:
  355. * perform necessary accounting and cleanup */
  356. void
  357. connection_dir_about_to_close(dir_connection_t *dir_conn)
  358. {
  359. connection_t *conn = TO_CONN(dir_conn);
  360. if (conn->state < DIR_CONN_STATE_CLIENT_FINISHED) {
  361. /* It's a directory connection and connecting or fetching
  362. * failed: forget about this router, and maybe try again. */
  363. connection_dir_client_request_failed(dir_conn);
  364. }
  365. connection_dir_client_refetch_hsdesc_if_needed(dir_conn);
  366. }
  367. /** Write handler for directory connections; called when all data has
  368. * been flushed. Close the connection or wait for a response as
  369. * appropriate.
  370. */
  371. int
  372. connection_dir_finished_flushing(dir_connection_t *conn)
  373. {
  374. tor_assert(conn);
  375. tor_assert(conn->base_.type == CONN_TYPE_DIR);
  376. if (conn->base_.marked_for_close)
  377. return 0;
  378. /* Note that we have finished writing the directory response. For direct
  379. * connections this means we're done; for tunneled connections it's only
  380. * an intermediate step. */
  381. if (conn->dirreq_id)
  382. geoip_change_dirreq_state(conn->dirreq_id, DIRREQ_TUNNELED,
  383. DIRREQ_FLUSHING_DIR_CONN_FINISHED);
  384. else
  385. geoip_change_dirreq_state(TO_CONN(conn)->global_identifier,
  386. DIRREQ_DIRECT,
  387. DIRREQ_FLUSHING_DIR_CONN_FINISHED);
  388. switch (conn->base_.state) {
  389. case DIR_CONN_STATE_CONNECTING:
  390. case DIR_CONN_STATE_CLIENT_SENDING:
  391. log_debug(LD_DIR,"client finished sending command.");
  392. conn->base_.state = DIR_CONN_STATE_CLIENT_READING;
  393. return 0;
  394. case DIR_CONN_STATE_SERVER_WRITING:
  395. if (conn->spool) {
  396. log_warn(LD_BUG, "Emptied a dirserv buffer, but it's still spooling!");
  397. connection_mark_for_close(TO_CONN(conn));
  398. } else {
  399. log_debug(LD_DIRSERV, "Finished writing server response. Closing.");
  400. connection_mark_for_close(TO_CONN(conn));
  401. }
  402. return 0;
  403. default:
  404. log_warn(LD_BUG,"called in unexpected state %d.",
  405. conn->base_.state);
  406. tor_fragile_assert();
  407. return -1;
  408. }
  409. return 0;
  410. }
  411. /** Connected handler for directory connections: begin sending data to the
  412. * server, and return 0.
  413. * Only used when connections don't immediately connect. */
  414. int
  415. connection_dir_finished_connecting(dir_connection_t *conn)
  416. {
  417. tor_assert(conn);
  418. tor_assert(conn->base_.type == CONN_TYPE_DIR);
  419. tor_assert(conn->base_.state == DIR_CONN_STATE_CONNECTING);
  420. log_debug(LD_HTTP,"Dir connection to router %s:%u established.",
  421. conn->base_.address,conn->base_.port);
  422. /* start flushing conn */
  423. conn->base_.state = DIR_CONN_STATE_CLIENT_SENDING;
  424. return 0;
  425. }
  426. /** Helper. Compare two fp_pair_t objects, and return negative, 0, or
  427. * positive as appropriate. */
  428. static int
  429. compare_pairs_(const void **a, const void **b)
  430. {
  431. const fp_pair_t *fp1 = *a, *fp2 = *b;
  432. int r;
  433. if ((r = fast_memcmp(fp1->first, fp2->first, DIGEST_LEN)))
  434. return r;
  435. else
  436. return fast_memcmp(fp1->second, fp2->second, DIGEST_LEN);
  437. }
  438. /** Divide a string <b>res</b> of the form FP1-FP2+FP3-FP4...[.z], where each
  439. * FP is a hex-encoded fingerprint, into a sequence of distinct sorted
  440. * fp_pair_t. Skip malformed pairs. On success, return 0 and add those
  441. * fp_pair_t into <b>pairs_out</b>. On failure, return -1. */
  442. int
  443. dir_split_resource_into_fingerprint_pairs(const char *res,
  444. smartlist_t *pairs_out)
  445. {
  446. smartlist_t *pairs_tmp = smartlist_new();
  447. smartlist_t *pairs_result = smartlist_new();
  448. smartlist_split_string(pairs_tmp, res, "+", 0, 0);
  449. if (smartlist_len(pairs_tmp)) {
  450. char *last = smartlist_get(pairs_tmp,smartlist_len(pairs_tmp)-1);
  451. size_t last_len = strlen(last);
  452. if (last_len > 2 && !strcmp(last+last_len-2, ".z")) {
  453. last[last_len-2] = '\0';
  454. }
  455. }
  456. SMARTLIST_FOREACH_BEGIN(pairs_tmp, char *, cp) {
  457. if (strlen(cp) != HEX_DIGEST_LEN*2+1) {
  458. log_info(LD_DIR,
  459. "Skipping digest pair %s with non-standard length.", escaped(cp));
  460. } else if (cp[HEX_DIGEST_LEN] != '-') {
  461. log_info(LD_DIR,
  462. "Skipping digest pair %s with missing dash.", escaped(cp));
  463. } else {
  464. fp_pair_t pair;
  465. if (base16_decode(pair.first, DIGEST_LEN,
  466. cp, HEX_DIGEST_LEN) != DIGEST_LEN ||
  467. base16_decode(pair.second,DIGEST_LEN,
  468. cp+HEX_DIGEST_LEN+1, HEX_DIGEST_LEN) != DIGEST_LEN) {
  469. log_info(LD_DIR, "Skipping non-decodable digest pair %s", escaped(cp));
  470. } else {
  471. smartlist_add(pairs_result, tor_memdup(&pair, sizeof(pair)));
  472. }
  473. }
  474. tor_free(cp);
  475. } SMARTLIST_FOREACH_END(cp);
  476. smartlist_free(pairs_tmp);
  477. /* Uniq-and-sort */
  478. smartlist_sort(pairs_result, compare_pairs_);
  479. smartlist_uniq(pairs_result, compare_pairs_, tor_free_);
  480. smartlist_add_all(pairs_out, pairs_result);
  481. smartlist_free(pairs_result);
  482. return 0;
  483. }
  484. /** Given a directory <b>resource</b> request, containing zero
  485. * or more strings separated by plus signs, followed optionally by ".z", store
  486. * the strings, in order, into <b>fp_out</b>. If <b>compressed_out</b> is
  487. * non-NULL, set it to 1 if the resource ends in ".z", else set it to 0.
  488. *
  489. * If (flags & DSR_HEX), then delete all elements that aren't hex digests, and
  490. * decode the rest. If (flags & DSR_BASE64), then use "-" rather than "+" as
  491. * a separator, delete all the elements that aren't base64-encoded digests,
  492. * and decode the rest. If (flags & DSR_DIGEST256), these digests should be
  493. * 256 bits long; else they should be 160.
  494. *
  495. * If (flags & DSR_SORT_UNIQ), then sort the list and remove all duplicates.
  496. */
  497. int
  498. dir_split_resource_into_fingerprints(const char *resource,
  499. smartlist_t *fp_out, int *compressed_out,
  500. int flags)
  501. {
  502. const int decode_hex = flags & DSR_HEX;
  503. const int decode_base64 = flags & DSR_BASE64;
  504. const int digests_are_256 = flags & DSR_DIGEST256;
  505. const int sort_uniq = flags & DSR_SORT_UNIQ;
  506. const int digest_len = digests_are_256 ? DIGEST256_LEN : DIGEST_LEN;
  507. const int hex_digest_len = digests_are_256 ?
  508. HEX_DIGEST256_LEN : HEX_DIGEST_LEN;
  509. const int base64_digest_len = digests_are_256 ?
  510. BASE64_DIGEST256_LEN : BASE64_DIGEST_LEN;
  511. smartlist_t *fp_tmp = smartlist_new();
  512. tor_assert(!(decode_hex && decode_base64));
  513. tor_assert(fp_out);
  514. smartlist_split_string(fp_tmp, resource, decode_base64?"-":"+", 0, 0);
  515. if (compressed_out)
  516. *compressed_out = 0;
  517. if (smartlist_len(fp_tmp)) {
  518. char *last = smartlist_get(fp_tmp,smartlist_len(fp_tmp)-1);
  519. size_t last_len = strlen(last);
  520. if (last_len > 2 && !strcmp(last+last_len-2, ".z")) {
  521. last[last_len-2] = '\0';
  522. if (compressed_out)
  523. *compressed_out = 1;
  524. }
  525. }
  526. if (decode_hex || decode_base64) {
  527. const size_t encoded_len = decode_hex ? hex_digest_len : base64_digest_len;
  528. int i;
  529. char *cp, *d = NULL;
  530. for (i = 0; i < smartlist_len(fp_tmp); ++i) {
  531. cp = smartlist_get(fp_tmp, i);
  532. if (strlen(cp) != encoded_len) {
  533. log_info(LD_DIR,
  534. "Skipping digest %s with non-standard length.", escaped(cp));
  535. smartlist_del_keeporder(fp_tmp, i--);
  536. goto again;
  537. }
  538. d = tor_malloc_zero(digest_len);
  539. if (decode_hex ?
  540. (base16_decode(d, digest_len, cp, hex_digest_len) != digest_len) :
  541. (base64_decode(d, digest_len, cp, base64_digest_len)
  542. != digest_len)) {
  543. log_info(LD_DIR, "Skipping non-decodable digest %s", escaped(cp));
  544. smartlist_del_keeporder(fp_tmp, i--);
  545. goto again;
  546. }
  547. smartlist_set(fp_tmp, i, d);
  548. d = NULL;
  549. again:
  550. tor_free(cp);
  551. tor_free(d);
  552. }
  553. }
  554. if (sort_uniq) {
  555. if (decode_hex || decode_base64) {
  556. if (digests_are_256) {
  557. smartlist_sort_digests256(fp_tmp);
  558. smartlist_uniq_digests256(fp_tmp);
  559. } else {
  560. smartlist_sort_digests(fp_tmp);
  561. smartlist_uniq_digests(fp_tmp);
  562. }
  563. } else {
  564. smartlist_sort_strings(fp_tmp);
  565. smartlist_uniq_strings(fp_tmp);
  566. }
  567. }
  568. smartlist_add_all(fp_out, fp_tmp);
  569. smartlist_free(fp_tmp);
  570. return 0;
  571. }
  572. /** As dir_split_resource_into_fingerprints, but instead fills
  573. * <b>spool_out</b> with a list of spoolable_resource_t for the resource
  574. * identified through <b>source</b>. */
  575. int
  576. dir_split_resource_into_spoolable(const char *resource,
  577. dir_spool_source_t source,
  578. smartlist_t *spool_out,
  579. int *compressed_out,
  580. int flags)
  581. {
  582. smartlist_t *fingerprints = smartlist_new();
  583. tor_assert(flags & (DSR_HEX|DSR_BASE64));
  584. const size_t digest_len =
  585. (flags & DSR_DIGEST256) ? DIGEST256_LEN : DIGEST_LEN;
  586. int r = dir_split_resource_into_fingerprints(resource, fingerprints,
  587. compressed_out, flags);
  588. /* This is not a very efficient implementation XXXX */
  589. SMARTLIST_FOREACH_BEGIN(fingerprints, uint8_t *, digest) {
  590. spooled_resource_t *spooled =
  591. spooled_resource_new(source, digest, digest_len);
  592. if (spooled)
  593. smartlist_add(spool_out, spooled);
  594. tor_free(digest);
  595. } SMARTLIST_FOREACH_END(digest);
  596. smartlist_free(fingerprints);
  597. return r;
  598. }