Browse Source

Merge branch 'parse_accept_encoding'

Nick Mathewson 7 years ago
parent
commit
10a4f9cd07
5 changed files with 122 additions and 13 deletions
  1. 20 0
      src/common/compress.c
  2. 3 2
      src/common/compress.h
  3. 51 11
      src/or/directory.c
  4. 1 0
      src/or/directory.h
  5. 47 0
      src/test/test_dir_handle_get.c

+ 20 - 0
src/common/compress.c

@@ -285,6 +285,26 @@ tor_compress_supports_method(compress_method_t method)
   }
 }
 
+/**
+ * Return a bitmask of the supported compression types, where 1<<m is
+ * set in the bitmask if and only if compression with method <b>m</b> is
+ * supported.
+ *
+ * The mask is built on the first call by asking
+ * tor_compress_supports_method() about every value from NO_METHOD through
+ * UNKNOWN_METHOD inclusive, and is then kept in a function-local static.
+ * A stored value of 0 doubles as the "not computed yet" marker, so the scan
+ * is repeated on each call for as long as no method is reported supported.
+ */
+unsigned
+tor_compress_get_supported_method_bitmask(void)
+{
+  static unsigned supported = 0; /* 0 means "not computed yet". */
+  if (supported == 0) {
+    compress_method_t m;
+    for (m = NO_METHOD; m <= UNKNOWN_METHOD; ++m) {
+      if (tor_compress_supports_method(m)) {
+        supported |= (1u << m);
+      }
+    }
+  }
+  return supported;
+}
+
 /** Table of compression method names.  These should have an "x-" prefix,
  * if they are not listed in the IANA content coding registry. */
 static const struct {

+ 3 - 2
src/common/compress.h

@@ -16,12 +16,12 @@
  * functions here. Call tor_compress_supports_method() to check if a given
  * compression schema is supported by Tor. */
 typedef enum {
-  NO_METHOD=0,
+  NO_METHOD=0, // This method must be first.
   GZIP_METHOD=1,
   ZLIB_METHOD=2,
   LZMA_METHOD=3,
   ZSTD_METHOD=4,
-  UNKNOWN_METHOD=5
+  UNKNOWN_METHOD=5, // This method must be last. Add new ones in the middle.
 } compress_method_t;
 
 /**
@@ -48,6 +48,7 @@ compress_method_t detect_compression_method(const char *in, size_t in_len);
 int tor_compress_is_compression_bomb(size_t size_in, size_t size_out);
 
 int tor_compress_supports_method(compress_method_t method);
+unsigned tor_compress_get_supported_method_bitmask(void);
 const char * compression_method_get_name(compress_method_t method);
 compress_method_t compression_method_get_by_name(const char *name);
 

+ 51 - 11
src/or/directory.c

@@ -2928,6 +2928,31 @@ write_http_response_header(dir_connection_t *conn, ssize_t length,
                              cache_lifetime);
 }
 
+/** Parse the compression methods listed in an Accept-Encoding header <b>h</b>,
+ * and convert them to a bitfield where compression method x is supported if
+ * and only if 1 &lt;&lt; x is set in the bitfield.
+ *
+ * The bit for NO_METHOD is always set in the result.  <b>h</b> is split on
+ * commas and each piece is passed to compression_method_get_by_name();
+ * pieces that map to UNKNOWN_METHOD are skipped.  Going by the split flags
+ * (and the unit test), surrounding whitespace is stripped and empty pieces
+ * are dropped before the lookup.
+ *
+ * NOTE(review): each piece is looked up verbatim, so nothing here parses
+ * parameters such as ";q=0"; presumably such a piece is treated as unknown
+ * -- confirm against compression_method_get_by_name().
+ */
+STATIC unsigned
+parse_accept_encoding_header(const char *h)
+{
+  unsigned result = (1u << NO_METHOD); /* NO_METHOD bit is always set. */
+  smartlist_t *methods = smartlist_new();
+  smartlist_split_string(methods, h, ",",
+             SPLIT_SKIP_SPACE|SPLIT_STRIP_SPACE|SPLIT_IGNORE_BLANK, 0);
+
+  SMARTLIST_FOREACH_BEGIN(methods, const char *, m) {
+    compress_method_t method = compression_method_get_by_name(m);
+    if (method != UNKNOWN_METHOD) {
+      tor_assert(method < 8*sizeof(unsigned)); /* Shift must fit the mask. */
+      result |= (1u << method);
+    }
+  } SMARTLIST_FOREACH_END(m);
+  SMARTLIST_FOREACH_BEGIN(methods, char *, m) { /* Release each piece. */
+    tor_free(m);
+  } SMARTLIST_FOREACH_END(m);
+  smartlist_free(methods);
+  return result;
+}
+
 /** Decide whether a client would accept the consensus we have.
  *
  * Clients can say they only want a consensus if it's signed by more
@@ -3002,8 +3027,9 @@ choose_compression_level(ssize_t n_bytes)
 
 /** Information passed to handle a GET request. */
 typedef struct get_handler_args_t {
-  /** True if the client asked for compressed data. */
-  int compressed;
+  /** Bitmask of compression methods that the client said (or implied) it
+   * supported. */
+  unsigned compression_supported;
   /** If nonzero, the time included an if-modified-since header with this
    * value. */
   time_t if_modified_since;
@@ -3077,8 +3103,9 @@ directory_handle_command_get,(dir_connection_t *conn, const char *headers,
 {
   char *url, *url_mem, *header;
   time_t if_modified_since = 0;
-  int compressed;
+  int zlib_compressed_in_url;
   size_t url_len;
+  unsigned compression_methods_supported;
 
   /* We ignore the body of a GET request. */
   (void)req_body;
@@ -3109,17 +3136,30 @@ directory_handle_command_get,(dir_connection_t *conn, const char *headers,
 
   url_mem = url;
   url_len = strlen(url);
-  compressed = url_len > 2 && !strcmp(url+url_len-2, ".z");
-  if (compressed) {
+
+  zlib_compressed_in_url = url_len > 2 && !strcmp(url+url_len-2, ".z");
+  if (zlib_compressed_in_url) {
     url[url_len-2] = '\0';
     url_len -= 2;
   }
 
+  if ((header = http_get_header(headers, "Accept-Encoding"))) {
+    compression_methods_supported = parse_accept_encoding_header(header);
+    tor_free(header);
+  } else {
+    compression_methods_supported = (1u << NO_METHOD);
+    if (zlib_compressed_in_url)
+      compression_methods_supported |= (1u << ZLIB_METHOD);
+  }
+
+  /* Remove all methods that we don't both support. */
+  compression_methods_supported &= tor_compress_get_supported_method_bitmask();
+
   get_handler_args_t args;
   args.url = url;
   args.headers = headers;
   args.if_modified_since = if_modified_since;
-  args.compressed = compressed;
+  args.compression_supported = compression_methods_supported;
 
   int i, result = -1;
   for (i = 0; url_table[i].string; ++i) {
@@ -3198,7 +3238,7 @@ handle_get_current_consensus(dir_connection_t *conn,
                              const get_handler_args_t *args)
 {
   const char *url = args->url;
-  const int compressed = args->compressed;
+  const int compressed = args->compression_supported & (1u << ZLIB_METHOD);
   const time_t if_modified_since = args->if_modified_since;
   int clear_spool = 0;
 
@@ -3339,7 +3379,7 @@ static int
 handle_get_status_vote(dir_connection_t *conn, const get_handler_args_t *args)
 {
   const char *url = args->url;
-  const int compressed = args->compressed;
+  const int compressed = args->compression_supported & (1u << ZLIB_METHOD);
   {
     int current;
     ssize_t body_len = 0;
@@ -3446,7 +3486,7 @@ static int
 handle_get_microdesc(dir_connection_t *conn, const get_handler_args_t *args)
 {
   const char *url = args->url;
-  const int compressed = args->compressed;
+  const int compressed = args->compression_supported & (1u << ZLIB_METHOD);
   int clear_spool = 1;
   {
     conn->spool = smartlist_new();
@@ -3496,7 +3536,7 @@ static int
 handle_get_descriptor(dir_connection_t *conn, const get_handler_args_t *args)
 {
   const char *url = args->url;
-  const int compressed = args->compressed;
+  const int compressed = args->compression_supported & (1u << ZLIB_METHOD);
   const or_options_t *options = get_options();
   int clear_spool = 1;
   if (!strcmpstart(url,"/tor/server/") ||
@@ -3589,7 +3629,7 @@ static int
 handle_get_keys(dir_connection_t *conn, const get_handler_args_t *args)
 {
   const char *url = args->url;
-  const int compressed = args->compressed;
+  const int compressed = args->compression_supported & (1u << ZLIB_METHOD);
   const time_t if_modified_since = args->if_modified_since;
   {
     smartlist_t *certs = smartlist_new();

+ 1 - 0
src/or/directory.h

@@ -196,6 +196,7 @@ STATIC int next_random_exponential_delay(int delay, int max_delay);
 STATIC int parse_hs_version_from_post(const char *url, const char *prefix,
                                       const char **end_pos);
 
+STATIC unsigned parse_accept_encoding_header(const char *h);
 #endif
 
 #endif

+ 47 - 0
src/test/test_dir_handle_get.c

@@ -2497,6 +2497,52 @@ test_dir_handle_get_status_vote_current_authority(void* data)
     dirvote_free_all();
 }
 
+static void
+test_dir_handle_get_parse_accept_encoding(void *arg)
+{
+  (void)arg; /* Unused. */
+  const unsigned B_NONE = 1u << NO_METHOD; /* Expected in every result. */
+  const unsigned B_ZLIB = 1u << ZLIB_METHOD;
+  const unsigned B_GZIP = 1u << GZIP_METHOD;
+  const unsigned B_LZMA = 1u << LZMA_METHOD;
+  const unsigned B_ZSTD = 1u << ZSTD_METHOD;
+
+  unsigned encodings;
+
+  encodings = parse_accept_encoding_header("");
+  tt_uint_op(B_NONE, OP_EQ, encodings); /* Empty header. */
+
+  encodings = parse_accept_encoding_header("  ");
+  tt_uint_op(B_NONE, OP_EQ, encodings); /* Only whitespace. */
+
+  encodings = parse_accept_encoding_header("dewey, cheatham, and howe ");
+  tt_uint_op(B_NONE, OP_EQ, encodings); /* No recognized names. */
+
+  encodings = parse_accept_encoding_header("dewey, cheatham, and gzip");
+  tt_uint_op(B_NONE, OP_EQ, encodings); /* "and gzip" is one piece. */
+
+  encodings = parse_accept_encoding_header("dewey, cheatham, and, gzip");
+  tt_uint_op(B_NONE|B_GZIP, OP_EQ, encodings); /* gzip is its own piece. */
+
+  encodings = parse_accept_encoding_header(" gzip");
+  tt_uint_op(B_NONE|B_GZIP, OP_EQ, encodings); /* Leading space. */
+
+  encodings = parse_accept_encoding_header("gzip");
+  tt_uint_op(B_NONE|B_GZIP, OP_EQ, encodings);
+
+  encodings = parse_accept_encoding_header("x-zstd, deflate, x-lzma");
+  tt_uint_op(B_NONE|B_ZLIB|B_ZSTD|B_LZMA, OP_EQ, encodings); /* deflate->ZLIB */
+
+  encodings = parse_accept_encoding_header("x-zstd, deflate, x-lzma, gzip");
+  tt_uint_op(B_NONE|B_ZLIB|B_ZSTD|B_LZMA|B_GZIP, OP_EQ, encodings);
+
+  encodings = parse_accept_encoding_header("x-zstd,deflate,x-lzma,gzip");
+  tt_uint_op(B_NONE|B_ZLIB|B_ZSTD|B_LZMA|B_GZIP, OP_EQ, encodings);
+
+ done:
+  ;
+}
+
 #define DIR_HANDLE_CMD(name,flags) \
   { #name, test_dir_handle_get_##name, (flags), NULL, NULL }
 
@@ -2555,6 +2601,7 @@ struct testcase_t dir_handle_get_tests[] = {
   DIR_HANDLE_CMD(status_vote_next_consensus_signatures_not_found, 0),
   DIR_HANDLE_CMD(status_vote_next_consensus_signatures_busy, 0),
   DIR_HANDLE_CMD(status_vote_next_consensus_signatures, 0),
+  DIR_HANDLE_CMD(parse_accept_encoding, 0),
   END_OF_TESTCASES
 };