Browse Source

Merge remote-tracking branch 'origin/maint-0.2.3' into maint-0.2.4

Conflicts:
	src/or/config.c
	src/or/relay.c
Nick Mathewson 11 years ago
parent
commit
d3063da691
11 changed files with 200 additions and 4 deletions
  1. 4 0
      changes/bug9002
  2. 15 0
      changes/bug9063_redux
  3. 9 0
      doc/tor.1.txt
  4. 2 0
      src/common/mempool.h
  5. 102 0
      src/or/circuitlist.c
  6. 1 0
      src/or/circuitlist.h
  7. 8 0
      src/or/config.c
  8. 4 0
      src/or/or.h
  9. 31 2
      src/or/relay.c
  10. 1 0
      src/or/relay.h
  11. 23 2
      src/or/rendcommon.c

+ 4 - 0
changes/bug9002

@@ -0,0 +1,4 @@
+  o Major bugfixes:
+    - Limit hidden service descriptors to at most ten introduction
+      points, to slow one kind of guard enumeration. Fixes bug 9002;
+      bugfix on 0.1.1.11-alpha.

+ 15 - 0
changes/bug9063_redux

@@ -0,0 +1,15 @@
+  o Major bugfixes:
+    - When we have too much memory queued in circuits (according to a new
+      MaxMemInCellQueues option), close the circuits consuming the most
+      memory.  This prevents us from running out of memory as a relay if
+      circuits fill up faster than they can be drained.  Fixes
+      bug 9063; bugfix on the 54th commit of Tor. This bug is a further
+      fix beyond bug 6252, whose fix was merged into 0.2.3.21-rc.
+
+      Also fixes an earlier approach taken in 0.2.4.13-alpha, where we
+      tried to solve this issue simply by imposing an upper limit on the
+      number of queued cells for a single circuit.  That approach proved to
+      be problematic, since there are ways to provoke clients to send a
+      number of cells in excess of any such reasonable limit.
+      Fixes bug 9072; bugfix on 0.2.4.13-alpha.
+

+ 9 - 0
doc/tor.1.txt

@@ -1663,6 +1663,15 @@ is non-zero):
     localhost, RFC1918 addresses, and so on. This can create security issues;
     you should probably leave it off. (Default: 0)
 
+**MaxMemInCellQueues**  __N__ **bytes**|**KB**|**MB**|**GB**::
+    This option configures a threshold above which Tor will assume that it
+    needs to stop queueing cells because it's about to run out of memory.
+    If it hits this threshold, it will begin killing circuits until it
+    has recovered at least 10% of this memory.  Do not set this option too
+    low, or your relay may be unreliable under load.  This option only
+    effects circuit queues, so the actual process size will be larger than
+    this. (Default: 8GB)
+
 DIRECTORY SERVER OPTIONS
 ------------------------
 

+ 2 - 0
src/common/mempool.h

@@ -22,6 +22,8 @@ void mp_pool_destroy(mp_pool_t *pool);
 void mp_pool_assert_ok(mp_pool_t *pool);
 void mp_pool_log_status(mp_pool_t *pool, int severity);
 
+#define MP_POOL_ITEM_OVERHEAD (sizeof(void*))
+
 #define MEMPOOL_STATS
 
 #ifdef MEMPOOL_PRIVATE

+ 102 - 0
src/or/circuitlist.c

@@ -1499,6 +1499,108 @@ circuit_mark_for_close_(circuit_t *circ, int reason, int line,
   }
 }
 
+/** Given a marked circuit <b>circ</b>, aggressively free its cell queues to
+ * recover memory. */
+static void
+marked_circuit_free_cells(circuit_t *circ)
+{
+  if (!circ->marked_for_close) {
+    log_warn(LD_BUG, "Called on non-marked circuit");
+    return;
+  }
+  cell_queue_clear(&circ->n_conn_cells);
+  if (! CIRCUIT_IS_ORIGIN(circ))
+    cell_queue_clear(& TO_OR_CIRCUIT(circ)->p_conn_cells);
+}
+
+/** Return the number of cells used by the circuit <b>c</b>'s cell queues. */
+static size_t
+n_cells_in_circ_queues(const circuit_t *c)
+{
+  size_t n = c->n_conn_cells.n;
+  if (! CIRCUIT_IS_ORIGIN(c))
+    n += TO_OR_CIRCUIT((circuit_t*)c)->p_conn_cells.n;
+  return n;
+}
+
+/** helper to sort a list of circuit_q by total queue lengths, in descending
+ * order. */
+static int
+circuits_compare_by_queue_len_(const void **a_, const void **b_)
+{
+  const circuit_t *a = *a_;
+  const circuit_t *b = *b_;
+  size_t a_n = n_cells_in_circ_queues(a);
+  size_t b_n = n_cells_in_circ_queues(b);
+
+  if (a_n < b_n)
+    return 1;
+  else if (a_n == b_n)
+    return 0;
+  else
+    return -1;
+}
+
+#define FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM 0.90
+
+/** We're out of memory for cells, having allocated <b>current_allocation</b>
+ * bytes' worth.  Kill the 'worst' circuits until we're under
+ * FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM of our maximum usage. */
+void
+circuits_handle_oom(size_t current_allocation)
+{
+  /* Let's hope there's enough slack space for this allocation here... */
+  smartlist_t *circlist = smartlist_new();
+  circuit_t *circ;
+  size_t n_cells_removed=0, n_cells_to_remove;
+  int n_circuits_killed=0;
+  log_notice(LD_GENERAL, "We're low on memory.  Killing circuits with "
+             "over-long queues. (This behavior is controlled by "
+             "MaxMemInCellQueues.)");
+
+  {
+    size_t mem_target = (size_t)(get_options()->MaxMemInCellQueues *
+                                 FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM);
+    size_t mem_to_recover;
+    if (current_allocation <= mem_target)
+      return;
+    mem_to_recover = current_allocation - mem_target;
+    n_cells_to_remove = CEIL_DIV(mem_to_recover, packed_cell_mem_cost());
+  }
+
+  /* This algorithm itself assumes that you've got enough memory slack
+   * to actually run it. */
+  for (circ = global_circuitlist; circ; circ = circ->next)
+    smartlist_add(circlist, circ);
+
+  /* This is O(n log n); there are faster algorithms we could use instead.
+   * Let's hope this doesn't happen enough to be in the critical path. */
+  smartlist_sort(circlist, circuits_compare_by_queue_len_);
+
+  /* Okay, now the worst circuits are at the front of the list. Let's mark
+   * them, and reclaim their storage aggressively. */
+  SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) {
+    size_t n = n_cells_in_circ_queues(circ);
+    if (! circ->marked_for_close) {
+      circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT);
+    }
+    marked_circuit_free_cells(circ);
+
+    ++n_circuits_killed;
+    n_cells_removed += n;
+    if (n_cells_removed >= n_cells_to_remove)
+      break;
+  } SMARTLIST_FOREACH_END(circ);
+
+  clean_cell_pool(); /* In case this helps. */
+
+  log_notice(LD_GENERAL, "Removed "U64_FORMAT" bytes by killing %d circuits.",
+             U64_PRINTF_ARG(n_cells_removed * packed_cell_mem_cost()),
+             n_circuits_killed);
+
+  smartlist_free(circlist);
+}
+
 /** Verify that cpath layer <b>cp</b> has all of its invariants
  * correct. Trigger an assert if anything is invalid.
  */

+ 1 - 0
src/or/circuitlist.h

@@ -61,6 +61,7 @@ int circuit_count_pending_on_channel(channel_t *chan);
 void assert_cpath_layer_ok(const crypt_path_t *cp);
 void assert_circuit_ok(const circuit_t *c);
 void circuit_free_all(void);
+void circuits_handle_oom(size_t current_allocation);
 
 #endif
 

+ 8 - 0
src/or/config.c

@@ -299,6 +299,7 @@ static config_var_t option_vars_[] = {
   V(MaxAdvertisedBandwidth,      MEMUNIT,  "1 GB"),
   V(MaxCircuitDirtiness,         INTERVAL, "10 minutes"),
   V(MaxClientCircuitsPending,    UINT,     "32"),
+  V(MaxMemInCellQueues,          MEMUNIT,  "8 GB"),
   OBSOLETE("MaxOnionsPending"),
   V(MaxOnionQueueDelay,          MSEC_INTERVAL, "1750 msec"),
   V(MinMeasuredBWsForAuthToIgnoreAdvertised, INT, "500"),
@@ -2610,7 +2611,14 @@ options_validate(or_options_t *old_options, or_options_t *options,
     REJECT("If EntryNodes is set, UseEntryGuards must be enabled.");
   }
 
+  if (options->MaxMemInCellQueues < (500 << 20)) {
+    log_warn(LD_CONFIG, "MaxMemInCellQueues must be at least 500 MB for now. "
+             "Ideally, have it as large as you can afford.");
+    options->MaxMemInCellQueues = (500 << 20);
+  }
+
   options->AllowInvalid_ = 0;
+
   if (options->AllowInvalidNodes) {
     SMARTLIST_FOREACH_BEGIN(options->AllowInvalidNodes, const char *, cp) {
         if (!strcasecmp(cp, "entry"))

+ 4 - 0
src/or/or.h

@@ -3433,6 +3433,10 @@ typedef struct {
   config_line_t *DirPort_lines;
   config_line_t *DNSPort_lines; /**< Ports to listen on for DNS requests. */
 
+  uint64_t MaxMemInCellQueues; /**< If we have more memory than this allocated
+                                * for circuit cell queues, run the OOM handler
+                                */
+
   /** @name port booleans
    *
    * Derived booleans: True iff there is a non-listener port on an AF_INET or

+ 31 - 2
src/or/relay.c

@@ -2034,7 +2034,7 @@ circuit_consider_sending_sendme(circuit_t *circ, crypt_path_t *layer_hint)
 #endif
 
 /** The total number of cells we have allocated from the memory pool. */
-static int total_cells_allocated = 0;
+static size_t total_cells_allocated = 0;
 
 /** A memory pool to allocate packed_cell_t objects. */
 static mp_pool_t *cell_pool = NULL;
@@ -2114,7 +2114,7 @@ dump_cell_pool_usage(int severity)
   }
   tor_log(severity, LD_MM,
           "%d cells allocated on %d circuits. %d cells leaked.",
-          n_cells, n_circs, total_cells_allocated - n_cells);
+          n_cells, n_circs, (int)total_cells_allocated - n_cells);
   mp_pool_log_status(cell_pool, severity);
 }
 
@@ -2222,6 +2222,29 @@ cell_queue_pop(cell_queue_t *queue)
   return cell;
 }
 
+/** Return the total number of bytes used for each packed_cell in a queue.
+ * Approximate. */
+size_t
+packed_cell_mem_cost(void)
+{
+  return sizeof(packed_cell_t) + MP_POOL_ITEM_OVERHEAD +
+    get_options()->CellStatistics ?
+    (sizeof(insertion_time_elem_t)+MP_POOL_ITEM_OVERHEAD) : 0;
+}
+
+/** Check whether we've got too much space used for cells.  If so,
+ * call the OOM handler and return 1.  Otherwise, return 0. */
+static int
+cell_queues_check_size(void)
+{
+  size_t alloc = total_cells_allocated * packed_cell_mem_cost();
+  if (alloc >= get_options()->MaxMemInCellQueues) {
+    circuits_handle_oom(alloc);
+    return 1;
+  }
+  return 0;
+}
+
 /**
  * Update the number of cells available on the circuit's n_chan or p_chan's
  * circuit mux.
@@ -2513,6 +2536,12 @@ append_cell_to_circuit_queue(circuit_t *circ, channel_t *chan,
 
   cell_queue_append_packed_copy(queue, cell, chan->wide_circ_ids);
 
+  if (PREDICT_UNLIKELY(cell_queues_check_size())) {
+    /* We ran the OOM handler */
+    if (circ->marked_for_close)
+      return;
+  }
+
   /* If we have too many cells on the circuit, we should stop reading from
    * the edge streams for a while. */
   if (!streams_blocked && queue->n >= CELL_QUEUE_HIGHWATER_SIZE)

+ 1 - 0
src/or/relay.h

@@ -46,6 +46,7 @@ void init_cell_pool(void);
 void free_cell_pool(void);
 void clean_cell_pool(void);
 void dump_cell_pool_usage(int severity);
+size_t packed_cell_mem_cost(void);
 
 /* For channeltls.c */
 void packed_cell_free(packed_cell_t *cell);

+ 23 - 2
src/or/rendcommon.c

@@ -1002,6 +1002,10 @@ rend_cache_lookup_v2_desc_as_dir(const char *desc_id, const char **desc)
   return 0;
 }
 
+/* Do not allow more than this many introduction points in a hidden service
+ * descriptor */
+#define MAX_INTRO_POINTS 10
+
 /** Parse *desc, calculate its service id, and store it in the cache.
  * If we have a newer v0 descriptor with the same ID, ignore this one.
  * If we have an older descriptor with the same ID, replace it.
@@ -1070,6 +1074,15 @@ rend_cache_store(const char *desc, size_t desc_len, int published,
     rend_service_descriptor_free(parsed);
     return -1;
   }
+  if (parsed->intro_nodes &&
+      smartlist_len(parsed->intro_nodes) > MAX_INTRO_POINTS) {
+    log_warn(LD_REND, "Found too many introduction points on a hidden "
+             "service descriptor for %s. This is probably a (misguided) "
+             "attempt to improve reliability, but it could also be an "
+             "attempt to do a guard enumeration attack. Rejecting.",
+             safe_str_client(query));
+    return -2;
+  }
   tor_snprintf(key, sizeof(key), "0%s", query);
   e = (rend_cache_entry_t*) strmap_get_lc(rend_cache, key);
   if (e && e->parsed->timestamp > parsed->timestamp) {
@@ -1288,6 +1301,7 @@ rend_cache_store_v2_desc_as_client(const char *desc,
   }
   /* Decode/decrypt introduction points. */
   if (intro_content) {
+    int n_intro_points;
     if (rend_query->auth_type != REND_NO_AUTH &&
         !tor_mem_is_zero(rend_query->descriptor_cookie,
                          sizeof(rend_query->descriptor_cookie))) {
@@ -1308,13 +1322,20 @@ rend_cache_store_v2_desc_as_client(const char *desc,
         intro_size = ipos_decrypted_size;
       }
     }
-    if (rend_parse_introduction_points(parsed, intro_content,
-                                       intro_size) <= 0) {
+    n_intro_points = rend_parse_introduction_points(parsed, intro_content,
+                                                    intro_size);
+    if (n_intro_points <= 0) {
       log_warn(LD_REND, "Failed to parse introduction points. Either the "
                "service has published a corrupt descriptor or you have "
                "provided invalid authorization data.");
       retval = -2;
       goto err;
+    } else if (n_intro_points > MAX_INTRO_POINTS) {
+      log_warn(LD_REND, "Found too many introduction points on a hidden "
+               "service descriptor for %s. This is probably a (misguided) "
+               "attempt to improve reliability, but it could also be an "
+               "attempt to do a guard enumeration attack. Rejecting.",
+               safe_str_client(rend_query->onion_address));
     }
   } else {
     log_info(LD_REND, "Descriptor does not contain any introduction points.");