Ver código fonte

Merge remote-tracking branch 'public/bug10169_024' into bug10169_025_v2

Conflicts:
	src/or/circuitlist.c
Nick Mathewson 10 anos atrás
pai
commit
87fb1e324c
9 arquivos alterados com 207 adições e 45 exclusões
  1. 4 0
      changes/bug10169
  2. 3 0
      changes/bug9686
  3. 5 5
      doc/tor.1.txt
  4. 54 5
      src/or/buffers.c
  5. 4 1
      src/or/buffers.h
  6. 125 26
      src/or/circuitlist.c
  7. 5 4
      src/or/config.c
  8. 5 3
      src/or/or.h
  9. 2 1
      src/or/relay.c

+ 4 - 0
changes/bug10169

@@ -0,0 +1,4 @@
+  o Major features:
+    - Also consider stream buffer sizes when calculating OOM
+      conditions. Rename MaxMemInCellQueues to MaxMemInQueues. Fixes
+      bug 10169.

+ 3 - 0
changes/bug9686

@@ -0,0 +1,3 @@
+  o Minor changes:
+    - Decrease the lower limit of MaxMemInQueues to 256 MBytes, to
+      appease Raspberry Pi users. Fixes bug 9686.

+ 5 - 5
doc/tor.1.txt

@@ -1727,13 +1727,13 @@ is non-zero):
     localhost, RFC1918 addresses, and so on. This can create security issues;
     you should probably leave it off. (Default: 0)
 
-[[MaxMemInCellQueues]] **MaxMemInCellQueues**  __N__ **bytes**|**KB**|**MB**|**GB**::
+[[MaxMemInQueues]] **MaxMemInQueues**  __N__ **bytes**|**KB**|**MB**|**GB**::
     This option configures a threshold above which Tor will assume that it
-    needs to stop queueing cells because it's about to run out of memory.
-    If it hits this threshold, it will begin killing circuits until it
-    has recovered at least 10% of this memory.  Do not set this option too
+    needs to stop queueing or buffering data because it's about to run out of
+    memory.  If it hits this threshold, it will begin killing circuits until
+    it has recovered at least 10% of this memory.  Do not set this option too
     low, or your relay may be unreliable under load.  This option only
-    affects circuit queues, so the actual process size will be larger than
+    affects some queues, so the actual process size will be larger than
     this. (Default: 8GB)
 
 DIRECTORY SERVER OPTIONS

+ 54 - 5
src/or/buffers.c

@@ -70,6 +70,8 @@ typedef struct chunk_t {
   size_t datalen; /**< The number of bytes stored in this chunk */
   size_t memlen; /**< The number of usable bytes of storage in <b>mem</b>. */
   char *data; /**< A pointer to the first byte of data stored in <b>mem</b>. */
+  uint32_t inserted_time; /**< Timestamp in truncated ms since epoch
+                           * when this chunk was inserted. */
   char mem[FLEXIBLE_ARRAY_MEMBER]; /**< The actual memory used for storage in
                 * this chunk. */
 } chunk_t;
@@ -141,6 +143,9 @@ static chunk_freelist_t freelists[] = {
  * could help with? */
 static uint64_t n_freelist_miss = 0;
 
+/** Total bytes allocated in chunks, including chunks held on freelists. */
+static size_t total_bytes_allocated_in_chunks = 0;
+
 static void assert_freelist_ok(chunk_freelist_t *fl);
 
 /** Return the freelist to hold chunks of size <b>alloc</b>, or NULL if
@@ -174,6 +179,8 @@ chunk_free_unchecked(chunk_t *chunk)
   } else {
     if (freelist)
       ++freelist->n_free;
+    tor_assert(total_bytes_allocated_in_chunks >= alloc);
+    total_bytes_allocated_in_chunks -= alloc;
     tor_free(chunk);
   }
 }
@@ -200,6 +207,7 @@ chunk_new_with_alloc_size(size_t alloc)
     else
       ++n_freelist_miss;
     ch = tor_malloc(alloc);
+    total_bytes_allocated_in_chunks += alloc;
   }
   ch->next = NULL;
   ch->datalen = 0;
@@ -211,6 +219,10 @@ chunk_new_with_alloc_size(size_t alloc)
 static void
 chunk_free_unchecked(chunk_t *chunk)
 {
+  /* A NULL chunk has nothing to account for or release. */
+  if (!chunk)
+    return;
+  /* Take this chunk's allocation out of the running total before freeing it;
+   * the assertion guards against the counter underflowing. */
+  tor_assert(total_bytes_allocated_in_chunks >= CHUNK_ALLOC_SIZE(chunk->memlen));
+  total_bytes_allocated_in_chunks -= CHUNK_ALLOC_SIZE(chunk->memlen);
   tor_free(chunk);
 }
 static INLINE chunk_t *
@@ -221,6 +233,7 @@ chunk_new_with_alloc_size(size_t alloc)
   ch->next = NULL;
   ch->datalen = 0;
   ch->memlen = CHUNK_SIZE_WITH_ALLOC(alloc);
+  total_bytes_allocated_in_chunks += alloc;
   ch->data = &ch->mem[0];
   return ch;
 }
@@ -232,11 +245,13 @@ static INLINE chunk_t *
 chunk_grow(chunk_t *chunk, size_t sz)
 {
   off_t offset;
+  size_t memlen_orig = chunk->memlen;
   tor_assert(sz > chunk->memlen);
   offset = chunk->data - chunk->mem;
   chunk = tor_realloc(chunk, CHUNK_ALLOC_SIZE(sz));
   chunk->memlen = sz;
   chunk->data = chunk->mem + offset;
+  total_bytes_allocated_in_chunks += CHUNK_ALLOC_SIZE(sz) - CHUNK_ALLOC_SIZE(memlen_orig);
   return chunk;
 }
 
@@ -261,12 +276,14 @@ preferred_chunk_size(size_t target)
 }
 
 /** Remove from the freelists most chunks that have not been used since the
- * last call to buf_shrink_freelists(). */
-void
+ * last call to buf_shrink_freelists().   Return the amount of memory
+ * freed. */
+size_t
 buf_shrink_freelists(int free_all)
 {
 #ifdef ENABLE_BUF_FREELISTS
   int i;
+  size_t total_freed = 0;
   disable_control_logging();
   for (i = 0; freelists[i].alloc_size; ++i) {
     int slack = freelists[i].slack;
@@ -298,6 +315,9 @@ buf_shrink_freelists(int free_all)
       *chp = NULL;
       while (chunk) {
         chunk_t *next = chunk->next;
+        tor_assert(total_bytes_allocated_in_chunks >= CHUNK_ALLOC_SIZE(chunk->memlen));
+        total_bytes_allocated_in_chunks -= CHUNK_ALLOC_SIZE(chunk->memlen);
+        total_freed += CHUNK_ALLOC_SIZE(chunk->memlen);
         tor_free(chunk);
         chunk = next;
         --n_to_free;
@@ -315,18 +335,21 @@ buf_shrink_freelists(int free_all)
       }
       // tor_assert(!n_to_free);
       freelists[i].cur_length = new_length;
+      tor_assert(orig_n_to_skip == new_length);
       log_info(LD_MM, "Cleaned freelist for %d-byte chunks: original "
-               "length %d, kept %d, dropped %d.",
+               "length %d, kept %d, dropped %d. New length is %d",
                (int)freelists[i].alloc_size, orig_length,
-               orig_n_to_skip, orig_n_to_free);
+               orig_n_to_skip, orig_n_to_free, new_length);
     }
     freelists[i].lowest_length = freelists[i].cur_length;
     assert_freelist_ok(&freelists[i]);
   }
  done:
   enable_control_logging();
+  return total_freed;
 #else
   (void) free_all;
+  return 0;
 #endif
 }
 
@@ -531,7 +554,7 @@ buf_allocation(const buf_t *buf)
   size_t total = 0;
   const chunk_t *chunk;
   for (chunk = buf->head; chunk; chunk = chunk->next) {
-    total += chunk->memlen;
+    total += CHUNK_ALLOC_SIZE(chunk->memlen);
   }
   return total;
 }
@@ -564,6 +587,7 @@ static chunk_t *
 chunk_copy(const chunk_t *in_chunk)
 {
   chunk_t *newch = tor_memdup(in_chunk, CHUNK_ALLOC_SIZE(in_chunk->memlen));
+  total_bytes_allocated_in_chunks += CHUNK_ALLOC_SIZE(in_chunk->memlen);
   newch->next = NULL;
   if (in_chunk->data) {
     off_t offset = in_chunk->data - in_chunk->mem;
@@ -599,6 +623,7 @@ static chunk_t *
 buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped)
 {
   chunk_t *chunk;
+  struct timeval now;
   if (CHUNK_ALLOC_SIZE(capacity) < buf->default_chunk_size) {
     chunk = chunk_new_with_alloc_size(buf->default_chunk_size);
   } else if (capped && CHUNK_ALLOC_SIZE(capacity) > MAX_CHUNK_ALLOC) {
@@ -606,6 +631,10 @@ buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped)
   } else {
     chunk = chunk_new_with_alloc_size(preferred_chunk_size(capacity));
   }
+
+  tor_gettimeofday_cached(&now);
+  chunk->inserted_time = (uint32_t)tv_to_msec(&now);
+
   if (buf->tail) {
     tor_assert(buf->head);
     buf->tail->next = chunk;
@@ -618,6 +647,26 @@ buf_add_chunk_with_capacity(buf_t *buf, size_t capacity, int capped)
   return chunk;
 }
 
+/** Compute how long, in milliseconds, the oldest chunk of <b>buf</b> (the
+ * one at its head) has been in the buffer, given the current time <b>now</b>
+ * in truncated milliseconds since the epoch.  An empty buffer has age 0.
+ */
+uint32_t
+buf_get_oldest_chunk_timestamp(const buf_t *buf, uint32_t now)
+{
+  const chunk_t *oldest = buf->head;
+  if (!oldest)
+    return 0;
+  /* Unsigned subtraction: both values are truncated to 32 bits. */
+  return now - oldest->inserted_time;
+}
+
+/** Return the number of bytes currently counted as allocated for buffer
+ * chunks. */
+size_t
+buf_get_total_allocation(void)
+{
+  /* Read-only accessor for the file-local allocation counter. */
+  return total_bytes_allocated_in_chunks;
+}
+
 /** Read up to <b>at_most</b> bytes from the socket <b>fd</b> into
  * <b>chunk</b> (which must be on <b>buf</b>). If we get an EOF, set
  * *<b>reached_eof</b> to 1.  Return -1 on error, 0 on eof or blocking,

+ 4 - 1
src/or/buffers.h

@@ -20,13 +20,16 @@ void buf_free(buf_t *buf);
 void buf_clear(buf_t *buf);
 buf_t *buf_copy(const buf_t *buf);
 void buf_shrink(buf_t *buf);
-void buf_shrink_freelists(int free_all);
+size_t buf_shrink_freelists(int free_all);
 void buf_dump_freelist_sizes(int severity);
 
 size_t buf_datalen(const buf_t *buf);
 size_t buf_allocation(const buf_t *buf);
 size_t buf_slack(const buf_t *buf);
 
+uint32_t buf_get_oldest_chunk_timestamp(const buf_t *buf, uint32_t now);
+size_t buf_get_total_allocation(void);
+
 int read_to_buf(tor_socket_t s, size_t at_most, buf_t *buf, int *reached_eof,
                 int *socket_error);
 int read_to_buf_tls(tor_tls_t *tls, size_t at_most, buf_t *buf);

+ 125 - 26
src/or/circuitlist.c

@@ -1612,6 +1612,38 @@ marked_circuit_free_cells(circuit_t *circ)
     cell_queue_clear(& TO_OR_CIRCUIT(circ)->p_chan_cells);
 }
 
+/** Walk the linked list of streams beginning at <b>stream</b> and clear the
+ * input and output buffer of each one.  Return the sum of those buffers'
+ * allocations, each measured just before the buffer was cleared. */
+static size_t
+marked_circuit_streams_free_bytes(edge_connection_t *stream)
+{
+  size_t n_bytes = 0;
+  while (stream != NULL) {
+    connection_t *base_conn = TO_CONN(stream);
+    if (base_conn->inbuf != NULL) {
+      n_bytes += buf_allocation(base_conn->inbuf);
+      buf_clear(base_conn->inbuf);
+    }
+    if (base_conn->outbuf != NULL) {
+      n_bytes += buf_allocation(base_conn->outbuf);
+      buf_clear(base_conn->outbuf);
+    }
+    stream = stream->next_stream;
+  }
+  return n_bytes;
+}
+
+/** Clear the buffers of every stream attached to circuit <b>c</b>: the
+ * p_streams list for an origin circuit, the n_streams list otherwise.
+ * Return the number of bytes recovered. */
+static size_t
+marked_circuit_free_stream_bytes(circuit_t *c)
+{
+  edge_connection_t *streams;
+  if (CIRCUIT_IS_ORIGIN(c))
+    streams = TO_ORIGIN_CIRCUIT(c)->p_streams;
+  else
+    streams = TO_OR_CIRCUIT(c)->n_streams;
+  return marked_circuit_streams_free_bytes(streams);
+}
+
 /** Return the number of cells used by the circuit <b>c</b>'s cell queues. */
 STATIC size_t
 n_cells_in_circ_queues(const circuit_t *c)
@@ -1652,20 +1684,68 @@ circuit_max_queued_cell_age(const circuit_t *c, uint32_t now)
   return age;
 }
 
-/** Temporary variable for circuits_compare_by_oldest_queued_cell_ This is a
- * kludge to work around the fact that qsort doesn't provide a way for
- * comparison functions to take an extra argument. */
-static uint32_t circcomp_now_tmp;
+/** Return the age in milliseconds of the oldest buffer chunk on any stream in
+ * the linked list <b>stream</b>, where age is taken in milliseconds before
+ * the time <b>now</b> (in truncated milliseconds since the epoch).  Both the
+ * input and the output buffer of every stream are considered; the result is
+ * 0 when no stream has a buffer holding a chunk. */
+static uint32_t
+circuit_get_streams_max_data_age(const edge_connection_t *stream, uint32_t now)
+{
+  uint32_t age = 0, age2;
+  for (; stream; stream = stream->next_stream) {
+    const connection_t *conn = TO_CONN(stream);
+    if (conn->outbuf) {
+      age2 = buf_get_oldest_chunk_timestamp(conn->outbuf, now);
+      if (age2 > age)
+        age = age2;
+    }
+    if (conn->inbuf) {
+      age2 = buf_get_oldest_chunk_timestamp(conn->inbuf, now);
+      if (age2 > age)
+        age = age2;
+    }
+  }
 
-/** Helper to sort a list of circuit_t by age of oldest cell, in descending
- * order. Requires that circcomp_now_tmp is set correctly. */
+  return age;
+}
+
+/** Find the oldest buffer chunk held by any stream attached to circuit
+ * <b>c</b> and return its age in milliseconds, measured back from <b>now</b>
+ * (in truncated milliseconds since the epoch).  Origin circuits are searched
+ * through p_streams, all other circuits through n_streams. */
+static uint32_t
+circuit_max_queued_data_age(const circuit_t *c, uint32_t now)
+{
+  /* NOTE(review): the conversion helpers presumably take a non-const
+   * circuit_t pointer, hence the cast; nothing is written through it here. */
+  circuit_t *circ = (circuit_t*)c;
+  const edge_connection_t *streams;
+  if (CIRCUIT_IS_ORIGIN(c))
+    streams = TO_ORIGIN_CIRCUIT(circ)->p_streams;
+  else
+    streams = TO_OR_CIRCUIT(circ)->n_streams;
+  return circuit_get_streams_max_data_age(streams, now);
+}
+
+/** Return the larger of two ages for circuit <b>c</b>: that of its oldest
+ * queued cell and that of its oldest stream buffer chunk.  Ages are in
+ * milliseconds before <b>now</b> (in truncated milliseconds since the
+ * epoch). */
+static uint32_t
+circuit_max_queued_item_age(const circuit_t *c, uint32_t now)
+{
+  const uint32_t oldest_cell = circuit_max_queued_cell_age(c, now);
+  const uint32_t oldest_data = circuit_max_queued_data_age(c, now);
+  return (oldest_cell > oldest_data) ? oldest_cell : oldest_data;
+}
+
+/** Helper to sort a list of circuit_t by age of oldest item, in descending
+ * order. */
 static int
-circuits_compare_by_oldest_queued_cell_(const void **a_, const void **b_)
+circuits_compare_by_oldest_queued_item_(const void **a_, const void **b_)
 {
   const circuit_t *a = *a_;
   const circuit_t *b = *b_;
-  uint32_t age_a = circuit_max_queued_cell_age(a, circcomp_now_tmp);
-  uint32_t age_b = circuit_max_queued_cell_age(b, circcomp_now_tmp);
+  uint32_t age_a = a->age_tmp;
+  uint32_t age_b = b->age_tmp;
 
   if (age_a < age_b)
     return 1;
@@ -1675,66 +1755,85 @@ circuits_compare_by_oldest_queued_cell_(const void **a_, const void **b_)
     return -1;
 }
 
-#define FRACTION_OF_CELLS_TO_RETAIN_ON_OOM 0.90
+#define FRACTION_OF_DATA_TO_RETAIN_ON_OOM 0.90
 
 /** We're out of memory for cells, having allocated <b>current_allocation</b>
  * bytes' worth.  Kill the 'worst' circuits until we're under
- * FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM of our maximum usage. */
+ * FRACTION_OF_DATA_TO_RETAIN_ON_OOM of our maximum usage. */
 void
 circuits_handle_oom(size_t current_allocation)
 {
   /* Let's hope there's enough slack space for this allocation here... */
   smartlist_t *circlist = smartlist_new();
   circuit_t *circ;
-  size_t n_cells_removed=0, n_cells_to_remove;
+  size_t mem_to_recover;
+  size_t mem_recovered=0;
   int n_circuits_killed=0;
   struct timeval now;
+  uint32_t now_ms;
   log_notice(LD_GENERAL, "We're low on memory.  Killing circuits with "
              "over-long queues. (This behavior is controlled by "
-             "MaxMemInCellQueues.)");
+             "MaxMemInQueues.)");
+
+  {
+    const size_t recovered = buf_shrink_freelists(1);
+    if (recovered >= current_allocation) {
+      log_warn(LD_BUG, "We somehow recovered more memory from freelists "
+               "than we thought we had allocated");
+      current_allocation = 0;
+    } else {
+      current_allocation -= recovered;
+    }
+  }
 
   {
-    size_t mem_target = (size_t)(get_options()->MaxMemInCellQueues *
-                                 FRACTION_OF_CELLS_TO_RETAIN_ON_OOM);
-    size_t mem_to_recover;
+    size_t mem_target = (size_t)(get_options()->MaxMemInQueues *
+                                 FRACTION_OF_DATA_TO_RETAIN_ON_OOM);
     if (current_allocation <= mem_target)
       return;
     mem_to_recover = current_allocation - mem_target;
-    n_cells_to_remove = CEIL_DIV(mem_to_recover, packed_cell_mem_cost());
   }
 
+  tor_gettimeofday_cached(&now);
+  now_ms = (uint32_t)tv_to_msec(&now);
+
   /* This algorithm itself assumes that you've got enough memory slack
    * to actually run it. */
-  TOR_LIST_FOREACH(circ, &global_circuitlist, head)
+  TOR_LIST_FOREACH(circ, &global_circuitlist, head) {
+    circ->age_tmp = circuit_max_queued_item_age(circ, now_ms);
     smartlist_add(circlist, circ);
-
-  /* Set circcomp_now_tmp so that the sort can work. */
-  tor_gettimeofday_cached(&now);
-  circcomp_now_tmp = (uint32_t)tv_to_msec(&now);
+  }
 
   /* This is O(n log n); there are faster algorithms we could use instead.
    * Let's hope this doesn't happen enough to be in the critical path. */
-  smartlist_sort(circlist, circuits_compare_by_oldest_queued_cell_);
+  smartlist_sort(circlist, circuits_compare_by_oldest_queued_item_);
 
   /* Okay, now the worst circuits are at the front of the list. Let's mark
    * them, and reclaim their storage aggressively. */
   SMARTLIST_FOREACH_BEGIN(circlist, circuit_t *, circ) {
     size_t n = n_cells_in_circ_queues(circ);
+    size_t freed;
     if (! circ->marked_for_close) {
       circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT);
     }
     marked_circuit_free_cells(circ);
+    freed = marked_circuit_free_stream_bytes(circ);
 
     ++n_circuits_killed;
-    n_cells_removed += n;
-    if (n_cells_removed >= n_cells_to_remove)
+
+    mem_recovered += n * packed_cell_mem_cost();
+    mem_recovered += freed;
+
+    if (mem_recovered >= mem_to_recover)
       break;
   } SMARTLIST_FOREACH_END(circ);
 
   clean_cell_pool(); /* In case this helps. */
+  buf_shrink_freelists(1); /* This is necessary to actually release buffer
+                              chunks. */
 
   log_notice(LD_GENERAL, "Removed "U64_FORMAT" bytes by killing %d circuits.",
-             U64_PRINTF_ARG(n_cells_removed * packed_cell_mem_cost()),
+             U64_PRINTF_ARG(mem_recovered),
              n_circuits_killed);
 
   smartlist_free(circlist);

+ 5 - 4
src/or/config.c

@@ -85,6 +85,7 @@ static config_abbrev_t option_abbrevs_[] = {
   { "DirFetchPostPeriod", "StatusFetchPeriod", 0, 0},
   { "DirServer", "DirAuthority", 0, 0}, /* XXXX024 later, make this warn? */
   { "MaxConn", "ConnLimit", 0, 1},
+  { "MaxMemInCellQueues", "MaxMemInQueues", 0, 0},
   { "ORBindAddress", "ORListenAddress", 0, 0},
   { "DirBindAddress", "DirListenAddress", 0, 0},
   { "SocksBindAddress", "SocksListenAddress", 0, 0},
@@ -306,7 +307,7 @@ static config_var_t option_vars_[] = {
   V(MaxAdvertisedBandwidth,      MEMUNIT,  "1 GB"),
   V(MaxCircuitDirtiness,         INTERVAL, "10 minutes"),
   V(MaxClientCircuitsPending,    UINT,     "32"),
-  V(MaxMemInCellQueues,          MEMUNIT,  "8 GB"),
+  V(MaxMemInQueues,              MEMUNIT,  "8 GB"),
   OBSOLETE("MaxOnionsPending"),
   V(MaxOnionQueueDelay,          MSEC_INTERVAL, "1750 msec"),
   V(MinMeasuredBWsForAuthToIgnoreAdvertised, INT, "500"),
@@ -2754,10 +2755,10 @@ options_validate(or_options_t *old_options, or_options_t *options,
     REJECT("If EntryNodes is set, UseEntryGuards must be enabled.");
   }
 
-  if (options->MaxMemInCellQueues < (500 << 20)) {
-    log_warn(LD_CONFIG, "MaxMemInCellQueues must be at least 500 MB for now. "
+  if (options->MaxMemInQueues < (256 << 20)) {
+    log_warn(LD_CONFIG, "MaxMemInQueues must be at least 256 MB for now. "
              "Ideally, have it as large as you can afford.");
-    options->MaxMemInCellQueues = (500 << 20);
+    options->MaxMemInQueues = (256 << 20);
   }
 
   options->AllowInvalid_ = 0;

+ 5 - 3
src/or/or.h

@@ -2825,6 +2825,9 @@ typedef struct circuit_t {
    * more. */
   int deliver_window;
 
+  /** Temporary field used during circuits_handle_oom. */
+  uint32_t age_tmp;
+
   /** For storage while n_chan is pending (state CIRCUIT_STATE_CHAN_WAIT). */
   struct create_cell_t *n_chan_create_cell;
 
@@ -3478,9 +3481,8 @@ typedef struct {
   config_line_t *DirPort_lines;
   config_line_t *DNSPort_lines; /**< Ports to listen on for DNS requests. */
 
-  uint64_t MaxMemInCellQueues; /**< If we have more memory than this allocated
-                                * for circuit cell queues, run the OOM handler
-                                */
+  uint64_t MaxMemInQueues; /**< If we have more memory than this allocated
+                            * for queues and buffers, run the OOM handler */
 
   /** @name port booleans
    *

+ 2 - 1
src/or/relay.c

@@ -2205,7 +2205,8 @@ static int
 cell_queues_check_size(void)
 {
   size_t alloc = total_cells_allocated * packed_cell_mem_cost();
-  if (alloc >= get_options()->MaxMemInCellQueues) {
+  alloc += buf_get_total_allocation();
+  if (alloc >= get_options()->MaxMemInQueues) {
     circuits_handle_oom(alloc);
     return 1;
   }