瀏覽代碼

Improved circuit queue out-of-memory handler

Previously, when we ran low on memory, we'd close whichever circuits
had the most queued cells. Now, we close those that have the
*oldest* queued cells, on the theory that those are most responsible
for us running low on memory, and that those are the least likely to
actually drain on their own if we wait a little longer.

Based on analysis from a forthcoming paper by Jansen, Tschorsch,
Johnson, and Scheuermann. Fixes bug 9093.
Nick Mathewson 10 年之前
父節點
當前提交
1b8ceb83c9
共有 4 個文件被更改,包括 64 次插入12 次删除
  1. 7 0
      changes/bug9093
  2. 46 10
      src/or/circuitlist.c
  3. 5 0
      src/or/or.h
  4. 6 2
      src/or/relay.c

+ 7 - 0
changes/bug9093

@@ -0,0 +1,7 @@
+  o Minor features:
+    - Improve the circuit queue out-of-memory handler. Previously, when
+      we ran low on memory, we'd close whichever circuits had the most
+      queued cells. Now, we close those that have the *oldest* queued
+      cells, on the theory that those are most responsible for us
+      running low on memory. Based on analysis from a forthcoming paper
+      by Jansen, Tschorsch, Johnson, and Scheuermann. Fixes bug 9093.

+ 46 - 10
src/or/circuitlist.c

@@ -1383,25 +1383,56 @@ n_cells_in_circ_queues(const circuit_t *c)
   return n;
 }
 
-/** helper to sort a list of circuit_q by total queue lengths, in descending
- * order. */
+/**
+ * Return the age of the oldest cell queued on <b>c</b>, in milliseconds.
+ * Return 0 if there are no cells queued on c.  Requires that <b>now</b> be
+ * the current time in milliseconds since the epoch, truncated.
+ *
+ * This function will return incorrect results if the oldest cell queued on
+ * the circuit is older than 2**32 msec (about 49 days) old.
+ */
+static uint32_t
+circuit_max_queued_cell_age(const circuit_t *c, uint32_t now)
+{
+  uint32_t age = 0;
+  if (c->n_conn_cells.head)
+    age = now - c->n_conn_cells.head->inserted_time;
+
+  if (! CIRCUIT_IS_ORIGIN(c)) {
+    const or_circuit_t *orcirc = TO_OR_CIRCUIT((circuit_t*)c);
+    if (orcirc->p_conn_cells.head) {
+      uint32_t age2 = now - orcirc->p_conn_cells.head->inserted_time;
+      if (age2 > age)
+        return age2;
+    }
+  }
+  return age;
+}
+
+/** Temporary variable for circuits_compare_by_oldest_queued_cell_ This is a
+ * kludge to work around the fact that qsort doesn't provide a way for
+ * comparison functions to take an extra argument. */
+static uint32_t circcomp_now_tmp;
+
+/** Helper to sort a list of circuit_t by age of oldest cell, in descending
+ * order. Requires that circcomp_now_tmp is set correctly. */
 static int
-circuits_compare_by_queue_len_(const void **a_, const void **b_)
+circuits_compare_by_oldest_queued_cell_(const void **a_, const void **b_)
 {
   const circuit_t *a = *a_;
   const circuit_t *b = *b_;
-  size_t a_n = n_cells_in_circ_queues(a);
-  size_t b_n = n_cells_in_circ_queues(b);
+  uint32_t age_a = circuit_max_queued_cell_age(a, circcomp_now_tmp);
+  uint32_t age_b = circuit_max_queued_cell_age(b, circcomp_now_tmp);
 
-  if (a_n < b_n)
+  if (age_a < age_b)
     return 1;
-  else if (a_n == b_n)
+  else if (age_a == age_b)
     return 0;
   else
     return -1;
 }
 
-#define FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM 0.90
+#define FRACTION_OF_CELLS_TO_RETAIN_ON_OOM 0.90
 
 /** We're out of memory for cells, having allocated <b>current_allocation</b>
  * bytes' worth.  Kill the 'worst' circuits until we're under
@@ -1414,13 +1445,14 @@ circuits_handle_oom(size_t current_allocation)
   circuit_t *circ;
   size_t n_cells_removed=0, n_cells_to_remove;
   int n_circuits_killed=0;
+  struct timeval now;
   log_notice(LD_GENERAL, "We're low on memory.  Killing circuits with "
              "over-long queues. (This behavior is controlled by "
              "MaxMemInCellQueues.)");
 
   {
     size_t mem_target = (size_t)(get_options()->MaxMemInCellQueues *
-                                 FRACTION_OF_CIRCS_TO_RETAIN_ON_OOM);
+                                 FRACTION_OF_CELLS_TO_RETAIN_ON_OOM);
     size_t mem_to_recover;
     if (current_allocation <= mem_target)
       return;
@@ -1433,9 +1465,13 @@ circuits_handle_oom(size_t current_allocation)
   for (circ = global_circuitlist; circ; circ = circ->next)
     smartlist_add(circlist, circ);
 
+  /* Set circcomp_now_tmp so that the sort can work. */
+  tor_gettimeofday_cached(&now);
+  circcomp_now_tmp = (uint32_t)tv_to_msec(&now);
+
   /* This is O(n log n); there are faster algorithms we could use instead.
    * Let's hope this doesn't happen enough to be in the critical path. */
-  smartlist_sort(circlist, circuits_compare_by_queue_len_);
+  smartlist_sort(circlist, circuits_compare_by_oldest_queued_cell_);
 
   /* Okay, now the worst circuits are at the front of the list. Let's mark
    * them, and reclaim their storage aggressively. */

+ 5 - 0
src/or/or.h

@@ -912,8 +912,13 @@ typedef struct var_cell_t {
 typedef struct packed_cell_t {
   struct packed_cell_t *next; /**< Next cell queued on this circuit. */
   char body[CELL_NETWORK_SIZE]; /**< Cell as packed for network. */
+  uint32_t inserted_time; /**< Time (in milliseconds since epoch, with high
+                           * bits truncated) when this cell was inserted. */
 } packed_cell_t;
 
+/* XXXX This next structure may be obsoleted by inserted_time in
+ * packed_cell_t */
+
 /** Number of cells added to a circuit queue including their insertion
  * time on 10 millisecond detail; used for buffer statistics. */
 typedef struct insertion_time_elem_t {

+ 6 - 2
src/or/relay.c

@@ -1904,15 +1904,19 @@ cell_queue_append(cell_queue_t *queue, packed_cell_t *cell)
 void
 cell_queue_append_packed_copy(cell_queue_t *queue, const cell_t *cell)
 {
+  struct timeval now;
   packed_cell_t *copy = packed_cell_copy(cell);
+  tor_gettimeofday_cached(&now);
+  copy->inserted_time = (uint32_t)tv_to_msec(&now);
+
   /* Remember the time when this cell was put in the queue. */
+  /*XXXX This may be obsoleted by inserted_time */
   if (get_options()->CellStatistics) {
-    struct timeval now;
     uint32_t added;
     insertion_time_queue_t *it_queue = queue->insertion_times;
     if (!it_pool)
       it_pool = mp_pool_new(sizeof(insertion_time_elem_t), 1024);
-    tor_gettimeofday_cached(&now);
+
 #define SECONDS_IN_A_DAY 86400L
     added = (uint32_t)(((now.tv_sec % SECONDS_IN_A_DAY) * 100L)
             + ((uint32_t)now.tv_usec / (uint32_t)10000L));