Browse Source

Merge remote-tracking branch 'public/bug11553_025'

Nick Mathewson 10 years ago
parent
commit
5a9ac0df99
6 changed files with 89 additions and 30 deletions
  1. 15 0
      changes/bug11553
  2. 2 2
      src/or/channel.c
  3. 4 5
      src/or/channel.h
  4. 57 18
      src/or/circuitbuild.c
  5. 11 3
      src/or/circuitlist.c
  6. 0 2
      src/or/connection.c

+ 15 - 0
changes/bug11553

@@ -0,0 +1,15 @@
+  o Minor features:
+    - When we run out of usable circuit IDs on a channel, log only one
+      warning for the whole channel, and include a description of
+      how many circuits there were on the channel. Fix for part of ticket
+      #11553.
+
+
+  o Major features (performance):
+    - Avoid wasting cycles looking for usable circuit IDs. Previously,
+      when allocating a new circuit ID, we would in the worst case do a
+      linear scan over the entire possible range of circuit IDs before
+      deciding that we had exhausted our possibilities. Now, we
+      try 64 circuit IDs at random before deciding that we probably
+      won't succeed. Fix for a possible root cause of ticket
+      #11553.

+ 2 - 2
src/or/channel.c

@@ -728,8 +728,8 @@ channel_init(channel_t *chan)
   /* Init timestamp */
   chan->timestamp_last_added_nonpadding = time(NULL);
 
-  /* Init next_circ_id */
-  chan->next_circ_id = crypto_rand_int(1 << 15);
+  /* Warn about exhausted circuit IDs no more than hourly. */
+  chan->last_warned_circ_ids_exhausted.rate = 3600;
 
   /* Initialize queues. */
   TOR_SIMPLEQ_INIT(&chan->incoming_queue);

+ 4 - 5
src/or/channel.h

@@ -149,11 +149,6 @@ struct channel_s {
   circ_id_type_bitfield_t circ_id_type:2;
   /** DOCDOC*/
   unsigned wide_circ_ids:1;
-  /**
-   * Which circ_id do we try to use next on this connection?  This is
-   * always in the range 0..1<<15-1.
-   */
-  circid_t next_circ_id;
 
   /** For how many circuits are we n_chan?  What about p_chan? */
   unsigned int num_n_circuits, num_p_circuits;
@@ -182,6 +177,10 @@ struct channel_s {
    */
   unsigned int is_local:1;
 
+  /** Have we logged a warning about circID exhaustion on this channel?
+   * If so, when? */
+  ratelim_t last_warned_circ_ids_exhausted;
+
   /** Channel timestamps for cell channels */
   time_t timestamp_client; /* Client used this, according to relay.c */
   time_t timestamp_drained; /* Output queue empty */

+ 57 - 18
src/or/circuitbuild.c

@@ -77,18 +77,28 @@ channel_connect_for_circuit(const tor_addr_t *addr, uint16_t port,
   return chan;
 }
 
-/** Iterate over values of circ_id, starting from conn-\>next_circ_id,
- * and with the high bit specified by conn-\>circ_id_type, until we get
- * a circ_id that is not in use by any other circuit on that conn.
+
+/** Search for a value for circ_id that we can use on <b>chan</b> for an
+ * outbound circuit, until we get a circ_id that is not in use by any other
+ * circuit on that conn.
  *
  * Return it, or 0 if can't get a unique circ_id.
  */
 static circid_t
 get_unique_circ_id_by_chan(channel_t *chan)
 {
+/* This number is chosen somewhat arbitrarily; see comment below for more
+ * info.  When the space is 80% full, it gives a one-in-a-million failure
+ * chance; when the space is 90% full, it gives a one-in-850 chance; and when
+ * the space is 95% full, it gives a one-in-26 failure chance.  That seems
+ * okay, though you could make a case IMO for anything between N=32 and
+ * N=256. */
+#define MAX_CIRCID_ATTEMPTS 64
+  int in_use;
+  unsigned n_with_circ = 0, n_pending_destroy = 0;
   circid_t test_circ_id;
   circid_t attempts=0;
-  circid_t high_bit, max_range;
+  circid_t high_bit, max_range, mask;
 
   tor_assert(chan);
 
@@ -98,25 +108,52 @@ get_unique_circ_id_by_chan(channel_t *chan)
              "a client with no identity.");
     return 0;
   }
-  max_range =  (chan->wide_circ_ids) ? (1u<<31) : (1u<<15);
+  max_range = (chan->wide_circ_ids) ? (1u<<31) : (1u<<15);
+  mask = max_range - 1;
   high_bit = (chan->circ_id_type == CIRC_ID_TYPE_HIGHER) ? max_range : 0;
   do {
-    /* Sequentially iterate over test_circ_id=1...max_range until we find a
-     * circID such that (high_bit|test_circ_id) is not already used. */
-    test_circ_id = chan->next_circ_id++;
-    if (test_circ_id == 0 || test_circ_id >= max_range) {
-      test_circ_id = 1;
-      chan->next_circ_id = 2;
-    }
-    if (++attempts > max_range) {
-      /* Make sure we don't loop forever if all circ_id's are used. This
-       * matters because it's an external DoS opportunity.
+    if (++attempts > MAX_CIRCID_ATTEMPTS) {
+      /* Make sure we don't loop forever because all circuit IDs are used.
+       *
+       * Once, we would try until we had tried every possible circuit ID.  But
+       * that's quite expensive.  Instead, we try MAX_CIRCID_ATTEMPTS random
+       * circuit IDs, and then give up.
+       *
+       * This potentially causes us to give up early if our circuit ID space
+       * is nearly full.  If we have N circuit IDs in use, then we will reject
+       * a new circuit with probability (N / max_range) ^ MAX_CIRCID_ATTEMPTS.
+       * This means that in practice, a few percent of our circuit ID capacity
+       * will go unused.
+       *
+       * The alternative here, though, is to do a linear search over the
+       * whole circuit ID space every time we extend a circuit, which is
+       * not so great either.
        */
-      log_warn(LD_CIRC,"No unused circ IDs. Failing.");
+      log_fn_ratelim(&chan->last_warned_circ_ids_exhausted, LOG_WARN,
+                 LD_CIRC,"No unused circIDs found on channel %s wide "
+                 "circID support, with %u inbound and %u outbound circuits. "
+                 "Found %u circuit IDs in use by circuits, and %u with "
+                 "pending destroy cells."
+                 "Failing a circuit.",
+                 chan->wide_circ_ids ? "with" : "without",
+                 chan->num_p_circuits, chan->num_n_circuits,
+                 n_with_circ, n_pending_destroy);
       return 0;
     }
+
+    do {
+      crypto_rand((char*) &test_circ_id, sizeof(test_circ_id));
+      test_circ_id &= mask;
+    } while (test_circ_id == 0);
+
     test_circ_id |= high_bit;
-  } while (circuit_id_in_use_on_channel(test_circ_id, chan));
+
+    in_use = circuit_id_in_use_on_channel(test_circ_id, chan);
+    if (in_use == 1)
+      ++n_with_circ;
+    else if (in_use == 2)
+      ++n_pending_destroy;
+  } while (in_use);
   return test_circ_id;
 }
 
@@ -561,7 +598,9 @@ circuit_deliver_create_cell(circuit_t *circ, const create_cell_t *create_cell,
 
   id = get_unique_circ_id_by_chan(circ->n_chan);
   if (!id) {
-    log_warn(LD_CIRC,"failed to get unique circID.");
+    static ratelim_t circid_warning_limit = RATELIM_INIT(9600);
+    log_fn_ratelim(&circid_warning_limit, LOG_WARN, LD_CIRC,
+                   "failed to get unique circID.");
     return -1;
   }
   log_debug(LD_CIRC,"Chosen circID %u.", (unsigned)id);

+ 11 - 3
src/or/circuitlist.c

@@ -1065,13 +1065,21 @@ circuit_get_by_circid_channel_even_if_marked(circid_t circ_id,
 }
 
 /** Return true iff the circuit ID <b>circ_id</b> is currently used by a
- * circuit, marked or not, on <b>chan</b>. */
+ * circuit, marked or not, on <b>chan</b>, or if the circ ID is reserved until
+ * a queued destroy cell can be sent.
+ *
+ * (Return 1 if the circuit is present, marked or not; Return 2
+ * if the circuit ID is pending a destroy.)
+ **/
 int
 circuit_id_in_use_on_channel(circid_t circ_id, channel_t *chan)
 {
   int found = 0;
-  return circuit_get_by_circid_channel_impl(circ_id, chan, &found) != NULL
-    || found;
+  if (circuit_get_by_circid_channel_impl(circ_id, chan, &found) != NULL)
+    return 1;
+  if (found)
+    return 2;
+  return 0;
 }
 
 /** Return the circuit that a given edge connection is using. */

+ 0 - 2
src/or/connection.c

@@ -271,8 +271,6 @@ dir_connection_new(int socket_family)
  *
  * Set timestamp_last_added_nonpadding to now.
  *
- * Assign a pseudorandom next_circ_id between 0 and 2**15.
- *
  * Initialize active_circuit_pqueue.
  *
  * Set active_circuit_pqueue_last_recalibrated to current cell_ewma tick.