Browse Source

Merge remote-tracking branch 'public/ticket6799_024_v2_squashed'

Conflicts:
	src/or/channel.c
	src/or/circuitlist.c
	src/or/connection.c

Conflicts involved removal of next_circ_id and addition of
unusable-circid tracking.
Nick Mathewson 10 years ago
parent
commit
3a2e25969f
10 changed files with 107 additions and 39 deletions
  1. 20 0
      changes/bug6799
  2. 5 6
      src/or/channel.c
  3. 4 2
      src/or/channel.h
  4. 1 1
      src/or/channeltls.c
  5. 8 2
      src/or/circuitlist.c
  6. 1 3
      src/or/connection.c
  7. 40 6
      src/or/connection_or.c
  8. 2 0
      src/or/connection_or.h
  9. 22 16
      src/or/main.c
  10. 4 3
      src/or/or.h

+ 20 - 0
changes/bug6799

@@ -0,0 +1,20 @@
+  o Major features:
+
+    - Increase the base amount of time that a canonical connection
+      (one that we have made to a known OR) is allowed to stay idle
+      from 3 minutes to 15 minutes.  This leaks less information
+      about when circuits have closed, and avoids unnecessary overhead
+      from renegotiating connections. Part of a fix for ticket 6799.
+
+    - Instead of closing connections after they have been idle for a
+      fixed interval, randomly add up to 50% to each connection's
+      maximum timeout. This makes it harder to tell when the last
+      circuit closed by looking at when a connection closes. Part of a
+      fix for ticket 6799.
+
+    - Base connection idleness tests on the actual time elapsed since
+      the connection last had circuits, not on the time when we last
+      added non-padding. This change also makes it harder for an
+      observer to tell when the last circuit closed by looking at when
+      a connection closes. Part of a fix for ticket 6799.
+      Incidentally fixes bug 12023; bugfix on 0.2.5.1-alpha.

+ 5 - 6
src/or/channel.c

@@ -112,7 +112,9 @@ HT_GENERATE(channel_idmap, channel_idmap_entry_s, node, channel_idmap_hash,
 
 static cell_queue_entry_t * cell_queue_entry_dup(cell_queue_entry_t *q);
 static void cell_queue_entry_free(cell_queue_entry_t *q, int handed_off);
+#if 0
 static int cell_queue_entry_is_padding(cell_queue_entry_t *q);
+#endif
 static cell_queue_entry_t *
 cell_queue_entry_new_fixed(cell_t *cell);
 static cell_queue_entry_t *
@@ -726,7 +728,7 @@ channel_init(channel_t *chan)
   chan->global_identifier = n_channels_allocated++;
 
   /* Init timestamp */
-  chan->timestamp_last_added_nonpadding = time(NULL);
+  chan->timestamp_last_had_circuits = time(NULL);
 
   /* Warn about exhausted circuit IDs no more than hourly. */
   chan->last_warned_circ_ids_exhausted.rate = 3600;
@@ -1595,6 +1597,7 @@ cell_queue_entry_free(cell_queue_entry_t *q, int handed_off)
   tor_free(q);
 }
 
+#if 0
 /**
  * Check whether a cell queue entry is padding; this is a helper function
  * for channel_write_cell_queue_entry()
@@ -1623,6 +1626,7 @@ cell_queue_entry_is_padding(cell_queue_entry_t *q)
 
   return 0;
 }
+#endif
 
 /**
  * Allocate a new cell queue entry for a fixed-size cell
@@ -1681,11 +1685,6 @@ channel_write_cell_queue_entry(channel_t *chan, cell_queue_entry_t *q)
              chan->state == CHANNEL_STATE_OPEN ||
              chan->state == CHANNEL_STATE_MAINT);
 
-  /* Increment the timestamp unless it's padding */
-  if (!cell_queue_entry_is_padding(q)) {
-    chan->timestamp_last_added_nonpadding = approx_time();
-  }
-
   {
     circid_t circ_id;
     if (is_destroy_cell(chan, q, &circ_id)) {

+ 4 - 2
src/or/channel.h

@@ -187,8 +187,10 @@ struct channel_s {
   time_t timestamp_recv; /* Cell received from lower layer */
   time_t timestamp_xmit; /* Cell sent to lower layer */
 
-  /* Timestamp for relay.c */
-  time_t timestamp_last_added_nonpadding;
+  /** Timestamp for run_connection_housekeeping(). We update this once a
+   * second when we run housekeeping and find a circuit on this channel, and
+   * whenever we add a circuit to the channel. */
+  time_t timestamp_last_had_circuits;
 
   /** Unique ID for measuring direct network status requests;vtunneled ones
    * come over a circuit_t, which has a dirreq_id field as well, but is a

+ 1 - 1
src/or/channeltls.c

@@ -1554,7 +1554,7 @@ channel_tls_process_netinfo_cell(cell_t *cell, channel_tls_t *chan)
       return;
     }
     if (tor_addr_eq(&addr, &(chan->conn->real_addr))) {
-      chan->conn->is_canonical = 1;
+      connection_or_set_canonical(chan->conn, 1);
       break;
     }
     cp = next;

+ 8 - 2
src/or/circuitlist.c

@@ -328,10 +328,13 @@ circuit_set_p_circid_chan(or_circuit_t *or_circ, circid_t id,
 
   circuit_set_circid_chan_helper(circ, CELL_DIRECTION_IN, id, chan);
 
-  if (chan)
+  if (chan) {
     tor_assert(bool_eq(or_circ->p_chan_cells.n,
                        or_circ->next_active_on_p_chan));
 
+    chan->timestamp_last_had_circuits = approx_time();
+  }
+
   if (circ->p_delete_pending && old_chan) {
     channel_mark_circid_unusable(old_chan, old_id);
     circ->p_delete_pending = 0;
@@ -350,9 +353,12 @@ circuit_set_n_circid_chan(circuit_t *circ, circid_t id,
 
   circuit_set_circid_chan_helper(circ, CELL_DIRECTION_OUT, id, chan);
 
-  if (chan)
+  if (chan) {
     tor_assert(bool_eq(circ->n_chan_cells.n, circ->next_active_on_n_chan));
 
+    chan->timestamp_last_had_circuits = approx_time();
+  }
+
   if (circ->n_delete_pending && old_chan) {
     channel_mark_circid_unusable(old_chan, old_id);
     circ->n_delete_pending = 0;

+ 1 - 3
src/or/connection.c

@@ -269,8 +269,6 @@ dir_connection_new(int socket_family)
 /** Allocate and return a new or_connection_t, initialized as by
  * connection_init().
  *
- * Set timestamp_last_added_nonpadding to now.
- *
  * Initialize active_circuit_pqueue.
  *
  * Set active_circuit_pqueue_last_recalibrated to current cell_ewma tick.
@@ -283,7 +281,7 @@ or_connection_new(int type, int socket_family)
   tor_assert(type == CONN_TYPE_OR || type == CONN_TYPE_EXT_OR);
   connection_init(now, TO_CONN(or_conn), type, socket_family);
 
-  or_conn->timestamp_last_added_nonpadding = time(NULL);
+  connection_or_set_canonical(or_conn, 0);
 
   if (type == CONN_TYPE_EXT_OR)
     connection_or_set_ext_or_identifier(or_conn);

+ 40 - 6
src/or/connection_or.c

@@ -826,6 +826,45 @@ connection_or_update_token_buckets(smartlist_t *conns,
   });
 }
 
+/** How long do we wait before killing non-canonical OR connections with no
+ * circuits?  In Tor versions up to 0.2.1.25 and 0.2.2.12-alpha, we waited 15
+ * minutes before cancelling these connections, which caused fast relays to
+ * accrue many many idle connections. Hopefully 3-4.5 minutes is low enough
+ * that it kills most idle connections, without being so low that we cause
+ * clients to bounce on and off.
+ *
+ * For canonical connections, the limit is higher, at 15-22.5 minutes.
+ *
+ * For each OR connection, we randomly add up to 50% extra to its idle_timeout
+ * field, to avoid exposing when exactly the last circuit closed.  Since we're
+ * storing idle_timeout in a uint16_t, don't let these values get higher than
+ * 12 hours or so without revising connection_or_set_canonical and/or expanding
+ * idle_timeout.
+ */
+#define IDLE_OR_CONN_TIMEOUT_NONCANONICAL 180
+#define IDLE_OR_CONN_TIMEOUT_CANONICAL 900
+
+/* Mark <b>or_conn</b> as canonical if <b>is_canonical</b> is set, and
+ * non-canonical otherwise. Adjust idle_timeout accordingly.
+ */
+void
+connection_or_set_canonical(or_connection_t *or_conn,
+                            int is_canonical)
+{
+  const unsigned int timeout_base = is_canonical ?
+    IDLE_OR_CONN_TIMEOUT_CANONICAL : IDLE_OR_CONN_TIMEOUT_NONCANONICAL;
+
+  if (bool_eq(is_canonical, or_conn->is_canonical) &&
+      or_conn->idle_timeout != 0) {
+    /* Don't recalculate an existing idle_timeout unless the canonical
+     * status changed. */
+    return;
+  }
+
+  or_conn->is_canonical = !! is_canonical; /* force to a 1-bit boolean */
+  or_conn->idle_timeout = timeout_base + crypto_rand_int(timeout_base / 2);
+}
+
 /** If we don't necessarily know the router we're connecting to, but we
  * have an addr/port/id_digest, then fill in as much as we can. Start
  * by checking to see if this describes a router we know.
@@ -850,7 +889,7 @@ connection_or_init_conn_from_address(or_connection_t *conn,
     /* XXXX proposal 186 is making this more complex.  For now, a conn
        is canonical when it uses the _preferred_ address. */
     if (tor_addr_eq(&conn->base_.addr, &node_ap.addr))
-      conn->is_canonical = 1;
+      connection_or_set_canonical(conn, 1);
     if (!started_here) {
       /* Override the addr/port, so our log messages will make sense.
        * This is dangerous, since if we ever try looking up a conn by
@@ -1966,9 +2005,6 @@ connection_or_write_cell_to_buf(const cell_t *cell, or_connection_t *conn)
 
   if (conn->base_.state == OR_CONN_STATE_OR_HANDSHAKING_V3)
     or_handshake_state_record_cell(conn, conn->handshake_state, cell, 0);
-
-  if (cell->command != CELL_PADDING)
-    conn->timestamp_last_added_nonpadding = approx_time();
 }
 
 /** Pack a variable-length <b>cell</b> into wire-format, and write it onto
@@ -1989,8 +2025,6 @@ connection_or_write_var_cell_to_buf(const var_cell_t *cell,
                           cell->payload_len, TO_CONN(conn));
   if (conn->base_.state == OR_CONN_STATE_OR_HANDSHAKING_V3)
     or_handshake_state_record_var_cell(conn, conn->handshake_state, cell, 0);
-  if (cell->command != CELL_PADDING)
-    conn->timestamp_last_added_nonpadding = approx_time();
 
   /* Touch the channel's active timestamp if there is one */
   if (conn->chan)

+ 2 - 0
src/or/connection_or.h

@@ -48,6 +48,8 @@ void connection_or_report_broken_states(int severity, int domain);
 MOCK_DECL(int,connection_tls_start_handshake,(or_connection_t *conn,
                                               int receiving));
 int connection_tls_continue_handshake(or_connection_t *conn);
+void connection_or_set_canonical(or_connection_t *or_conn,
+                                 int is_canonical);
 
 int connection_init_or_handshake_state(or_connection_t *conn,
                                        int started_here);

+ 22 - 16
src/or/main.c

@@ -1001,15 +1001,6 @@ directory_info_has_arrived(time_t now, int from_cache)
     consider_testing_reachability(1, 1);
 }
 
-/** How long do we wait before killing OR connections with no circuits?
- * In Tor versions up to 0.2.1.25 and 0.2.2.12-alpha, we waited 15 minutes
- * before cancelling these connections, which caused fast relays to accrue
- * many many idle connections. Hopefully 3 minutes is low enough that
- * it kills most idle connections, without being so low that we cause
- * clients to bounce on and off.
- */
-#define IDLE_OR_CONN_TIMEOUT 180
-
 /** Perform regular maintenance tasks for a single connection.  This
  * function gets run once per second per connection by run_scheduled_events.
  */
@@ -1020,6 +1011,8 @@ run_connection_housekeeping(int i, time_t now)
   connection_t *conn = smartlist_get(connection_array, i);
   const or_options_t *options = get_options();
   or_connection_t *or_conn;
+  channel_t *chan = NULL;
+  int have_any_circuits;
   int past_keepalive =
     now >= conn->timestamp_lastwritten + options->KeepalivePeriod;
 
@@ -1069,8 +1062,19 @@ run_connection_housekeeping(int i, time_t now)
   tor_assert(conn->outbuf);
 #endif
 
+  chan = TLS_CHAN_TO_BASE(or_conn->chan);
+  tor_assert(chan);
+
+
+  if (channel_num_circuits(chan) != 0) {
+    have_any_circuits = 1;
+    chan->timestamp_last_had_circuits = now;
+  } else {
+    have_any_circuits = 0;
+  }
+
   if (channel_is_bad_for_new_circs(TLS_CHAN_TO_BASE(or_conn->chan)) &&
-      !connection_or_get_num_circuits(or_conn)) {
+      ! have_any_circuits) {
     /* It's bad for new circuits, and has no unmarked circuits on it:
      * mark it now. */
     log_info(LD_OR,
@@ -1089,19 +1093,21 @@ run_connection_housekeeping(int i, time_t now)
       connection_or_close_normally(TO_OR_CONN(conn), 0);
     }
   } else if (we_are_hibernating() &&
-             !connection_or_get_num_circuits(or_conn) &&
+             ! have_any_circuits &&
              !connection_get_outbuf_len(conn)) {
     /* We're hibernating, there's no circuits, and nothing to flush.*/
     log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) "
              "[Hibernating or exiting].",
              (int)conn->s,conn->address, conn->port);
     connection_or_close_normally(TO_OR_CONN(conn), 1);
-  } else if (!connection_or_get_num_circuits(or_conn) &&
-             now >= or_conn->timestamp_last_added_nonpadding +
-                                         IDLE_OR_CONN_TIMEOUT) {
+  } else if (!have_any_circuits &&
+             now - or_conn->idle_timeout >= chan->timestamp_last_had_circuits) {
     log_info(LD_OR,"Expiring non-used OR connection to fd %d (%s:%d) "
-             "[idle %d].", (int)conn->s,conn->address, conn->port,
-             (int)(now - or_conn->timestamp_last_added_nonpadding));
+             "[no circuits for %d; timeout %d; %scanonical].",
+             (int)conn->s, conn->address, conn->port,
+             (int)(now - chan->timestamp_last_had_circuits),
+             or_conn->idle_timeout,
+             or_conn->is_canonical ? "" : "non");
     connection_or_close_normally(TO_OR_CONN(conn), 0);
   } else if (
       now >= or_conn->timestamp_lastempty + options->KeepalivePeriod*10 &&

+ 4 - 3
src/or/or.h

@@ -1488,13 +1488,14 @@ typedef struct or_connection_t {
 
   uint16_t link_proto; /**< What protocol version are we using? 0 for
                         * "none negotiated yet." */
-
+  uint16_t idle_timeout; /**< How long can this connection sit with no
+                          * circuits on it before we close it? Based on
+                          * IDLE_CIRCUIT_TIMEOUT_{NON,}CANONICAL and
+                          * on is_canonical, randomized. */
   or_handshake_state_t *handshake_state; /**< If we are setting this connection
                                           * up, state information to do so. */
 
   time_t timestamp_lastempty; /**< When was the outbuf last completely empty?*/
-  time_t timestamp_last_added_nonpadding; /** When did we last add a
-                                           * non-padding cell to the outbuf? */
 
   /* bandwidth* and *_bucket only used by ORs in OPEN state: */
   int bandwidthrate; /**< Bytes/s added to the bucket. (OPEN ORs only.) */