Browse Source

Bug 17604: Converge on only one long-lived TLS conn between relays.

Accomplished via the following:

1. Use NETINFO cells to determine if both peers will agree on canonical
   status. Prefer connections where they agree to those where they do not.
2. Alter channel_is_better() to prefer older orconns in the case of multiple
   canonical connections, and use the orconn with more circuits on it in case
   of age ties.

Also perform some hourly accounting on how many of these types of connections
there are and log it at info or notice level.
Mike Perry 8 years ago
parent
commit
76c9330f9d
6 changed files with 190 additions and 44 deletions
  1. 14 0
      changes/bug17604
  2. 108 29
      src/or/channel.c
  3. 5 3
      src/or/channel.h
  4. 42 0
      src/or/channeltls.c
  5. 5 9
      src/or/connection_or.c
  6. 16 3
      src/or/main.c

+ 14 - 0
changes/bug17604

@@ -0,0 +1,14 @@
+ o Minor bugfixes (connection usage)
+   - Use NETINFO cells to try to determine if both relays involved in
+     a connection will agree on the canonical status of that connection.
+     Prefer the connections where this is the case for extend cells,
+     and try to close connections where relays disagree on canonical
+     status early. Also, additionally alter the connection selection
+     logic to prefer the oldest valid connection for extend cells.
+     These two changes should reduce the number of long-term connections
+     that are kept open between relays. Fixes bug #17604.
+   - Relays will now log hourly statistics on the total number of
+     connections to other relays. If the number of connections per relay
+     unexpectedly large, this log message is at notice level. Otherwise
+     it is at info.
+

+ 108 - 29
src/or/channel.c

@@ -836,6 +836,83 @@ channel_next_with_rsa_identity(channel_t *chan)
   return TOR_LIST_NEXT(chan, next_with_same_id);
 }
 
+/**
+ * Relays run this once an hour to look over our list of channels to other
+ * relays. It prints out some statistics if there are multiple connections
+ * to many relays.
+ *
+ * This function is similar to connection_or_set_bad_connections(),
+ * and probably could be adapted to replace it, if it was modified to actually
+ * take action on any of these connections.
+ */
+void
+channel_check_for_duplicates(void)
+{
+  channel_idmap_entry_t **iter;
+  channel_t *chan;
+  int total_relay_connections = 0, total_relays = 0, total_canonical = 0;
+  int total_half_canonical = 0;
+  int total_gt_one_connection = 0, total_gt_two_connections = 0;
+  int total_gt_four_connections = 0;
+
+  HT_FOREACH(iter, channel_idmap, &channel_identity_map) {
+    int connections_to_relay = 0;
+
+    /* Only consider relay connections */
+    if (!connection_or_digest_is_known_relay((char*)(*iter)->digest))
+      continue;
+
+    total_relays++;
+
+    for (chan = TOR_LIST_FIRST(&(*iter)->channel_list); chan;
+        chan = channel_next_with_rsa_identity(chan)) {
+
+      if (CHANNEL_CONDEMNED(chan) || !CHANNEL_IS_OPEN(chan))
+        continue;
+
+      connections_to_relay++;
+      total_relay_connections++;
+
+      if (chan->is_canonical(chan, 0)) total_canonical++;
+
+      if (!chan->is_canonical_to_peer && chan->is_canonical(chan, 0)
+          && chan->is_canonical(chan, 1)) {
+        total_half_canonical++;
+      }
+    }
+
+    if (connections_to_relay > 1) total_gt_one_connection++;
+    if (connections_to_relay > 2) total_gt_two_connections++;
+    if (connections_to_relay > 4) total_gt_four_connections++;
+  }
+
+#define MIN_RELAY_CONNECTIONS_TO_WARN 5
+
+  /* If we average 1.5 or more connections per relay, something is wrong */
+  if (total_relays > MIN_RELAY_CONNECTIONS_TO_WARN &&
+          total_relay_connections >= 1.5*total_relays) {
+    log_notice(LD_OR,
+        "Your relay has a very large number of connections to other relays. "
+        "Is your outbound address the same as your relay address? "
+        "Found %d connections to %d relays. Found %d current canonical "
+        "connections, in %d of which we were a non-canonical peer. "
+        "%d relays had more than 1 connection, %d had more than 2, and "
+        "%d had more than 4 connections.",
+        total_relay_connections, total_relays, total_canonical,
+        total_half_canonical, total_gt_one_connection,
+        total_gt_two_connections, total_gt_four_connections);
+  } else {
+    log_info(LD_OR, "Performed connection pruning. "
+        "Found %d connections to %d relays. Found %d current canonical "
+        "connections, in %d of which we were a non-canonical peer. "
+        "%d relays had more than 1 connection, %d had more than 2, and "
+        "%d had more than 4 connections.",
+        total_relay_connections, total_relays, total_canonical,
+        total_half_canonical, total_gt_one_connection,
+        total_gt_two_connections, total_gt_four_connections);
+  }
+}
+
 /**
  * Initialize a channel
  *
@@ -3323,22 +3400,20 @@ channel_connect(const tor_addr_t *addr, uint16_t port,
  */
 
 int
-channel_is_better(time_t now, channel_t *a, channel_t *b,
-                  int forgive_new_connections)
+channel_is_better(channel_t *a, channel_t *b)
 {
-  int a_grace, b_grace;
   int a_is_canonical, b_is_canonical;
-  int a_has_circs, b_has_circs;
-
-  /*
-   * Do not definitively deprecate a new channel with no circuits on it
-   * until this much time has passed.
-   */
-#define NEW_CHAN_GRACE_PERIOD (15*60)
 
   tor_assert(a);
   tor_assert(b);
 
+  /* If one channel is bad for new circuits, and the other isn't,
+   * use the one that is still good. */
+  if (!channel_is_bad_for_new_circs(a) && channel_is_bad_for_new_circs(b))
+    return 1;
+  if (channel_is_bad_for_new_circs(a) && !channel_is_bad_for_new_circs(b))
+    return 0;
+
   /* Check if one is canonical and the other isn't first */
   a_is_canonical = channel_is_canonical(a);
   b_is_canonical = channel_is_canonical(b);
@@ -3346,26 +3421,31 @@ channel_is_better(time_t now, channel_t *a, channel_t *b,
   if (a_is_canonical && !b_is_canonical) return 1;
   if (!a_is_canonical && b_is_canonical) return 0;
 
+  /* Check if we suspect that one of the channels will be preferred
+   * by the peer */
+  if (a->is_canonical_to_peer && !b->is_canonical_to_peer) return 1;
+  if (!a->is_canonical_to_peer && b->is_canonical_to_peer) return 0;
+
   /*
-   * Okay, if we're here they tied on canonicity. Next we check if
-   * they have any circuits, and if one does and the other doesn't,
-   * we prefer the one that does, unless we are forgiving and the
-   * one that has no circuits is in its grace period.
+   * Okay, if we're here they tied on canonicity, the prefer the older
+   * connection, so that the adversary can't create a new connection
+   * and try to switch us over to it (which will leak information
+   * about long-lived circuits). Additionally, switching connections
+   * too often makes us more vulnerable to attacks like Torscan and
+   * passive netflow-based equivalents.
+   *
+   * Connections will still only live for at most a week, due to
+   * the check in connection_or_group_set_badness() against
+   * TIME_BEFORE_OR_CONN_IS_TOO_OLD, which marks old connections as
+   * unusable for new circuits after 1 week. That check sets
+   * is_bad_for_new_circs, which is checked in channel_get_for_extend().
+   *
+   * We check channel_is_bad_for_new_circs() above here anyway, for safety.
    */
+  if (channel_when_created(a) < channel_when_created(b)) return 1;
+  else if (channel_when_created(a) > channel_when_created(b)) return 0;
 
-  a_has_circs = (channel_num_circuits(a) > 0);
-  b_has_circs = (channel_num_circuits(b) > 0);
-  a_grace = (forgive_new_connections &&
-             (now < channel_when_created(a) + NEW_CHAN_GRACE_PERIOD));
-  b_grace = (forgive_new_connections &&
-             (now < channel_when_created(b) + NEW_CHAN_GRACE_PERIOD));
-
-  if (a_has_circs && !b_has_circs && !b_grace) return 1;
-  if (!a_has_circs && b_has_circs && !a_grace) return 0;
-
-  /* They tied on circuits too; just prefer whichever is newer */
-
-  if (channel_when_created(a) > channel_when_created(b)) return 1;
+  if (channel_num_circuits(a) > channel_num_circuits(b)) return 1;
   else return 0;
 }
 
@@ -3390,7 +3470,6 @@ channel_get_for_extend(const char *rsa_id_digest,
   channel_t *chan, *best = NULL;
   int n_inprogress_goodaddr = 0, n_old = 0;
   int n_noncanonical = 0, n_possible = 0;
-  time_t now = approx_time();
 
   tor_assert(msg_out);
   tor_assert(launch_out);
@@ -3460,7 +3539,7 @@ channel_get_for_extend(const char *rsa_id_digest,
       continue;
     }
 
-    if (channel_is_better(now, chan, best, 0))
+    if (channel_is_better(chan, best))
       best = chan;
   }
 

+ 5 - 3
src/or/channel.h

@@ -85,6 +85,9 @@ struct channel_s {
   /** Is there a pending netflow padding callback? */
   unsigned int pending_padding_callback:1;
 
+  /** Is our peer likely to consider this channel canonical? */
+  unsigned int is_canonical_to_peer:1;
+
   /** Has this channel ever been used for non-directory traffic?
    * Used to decide what channels to pad, and when. */
   channel_usage_info_t channel_usage;
@@ -599,9 +602,7 @@ channel_t * channel_get_for_extend(const char *rsa_id_digest,
                                    int *launch_out);
 
 /* Ask which of two channels is better for circuit-extension purposes */
-int channel_is_better(time_t now,
-                      channel_t *a, channel_t *b,
-                      int forgive_new_connections);
+int channel_is_better(channel_t *a, channel_t *b);
 
 /** Channel lookups
  */
@@ -684,6 +685,7 @@ void channel_listener_dump_statistics(channel_listener_t *chan_l,
                                       int severity);
 void channel_listener_dump_transport_statistics(channel_listener_t *chan_l,
                                                 int severity);
+void channel_check_for_duplicates(void);
 
 void channel_update_bad_for_new_circs(const char *digest, int force);
 

+ 42 - 0
src/or/channeltls.c

@@ -739,6 +739,15 @@ channel_tls_matches_target_method(channel_t *chan,
     return 0;
   }
 
+  /* real_addr is the address this connection came from.
+   * base_.addr is updated by connection_or_init_conn_from_address()
+   * to be the address in the descriptor. It may be tempting to
+   * allow either address to be allowed, but if we did so, it would
+   * enable someone who steals a relay's keys to impersonate/MITM it
+   * from anywhere on the Internet! (Because they could make long-lived
+   * TLS connections from anywhere to all relays, and wait for them to
+   * be used for extends).
+   */
   return tor_addr_eq(&(tlschan->conn->real_addr), target);
 }
 
@@ -1667,6 +1676,7 @@ channel_tls_process_netinfo_cell(cell_t *cell, channel_tls_t *chan)
   const uint8_t *cp, *end;
   uint8_t n_other_addrs;
   time_t now = time(NULL);
+  const routerinfo_t *me = router_get_my_routerinfo();
 
   long apparent_skew = 0;
   tor_addr_t my_apparent_addr = TOR_ADDR_NULL;
@@ -1745,8 +1755,20 @@ channel_tls_process_netinfo_cell(cell_t *cell, channel_tls_t *chan)
 
   if (my_addr_type == RESOLVED_TYPE_IPV4 && my_addr_len == 4) {
     tor_addr_from_ipv4n(&my_apparent_addr, get_uint32(my_addr_ptr));
+
+    if (!get_options()->BridgeRelay && me &&
+        get_uint32(my_addr_ptr) == htonl(me->addr)) {
+      chan->base_.is_canonical_to_peer = 1;
+    }
+
   } else if (my_addr_type == RESOLVED_TYPE_IPV6 && my_addr_len == 16) {
     tor_addr_from_ipv6_bytes(&my_apparent_addr, (const char *) my_addr_ptr);
+
+    if (!get_options()->BridgeRelay && me &&
+        !tor_addr_is_null(&me->ipv6_addr) &&
+        tor_addr_eq(&my_apparent_addr, &me->ipv6_addr)) {
+      chan->base_.is_canonical_to_peer = 1;
+    }
   }
 
   n_other_addrs = (uint8_t) *cp++;
@@ -1762,6 +1784,14 @@ channel_tls_process_netinfo_cell(cell_t *cell, channel_tls_t *chan)
       connection_or_close_for_error(chan->conn, 0);
       return;
     }
+    /* A relay can connect from anywhere and be canonical, so
+     * long as it tells you from where it came. This may be a bit
+     * concerning.. Luckily we have another check in
+     * channel_tls_matches_target_method() to ensure that extends
+     * only go to the IP they ask for.
+     *
+     * XXX: Bleh. That check is not used if the connection is canonical.
+     */
     if (tor_addr_eq(&addr, &(chan->conn->real_addr))) {
       connection_or_set_canonical(chan->conn, 1);
       break;
@@ -1770,6 +1800,18 @@ channel_tls_process_netinfo_cell(cell_t *cell, channel_tls_t *chan)
     --n_other_addrs;
   }
 
+  if (me && !chan->base_.is_canonical_to_peer && chan->conn->is_canonical) {
+    log_info(LD_OR,
+             "We made a connection to a relay at %s (fp=%s) but we think "
+             "they will not consider this connection canonical. They "
+             "think we are at %s, but we think its %s.",
+             safe_str(chan->base_.get_remote_descr(&chan->base_, 0)),
+             safe_str(hex_str(chan->conn->identity_digest, DIGEST_LEN)),
+             safe_str(tor_addr_is_null(&my_apparent_addr) ?
+             "<none>" : fmt_and_decorate_addr(&my_apparent_addr)),
+             safe_str(fmt_addr32(me->addr)));
+  }
+
   /* Act on apparent skew. */
   /** Warn when we get a netinfo skew with at least this value. */
 #define NETINFO_NOTICE_SKEW 3600

+ 5 - 9
src/or/connection_or.c

@@ -1040,10 +1040,8 @@ connection_or_group_set_badness_(smartlist_t *group, int force)
     }
 
     if (!best ||
-        channel_is_better(now,
-                          TLS_CHAN_TO_BASE(or_conn->chan),
-                          TLS_CHAN_TO_BASE(best->chan),
-                          0)) {
+        channel_is_better(TLS_CHAN_TO_BASE(or_conn->chan),
+                          TLS_CHAN_TO_BASE(best->chan))) {
       best = or_conn;
     }
   } SMARTLIST_FOREACH_END(or_conn);
@@ -1071,11 +1069,9 @@ connection_or_group_set_badness_(smartlist_t *group, int force)
         or_conn->base_.state != OR_CONN_STATE_OPEN)
       continue;
     if (or_conn != best &&
-        channel_is_better(now,
-                          TLS_CHAN_TO_BASE(best->chan),
-                          TLS_CHAN_TO_BASE(or_conn->chan), 1)) {
-      /* This isn't the best conn, _and_ the best conn is better than it,
-         even when we're being forgiving. */
+        channel_is_better(TLS_CHAN_TO_BASE(best->chan),
+                          TLS_CHAN_TO_BASE(or_conn->chan))) {
+      /* This isn't the best conn, _and_ the best conn is better than it */
       if (best->is_canonical) {
         log_info(LD_OR,
                  "Marking OR conn to %s:%d as unsuitable for new circuits: "

+ 16 - 3
src/or/main.c

@@ -1189,6 +1189,7 @@ CALLBACK(write_bridge_ns);
 CALLBACK(check_fw_helper_app);
 CALLBACK(heartbeat);
 CALLBACK(reset_padding_counts);
+CALLBACK(check_canonical_channels);
 
 #undef CALLBACK
 
@@ -1221,6 +1222,7 @@ static periodic_event_item_t periodic_events[] = {
   CALLBACK(check_fw_helper_app),
   CALLBACK(heartbeat),
   CALLBACK(reset_padding_counts),
+  CALLBACK(check_canonical_channels),
   END_OF_PERIODIC_EVENTS
 };
 #undef CALLBACK
@@ -1726,9 +1728,17 @@ write_stats_file_callback(time_t now, const or_options_t *options)
   return safe_timer_diff(now, next_time_to_write_stats_files);
 }
 
-/**
- * Periodic callback: Write bridge statistics to disk if appropriate.
- */
+#define CHANNEL_CHECK_INTERVAL (60*60)
+static int
+check_canonical_channels_callback(time_t now, const or_options_t *options)
+{
+  (void)now;
+  if (public_server_mode(options))
+    channel_check_for_duplicates();
+
+  return CHANNEL_CHECK_INTERVAL;
+}
+
 static int
 reset_padding_counts_callback(time_t now, const or_options_t *options)
 {
@@ -1740,6 +1750,9 @@ reset_padding_counts_callback(time_t now, const or_options_t *options)
   return REPHIST_CELL_PADDING_COUNTS_INTERVAL;
 }
 
+/**
+ * Periodic callback: Write bridge statistics to disk if appropriate.
+ */
 static int
 record_bridge_stats_callback(time_t now, const or_options_t *options)
 {