7 years ago · e0bbef48bf
--- a/changes/bug24767
+++ b/changes/bug24767
@@ -0,0 +1,5 @@
 
				+  o Major bugfixes (relay, connection):
			
 
				+    - Refuse to connect again to a relay from which we failed previously with
			
 
				+      a connection refused, timeout or error (at the TCP level). The relay
			
 
				+      won't be retried for 60 seconds after the failure occurred. Fixes bug
			
 
				+      24767; bugfix on 0.0.6.
			
--- a/src/or/connection_or.c
+++ b/src/or/connection_or.c
@@ -28,6 +28,7 @@
 
				  * part of a subclass (channel_tls_t).
			
 
				  */
			
 
				 #define TOR_CHANNEL_INTERNAL_
			
 
				+#define CONNECTION_OR_PRIVATE
			
 
				 #include "channel.h"
			
 
				 #include "channeltls.h"
			
 
				 #include "circuitbuild.h"
			
@@ -1122,6 +1123,216 @@ connection_or_group_set_badness_(smartlist_t *group, int force)
 
				   } SMARTLIST_FOREACH_END(or_conn);
			
 
				 }
			
 
				 
			
 
				+/* Lifetime, in seconds, of a recorded connection failure. Until it expires,
			
 
				+ * we refuse to connect again to the same destination. */
			
 
				+#define OR_CONNECT_FAILURE_LIFETIME 60
			
 
				+/* Minimum delay, in seconds, between two cleanups of the failure cache. */
			
 
				+#define OR_CONNECT_FAILURE_CLEANUP_INTERVAL 60
			
 
				+
			
 
				+/* Earliest time at which the failure map has to be cleaned up again. We keep
			
 
				+ * this because there is no dedicated timer: the map is cleaned
			
 
				+ * opportunistically from should_connect_to_relay(). */
			
 
				+static time_t or_connect_failure_map_next_cleanup_ts = 0;
			
 
				+
			
 
				+/* OR connection failure entry data structure. It is kept in the connection
			
 
				+ * failure map defined below and indexed by OR identity digest, address and
			
 
				+ * port.
			
 
				+ *
			
 
				+ * We need to identify a connection failure with these three values because we
			
 
				+ * want to avoid wrongfully blacklisting a relay if someone is trying to
			
 
				+ * extend to a known identity digest but with the wrong IP/port. For instance,
			
 
				+ * it can happen if a relay changed its port but the client still has an old
			
 
				+ * descriptor with the old port. We want to stop connecting to that exact
			
 
				+ * IP/port/identity combination, not to the relay identity as a whole. */
			
 
				+typedef struct or_connect_failure_entry_t {
			
 
				+  HT_ENTRY(or_connect_failure_entry_t) node;
			
 
				+  /* Identity digest of the relay we tried to connect to. */
			
 
				+  uint8_t identity_digest[DIGEST_LEN];
			
 
				+  /* Address and port of the destination. The address is meant to be the one
			
 
				+   * from the base connection_t: after the connection is checked for
			
 
				+   * canonicity, the base address should represent what we know instead of
			
 
				+   * where we are connecting to. This is what we need so we can correlate
			
 
				+   * known relays within the consensus. */
			
 
				+  tor_addr_t addr;
			
 
				+  uint16_t port;
			
 
				+  /* Last time we were unable to connect. */
			
 
				+  time_t last_failed_connect_ts;
			
 
				+} or_connect_failure_entry_t;
			
 
				+
			
 
				+/* The connection failure cache: a hashtable of or_connect_failure_entry_t
			
 
				+ * indexed by addr, port and identity digest. */
			
 
				+static HT_HEAD(or_connect_failure_ht, or_connect_failure_entry_t)
			
 
				+       or_connect_failures_map = HT_INITIALIZER();
			
 
				+
			
 
				+/* Hashtable helper: compare two failure entries. Return 1 only if identity
				+ * digest, address and port all match, else 0. */
				+static int
				+or_connect_failure_ht_eq(const or_connect_failure_entry_t *a,
				+                         const or_connect_failure_entry_t *b)
				+{
				+  if (!fast_memeq(a->identity_digest, b->identity_digest, DIGEST_LEN)) {
				+    return 0;
				+  }
				+  if (!tor_addr_eq(&a->addr, &b->addr)) {
				+    return 0;
				+  }
				+  return a->port == b->port;
				+}
			
 
				+
			
 
				+/* Hashtable helper: compute the hash of the given entry from its address
				+ * bytes, identity digest and port. An entry whose address family is neither
				+ * AF_INET nor AF_INET6 triggers a non-fatal assertion and hashes to 0. */
				+static unsigned int
				+or_connect_failure_ht_hash(const or_connect_failure_entry_t *entry)
				+{
				+  /* Sized for the largest case, a 16-byte IPv6 address; an IPv4 address
				+   * simply uses less of the buffer. */
				+  uint8_t buf[16 + sizeof(uint16_t) + DIGEST_LEN];
				+  uint8_t *cursor = buf;
				+  const void *addr_bytes = NULL;
				+  size_t addr_len = 0;
				+
				+  /* Pick the raw address bytes that match the address family. */
				+  switch (tor_addr_family(&entry->addr)) {
				+  case AF_INET6:
				+    addr_bytes = &entry->addr.addr.in6_addr.s6_addr;
				+    addr_len = 16;
				+    break;
				+  case AF_INET:
				+    addr_bytes = &entry->addr.addr.in_addr.s_addr;
				+    addr_len = 4;
				+    break;
				+  default:
				+    tor_assert_nonfatal_unreached();
				+    return 0;
				+  }
				+
				+  /* Serialize the key as: address bytes, identity digest, port. */
				+  memcpy(cursor, addr_bytes, addr_len);
				+  cursor += addr_len;
				+  memcpy(cursor, entry->identity_digest, DIGEST_LEN);
				+  cursor += DIGEST_LEN;
				+  set_uint16(cursor, entry->port);
				+  cursor += sizeof(uint16_t);
				+
				+  return (unsigned int) siphash24g(buf, (size_t)(cursor - buf));
				+}
			
 
				+/* Hashtable declarations for the failure map, using the helpers above. */
			
 
				+HT_PROTOTYPE(or_connect_failure_ht, or_connect_failure_entry_t, node,
			
 
				+             or_connect_failure_ht_hash, or_connect_failure_ht_eq)
			
 
				+/* NOTE(review): 0.6 is presumably the table load factor and the last two
			
 
				+ * arguments the allocator hooks -- confirm against the HT_GENERATE2
			
 
				+ * definition. */
			
 
				+HT_GENERATE2(or_connect_failure_ht, or_connect_failure_entry_t, node,
			
 
				+             or_connect_failure_ht_hash, or_connect_failure_ht_eq,
			
 
				+             0.6, tor_reallocarray_, tor_free_)
			
 
				+
			
 
				+/* Fill in the connect failure entry ocf, which must not be NULL. The port is
				+ * always stored. identity_digest and addr are optional: each one is copied
				+ * only when non-NULL, and the matching field is left untouched otherwise. */
				+static void
				+or_connect_failure_init(const char *identity_digest, const tor_addr_t *addr,
				+                        uint16_t port, or_connect_failure_entry_t *ocf)
				+{
				+  tor_assert(ocf);
				+
				+  ocf->port = port;
				+  if (addr != NULL) {
				+    tor_addr_copy(&ocf->addr, addr);
				+  }
				+  if (identity_digest != NULL) {
				+    memcpy(ocf->identity_digest, identity_digest,
				+           sizeof(ocf->identity_digest));
				+  }
				+}
			
 
				+
			
 
				+/* Return a newly allocated connection failure entry, initialized from the
				+ * given or_conn: its identity digest plus the address and port of its base
				+ * connection_t. The caller owns the returned entry. This can't fail.
				+ *
				+ * The address must come from the base connection (and not from real_addr)
				+ * because or_connect_failure_find() builds its lookup key from the base
				+ * address. If the two used different sources, an entry created here could
				+ * never be found again whenever the addresses differ, and every new failure
				+ * would insert one more unreachable entry. */
				+static or_connect_failure_entry_t *
				+or_connect_failure_new(const or_connection_t *or_conn)
				+{
				+  or_connect_failure_entry_t *ocf;
				+
				+  tor_assert(or_conn);
				+
				+  ocf = tor_malloc_zero(sizeof(*ocf));
				+  or_connect_failure_init(or_conn->identity_digest, &TO_CONN(or_conn)->addr,
				+                          TO_CONN(or_conn)->port, ocf);
				+  return ocf;
				+}
			
 
				+
			
 
				+/* Look up the connection failure entry for the destination of or_conn, that
				+ * is its identity digest together with the address and port of its base
				+ * connection. Return the entry from the map, or NULL if there is none. */
				+static or_connect_failure_entry_t *
				+or_connect_failure_find(const or_connection_t *or_conn)
				+{
				+  or_connect_failure_entry_t key;
				+
				+  tor_assert(or_conn);
				+
				+  /* Build a temporary search key on the stack from the connection. */
				+  or_connect_failure_init(or_conn->identity_digest, &TO_CONN(or_conn)->addr,
				+                          TO_CONN(or_conn)->port, &key);
				+
				+  return HT_FIND(or_connect_failure_ht, &or_connect_failures_map, &key);
				+}
			
 
				+
			
 
				+/* Record in the connection failure cache that connecting to the destination
				+ * of the given or_conn failed. An entry is created and inserted if the
				+ * destination has none yet; in both cases its failure timestamp is set to
				+ * approx_time(). */
				+STATIC void
				+note_or_connect_failed(const or_connection_t *or_conn)
				+{
				+  or_connect_failure_entry_t *entry;
				+
				+  tor_assert(or_conn);
				+
				+  entry = or_connect_failure_find(or_conn);
				+  if (!entry) {
				+    /* First failure for this destination: start tracking it. */
				+    entry = or_connect_failure_new(or_conn);
				+    HT_INSERT(or_connect_failure_ht, &or_connect_failures_map, entry);
				+  }
				+  entry->last_failed_connect_ts = approx_time();
				+}
			
 
				+
			
 
				+/* Walk the whole connection failure cache and remove, then free, every entry
				+ * whose last failure timestamp is at or below the given cutoff. */
				+static void
				+or_connect_failure_map_cleanup(time_t cutoff)
				+{
				+  or_connect_failure_entry_t **iter;
				+
				+  iter = HT_START(or_connect_failure_ht, &or_connect_failures_map);
				+  while (iter != NULL) {
				+    or_connect_failure_entry_t *cur = *iter;
				+
				+    if (cur->last_failed_connect_ts > cutoff) {
				+      /* Still within its lifetime: keep it and move on. */
				+      iter = HT_NEXT(or_connect_failure_ht, &or_connect_failures_map, iter);
				+      continue;
				+    }
				+    /* Expired: unlink it from the table while advancing, then release it. */
				+    iter = HT_NEXT_RMV(or_connect_failure_ht, &or_connect_failures_map, iter);
				+    tor_free(cur);
				+  }
				+}
			
 
				+
			
 
				+/* Return 1 if the given OR connection is allowed to connect to its
				+ * destination, that is the triplet identity_digest, address and port. Return
				+ * 0 if we recorded a failure to that destination less than
				+ * OR_CONNECT_FAILURE_LIFETIME seconds ago.
				+ *
				+ * As a side effect, once the next scheduled cleanup time has been reached,
				+ * expired entries are removed from the failure cache.
				+ *
				+ * The or_conn MUST have gone through connection_or_check_canonicity() so the
				+ * base address is properly set to what we know or don't know. */
				+STATIC int
				+should_connect_to_relay(const or_connection_t *or_conn)
				+{
				+  time_t now, cutoff;
				+  const or_connect_failure_entry_t *ocf;
				+
				+  tor_assert(or_conn);
				+
				+  now = approx_time();
				+  cutoff = now - OR_CONNECT_FAILURE_LIFETIME;
				+
				+  /* Opportunistically try to cleanup the failure cache. We do that at regular
				+   * interval so it doesn't grow too big. */
				+  if (or_connect_failure_map_next_cleanup_ts <= now) {
				+    or_connect_failure_map_cleanup(cutoff);
				+    or_connect_failure_map_next_cleanup_ts =
				+      now + OR_CONNECT_FAILURE_CLEANUP_INTERVAL;
				+  }
				+
				+  /* Only a recorded failure that is still within its lifetime blocks the
				+   * connection. The presence of an entry is tested explicitly, instead of
				+   * comparing a default timestamp of 0 against the cutoff, so that a
				+   * destination we never failed to reach is always allowed, even when the
				+   * approximate time is so low that the cutoff is negative. */
				+  ocf = or_connect_failure_find(or_conn);
				+  if (ocf != NULL && ocf->last_failed_connect_ts > cutoff) {
				+    return 0;
				+  }
				+
				+  /* Ok we can connect! */
				+  return 1;
				+}
			
 
				+
			
 
				 /** <b>conn</b> is in the 'connecting' state, and it failed to complete
			
 
				  * a TCP connection. Send notifications appropriately.
			
 
				  *
			
@@ -1135,6 +1346,7 @@ connection_or_connect_failed(or_connection_t *conn,
 
				   control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED, reason);
			
 
				   if (!authdir_mode_tests_reachability(get_options()))
			
 
				     control_event_bootstrap_prob_or(msg, reason, conn);
			
 
				+  note_or_connect_failed(conn);
			
 
				 }
			
 
				 
			
 
				 /** <b>conn</b> got an error in connection_handle_read_impl() or
			
@@ -1225,6 +1437,19 @@ connection_or_connect, (const tor_addr_t *_addr, uint16_t port,
 
				   conn->chan = chan;
			
 
				   chan->conn = conn;
			
 
				   connection_or_init_conn_from_address(conn, &addr, port, id_digest, ed_id, 1);
			
 
				+
			
 
				+  /* We have a proper OR connection setup, now check if we can connect to it
			
 
				+   * that is we haven't had a failure earlier. This is to avoid to try to
			
 
				+   * constantly connect to relays that we think are not reachable. */
			
 
				+  if (!should_connect_to_relay(conn)) {
			
 
				+    log_info(LD_GENERAL, "Can't connect to identity %s at %s:%u because we "
			
 
				+                         "failed earlier. Refusing.",
			
 
				+             hex_str(id_digest, DIGEST_LEN), fmt_addr(&TO_CONN(conn)->addr),
			
 
				+             TO_CONN(conn)->port);
			
 
				+    connection_free_(TO_CONN(conn));
			
 
				+    return NULL;
			
 
				+  }
			
 
				+
			
 
				   connection_or_change_state(conn, OR_CONN_STATE_CONNECTING);
			
 
				   control_event_or_conn_status(conn, OR_CONN_EVENT_LAUNCHED, 0);
			
 
				 
			
--- a/src/or/connection_or.h
+++ b/src/or/connection_or.h
@@ -120,6 +120,11 @@ int connection_or_single_set_badness_(time_t now,
 
				                                       int force);
			
 
				 void connection_or_group_set_badness_(smartlist_t *group, int force);
			
 
				 
			
 
				+#ifdef CONNECTION_OR_PRIVATE
			
 
				+STATIC int should_connect_to_relay(const or_connection_t *or_conn);
			
 
				+STATIC void note_or_connect_failed(const or_connection_t *or_conn);
			
 
				+#endif /* defined(CONNECTION_OR_PRIVATE) */
			
 
				+
			
 
				 #ifdef TOR_UNIT_TESTS
			
 
				 extern int certs_cell_ed25519_disabled_for_testing;
			
 
				 #endif
			
--- a/src/or/nodelist.c
+++ b/src/or/nodelist.c
@@ -161,8 +161,8 @@ init_nodelist(void)
 
				 }
			
 
				 
			
 
				 /** As node_get_by_id, but returns a non-const pointer */
			
 
				-node_t *
			
 
				-node_get_mutable_by_id(const char *identity_digest)
			
 
				+MOCK_IMPL(node_t *,
			
 
				+node_get_mutable_by_id,(const char *identity_digest))
			
 
				 {
			
 
				   node_t search, *node;
			
 
				   if (PREDICT_UNLIKELY(the_nodelist == NULL))
			
--- a/src/or/nodelist.h
+++ b/src/or/nodelist.h
@@ -16,7 +16,7 @@
 
				     tor_assert((n)->ri || (n)->rs);                             \
			
 
				   } STMT_END
			
 
				 
			
 
				-node_t *node_get_mutable_by_id(const char *identity_digest);
			
 
				+MOCK_DECL(node_t *, node_get_mutable_by_id,(const char *identity_digest));
			
 
				 MOCK_DECL(const node_t *, node_get_by_id, (const char *identity_digest));
			
 
				 node_t *node_get_mutable_by_ed25519_id(const ed25519_public_key_t *ed_id);
			
 
				 MOCK_DECL(const node_t *, node_get_by_ed25519_id,
			
--- a/src/test/test_connection.c
+++ b/src/test/test_connection.c
@@ -5,6 +5,7 @@
 
				 
			
 
				 #define CONNECTION_PRIVATE
			
 
				 #define MAIN_PRIVATE
			
 
				+#define CONNECTION_OR_PRIVATE
			
 
				 
			
 
				 #include "or.h"
			
 
				 #include "test.h"
			
@@ -13,9 +14,11 @@
 
				 #include "hs_common.h"
			
 
				 #include "main.h"
			
 
				 #include "microdesc.h"
			
 
				+#include "nodelist.h"
			
 
				 #include "networkstatus.h"
			
 
				 #include "rendcache.h"
			
 
				 #include "directory.h"
			
 
				+#include "connection_or.h"
			
 
				 
			
 
				 #include "test_connection.h"
			
 
				 #include "test_helpers.h"
			
@@ -776,6 +779,99 @@ test_conn_download_status(void *arg)
 
				   /* the teardown function removes all the connections in the global list*/;
			
 
				 }
			
 
				 
			
 
				+/* Node handed out by the mocked node_get_*by_id() functions below. */
				+static node_t test_node;
				+
				+/* Mock for node_get_mutable_by_id(): ignore the requested digest and always
				+ * return test_node, with its identity filled with 'c' bytes and a freshly
				+ * reset routerinfo at 18.0.0.1 with OR port 1. */
				+static node_t *
				+mock_node_get_mutable_by_id(const char *digest)
				+{
				+  static routerinfo_t fake_ri;
				+  tor_addr_t parsed_addr;
				+
				+  (void) digest;
				+
				+  /* Rebuild the routerinfo from scratch on every call. */
				+  memset(&fake_ri, 0, sizeof(fake_ri));
				+  tor_addr_parse(&parsed_addr, "18.0.0.1");
				+  fake_ri.addr = tor_addr_to_ipv4h(&parsed_addr);
				+  fake_ri.or_port = 1;
				+
				+  memset(test_node.identity, 'c', sizeof(test_node.identity));
				+  test_node.ri = &fake_ri;
				+
				+  return &test_node;
				+}
			
 
				+
			
 
				+/* Mock for node_get_by_id(): ignore the requested digest, fill the identity
				+ * of test_node with 'c' bytes and return that node. */
				+static const node_t *
				+mock_node_get_by_id(const char *digest)
				+{
				+  const node_t *result = &test_node;
				+
				+  (void) digest;
				+  memset(test_node.identity, 'c', sizeof(test_node.identity));
				+
				+  return result;
				+}
			
 
				+
			
 
				+/* Test whether we correctly track failed connections between relays: a
				+ * destination is connectable with an empty cache, refused right after a
				+ * failure is noted, and connectable again once enough time has passed. The
				+ * same sequence is then repeated with the node lookup functions mocked. */
				+static void
				+test_failed_orconn_tracker(void *arg)
				+{
				+  (void) arg;
				+
				+  int can_connect;
				+  time_t now = 1281533250; /* 2010-08-11 13:27:30 UTC */
				+
				+  update_approx_time(now);
				+
				+  /* Prepare the OR connection that will be used in this test. Zero it first
				+   * so that no field of this stack object is ever read uninitialized. */
				+  or_connection_t or_conn;
				+  memset(&or_conn, 0, sizeof(or_conn));
				+  tt_int_op(AF_INET,OP_EQ, tor_addr_parse(&or_conn.real_addr, "18.0.0.1"));
				+  tt_int_op(AF_INET,OP_EQ, tor_addr_parse(&or_conn.base_.addr, "18.0.0.1"));
				+  or_conn.base_.port = 1;
				+  memset(or_conn.identity_digest, 'c', sizeof(or_conn.identity_digest));
				+
				+  /* Check whether we can connect with an empty failure cache:
				+   * this should succeed */
				+  can_connect = should_connect_to_relay(&or_conn);
				+  tt_int_op(can_connect, OP_EQ, 1);
				+
				+  /* Now add the destination to the failure cache */
				+  note_or_connect_failed(&or_conn);
				+
				+  /* Check again: now it shouldn't connect */
				+  can_connect = should_connect_to_relay(&or_conn);
				+  tt_int_op(can_connect, OP_EQ, 0);
				+
				+  /* Move time forward and check again: the cache should have been cleared and
				+   * now it should connect */
				+  now += 3600;
				+  update_approx_time(now);
				+  can_connect = should_connect_to_relay(&or_conn);
				+  tt_int_op(can_connect, OP_EQ, 1);
				+
				+  /* Now mock the node_get_*by_id() functions so that node lookups return our
				+   * test node, and run the same sequence again. */
				+  MOCK(node_get_by_id, mock_node_get_by_id);
				+  MOCK(node_get_mutable_by_id, mock_node_get_mutable_by_id);
				+
				+  /* The previous failure has expired, so connections are allowed now */
				+  can_connect = should_connect_to_relay(&or_conn);
				+  tt_int_op(can_connect, OP_EQ, 1);
				+
				+  /* Mark it as failed */
				+  note_or_connect_failed(&or_conn);
				+
				+  /* Check that it shouldn't connect now */
				+  can_connect = should_connect_to_relay(&or_conn);
				+  tt_int_op(can_connect, OP_EQ, 0);
				+
				+  /* Move time forward and check again: now it should connect  */
				+  now += 3600;
				+  update_approx_time(now);
				+  can_connect = should_connect_to_relay(&or_conn);
				+  tt_int_op(can_connect, OP_EQ, 1);
				+
				+ done:
				+  /* Restore the real functions so the mocks don't outlive this test. */
				+  UNMOCK(node_get_by_id);
				+  UNMOCK(node_get_mutable_by_id);
				+}
			
 
				+
			
 
				 #define CONNECTION_TESTCASE(name, fork, setup)                           \
			
 
				   { #name, test_conn_##name, fork, &setup, NULL }
			
 
				 
			
@@ -792,6 +888,7 @@ struct testcase_t connection_tests[] = {
 
				   CONNECTION_TESTCASE_ARG(download_status,  TT_FORK,
			
 
				                           test_conn_download_status_st, FLAV_NS),
			
 
				 //CONNECTION_TESTCASE(func_suffix, TT_FORK, setup_func_pair),
			
 
				+  { "failed_orconn_tracker", test_failed_orconn_tracker, TT_FORK, NULL, NULL },
			
 
				   END_OF_TESTCASES
			
 
				 };