Browse Source

Merge remote-tracking branch 'dgoulet/bug24767_033_03' into maint-0.3.3

Nick Mathewson 6 years ago
parent
commit
e0bbef48bf
6 changed files with 335 additions and 3 deletions
  1. 5 0
      changes/bug24767
  2. 225 0
      src/or/connection_or.c
  3. 5 0
      src/or/connection_or.h
  4. 2 2
      src/or/nodelist.c
  5. 1 1
      src/or/nodelist.h
  6. 97 0
      src/test/test_connection.c

+ 5 - 0
changes/bug24767

@@ -0,0 +1,5 @@
+  o Major bugfixes (relay, connection):
+    - Refuse to reconnect to a relay to which a previous connection attempt
+      failed with a connection refused, timeout or error (at the TCP level).
+      The relay won't be retried for 60 seconds after the failure occurred.
+      Fixes bug 24767; bugfix on 0.0.6.

+ 225 - 0
src/or/connection_or.c

@@ -28,6 +28,7 @@
  * part of a subclass (channel_tls_t).
  */
 #define TOR_CHANNEL_INTERNAL_
+#define CONNECTION_OR_PRIVATE
 #include "channel.h"
 #include "channeltls.h"
 #include "circuitbuild.h"
@@ -1122,6 +1123,216 @@ connection_or_group_set_badness_(smartlist_t *group, int force)
   } SMARTLIST_FOREACH_END(or_conn);
 }
 
+/* Lifetime of a connection failure entry, in seconds. An entry older than
+ * this no longer prevents a new connection attempt. */
+#define OR_CONNECT_FAILURE_LIFETIME 60
+/* Minimum interval, in seconds, between two failure cache cleanups. */
+#define OR_CONNECT_FAILURE_CLEANUP_INTERVAL 60
+
+/* Earliest time of the next failure map cleanup. It is tracked because the
+ * cleanup is opportunistic: it runs from should_connect_to_relay(). */
+static time_t or_connect_failure_map_next_cleanup_ts = 0;
+
+/* OR connection failure entry data structure. It is kept in the connection
+ * failure map defined below and indexed by OR identity digest, address and
+ * port.
+ *
+ * We need to identify a connection failure with these three values because we
+ * want to avoid wrongfully blacklisting a relay if someone is trying to
+ * extend to a known identity digest but with the wrong IP/port. For instance,
+ * it can happen if a relay changed its port but the client still has an old
+ * descriptor with the old port. We want to stop connecting to that exact
+ * IP/port/identity combination, not to the relay identity as a whole. */
+typedef struct or_connect_failure_entry_t {
+  HT_ENTRY(or_connect_failure_entry_t) node;
+  /* Identity digest of the relay that the connection was trying to reach. */
+  uint8_t identity_digest[DIGEST_LEN];
+  /* This is the connection address from the base connection_t. After the
+   * connection is checked for canonicity, the base address should represent
+   * what we know instead of where we are connecting to. This is what we need
+   * so we can correlate known relays within the consensus. */
+  tor_addr_t addr;
+  uint16_t port;
+  /* Time of the most recent failed connection attempt. */
+  time_t last_failed_connect_ts;
+} or_connect_failure_entry_t;
+
+/* Hash table holding the connection failure entries. Entries are keyed by
+ * the triplet of address, port and identity digest. */
+static HT_HEAD(or_connect_failure_ht, or_connect_failure_entry_t)
+       or_connect_failures_map = HT_INITIALIZER();
+
+/* Helper: Hashtable equality function. Return 1 iff both entries have the
+ * same identity digest, address and port, else 0. */
+static int
+or_connect_failure_ht_eq(const or_connect_failure_entry_t *a,
+                         const or_connect_failure_entry_t *b)
+{
+  if (!fast_memeq(a->identity_digest, b->identity_digest, DIGEST_LEN)) {
+    return 0;
+  }
+  if (!tor_addr_eq(&a->addr, &b->addr)) {
+    return 0;
+  }
+  return a->port == b->port;
+}
+
+/* Helper: Return the hash for the hashtable of the given entry. The hashed
+ * input is the raw address bytes, followed by the identity digest, followed
+ * by the port. Return 0 if the address family is neither IPv4 nor IPv6. */
+static unsigned int
+or_connect_failure_ht_hash(const or_connect_failure_entry_t *entry)
+{
+  /* Sized for the largest case, an IPv6 address; IPv4 simply uses less. */
+  uint8_t buf[16 + sizeof(uint16_t) + DIGEST_LEN];
+  const void *addr_bytes;
+  size_t addr_len, len;
+  const int family = tor_addr_family(&entry->addr);
+
+  /* Pick the raw address bytes matching the family. */
+  if (family == AF_INET) {
+    addr_bytes = &entry->addr.addr.in_addr.s_addr;
+    addr_len = 4;
+  } else if (family == AF_INET6) {
+    addr_bytes = &entry->addr.addr.in6_addr.s6_addr;
+    addr_len = 16;
+  } else {
+    tor_assert_nonfatal_unreached();
+    return 0;
+  }
+
+  /* Serialize address, digest and port back to back, then hash the lot. */
+  memcpy(buf, addr_bytes, addr_len);
+  len = addr_len;
+  memcpy(buf + len, entry->identity_digest, DIGEST_LEN);
+  len += DIGEST_LEN;
+  set_uint16(buf + len, entry->port);
+  len += sizeof(uint16_t);
+
+  return (unsigned int) siphash24g(buf, len);
+}
+
+HT_PROTOTYPE(or_connect_failure_ht, or_connect_failure_entry_t, node,
+             or_connect_failure_ht_hash, or_connect_failure_ht_eq)
+
+HT_GENERATE2(or_connect_failure_ht, or_connect_failure_entry_t, node,
+             or_connect_failure_ht_hash, or_connect_failure_ht_eq,
+             0.6, tor_reallocarray_, tor_free_)
+
+/* Fill the connect failure entry ocf with the given identity_digest, addr
+ * and port. identity_digest and addr may be NULL, in which case the matching
+ * field of ocf is left untouched; the port is always set. ocf must not be
+ * NULL. */
+static void
+or_connect_failure_init(const char *identity_digest, const tor_addr_t *addr,
+                        uint16_t port, or_connect_failure_entry_t *ocf)
+{
+  tor_assert(ocf);
+
+  ocf->port = port;
+  if (addr != NULL) {
+    tor_addr_copy(&ocf->addr, addr);
+  }
+  if (identity_digest != NULL) {
+    /* The digest field is exactly DIGEST_LEN bytes long. */
+    memcpy(ocf->identity_digest, identity_digest, DIGEST_LEN);
+  }
+}
+
+/* Return a newly allocated connection failure entry, initialized from the
+ * identity digest, base connection address and port of or_conn. This can't
+ * fail.
+ *
+ * The base connection address is used, not real_addr, so that the entry is
+ * keyed exactly like the lookup done by or_connect_failure_find(). Otherwise
+ * an entry stored for a connection whose two addresses differ could never be
+ * found again: the destination would not be refused and every new failure
+ * would insert one more entry. */
+static or_connect_failure_entry_t *
+or_connect_failure_new(const or_connection_t *or_conn)
+{
+  or_connect_failure_entry_t *ocf = tor_malloc_zero(sizeof(*ocf));
+  or_connect_failure_init(or_conn->identity_digest, &TO_CONN(or_conn)->addr,
+                          TO_CONN(or_conn)->port, ocf);
+  return ocf;
+}
+
+/* Look up the connection failure entry matching the identity digest, base
+ * connection address and port of or_conn. Return it, or NULL if there is
+ * none. */
+static or_connect_failure_entry_t *
+or_connect_failure_find(const or_connection_t *or_conn)
+{
+  or_connect_failure_entry_t key;
+
+  tor_assert(or_conn);
+
+  /* Only the three key fields of the stack entry need to be filled in. */
+  or_connect_failure_init(or_conn->identity_digest, &TO_CONN(or_conn)->addr,
+                          TO_CONN(or_conn)->port, &key);
+
+  return HT_FIND(or_connect_failure_ht, &or_connect_failures_map, &key);
+}
+
+/* Record in the connection failure cache that connecting to the destination
+ * of or_conn failed. An entry is created if the cache has none for that
+ * destination; either way its failure timestamp is set to the current
+ * approximate time. */
+STATIC void
+note_or_connect_failed(const or_connection_t *or_conn)
+{
+  or_connect_failure_entry_t *entry;
+
+  tor_assert(or_conn);
+
+  entry = or_connect_failure_find(or_conn);
+  if (!entry) {
+    /* No entry yet for this destination: start tracking it. */
+    entry = or_connect_failure_new(or_conn);
+    HT_INSERT(or_connect_failure_ht, &or_connect_failures_map, entry);
+  }
+  entry->last_failed_connect_ts = approx_time();
+}
+
+/* Remove from the connection failure cache, and free, every entry whose last
+ * failure time is at or before the given cutoff. */
+static void
+or_connect_failure_map_cleanup(time_t cutoff)
+{
+  or_connect_failure_entry_t **iter;
+
+  iter = HT_START(or_connect_failure_ht, &or_connect_failures_map);
+  while (iter != NULL) {
+    or_connect_failure_entry_t *ocf = *iter;
+    if (ocf->last_failed_connect_ts > cutoff) {
+      /* Still fresh: keep it and move on. */
+      iter = HT_NEXT(or_connect_failure_ht, &or_connect_failures_map, iter);
+      continue;
+    }
+    /* Expired: unlink it from the table first, then release it. */
+    iter = HT_NEXT_RMV(or_connect_failure_ht, &or_connect_failures_map, iter);
+    tor_free(ocf);
+  }
+}
+
+/* Return true iff we are allowed to connect to the destination of the given
+ * OR connection, that is the triplet identity_digest, address and port. The
+ * connection is refused only when the failure cache holds an entry for that
+ * destination whose last failure is more recent than
+ * OR_CONNECT_FAILURE_LIFETIME seconds ago.
+ *
+ * As a side effect, expired entries are purged from the failure cache, at
+ * most once every OR_CONNECT_FAILURE_CLEANUP_INTERVAL seconds.
+ *
+ * The or_conn MUST have gone through connection_or_check_canonicity() so the
+ * base address is properly set to what we know or don't know. */
+STATIC int
+should_connect_to_relay(const or_connection_t *or_conn)
+{
+  time_t now, cutoff;
+  const or_connect_failure_entry_t *ocf;
+
+  tor_assert(or_conn);
+
+  now = approx_time();
+  cutoff = now - OR_CONNECT_FAILURE_LIFETIME;
+
+  /* Opportunistically try to cleanup the failure cache. We do that at regular
+   * interval so it doesn't grow too big. */
+  if (or_connect_failure_map_next_cleanup_ts <= now) {
+    or_connect_failure_map_cleanup(cutoff);
+    or_connect_failure_map_next_cleanup_ts =
+      now + OR_CONNECT_FAILURE_CLEANUP_INTERVAL;
+  }
+
+  /* Look if we have failed previously to the same destination as this
+   * OR connection. */
+  ocf = or_connect_failure_find(or_conn);
+  if (ocf == NULL) {
+    /* We have never failed to this destination (or the failure has been
+     * cleaned up already), so let it connect. This is decided explicitly
+     * rather than through a sentinel timestamp of 0, which would compare as
+     * a recent failure whenever the cutoff is negative. */
+    return 1;
+  }
+
+  /* A failure more recent than the cutoff means we must not retry yet. */
+  if (ocf->last_failed_connect_ts > cutoff) {
+    return 0;
+  }
+
+  /* The recorded failure is old enough: ok we can connect! */
+  return 1;
+}
+
 /** <b>conn</b> is in the 'connecting' state, and it failed to complete
  * a TCP connection. Send notifications appropriately.
  *
@@ -1135,6 +1346,7 @@ connection_or_connect_failed(or_connection_t *conn,
   control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED, reason);
   if (!authdir_mode_tests_reachability(get_options()))
     control_event_bootstrap_prob_or(msg, reason, conn);
+  note_or_connect_failed(conn);
 }
 
 /** <b>conn</b> got an error in connection_handle_read_impl() or
@@ -1225,6 +1437,19 @@ connection_or_connect, (const tor_addr_t *_addr, uint16_t port,
   conn->chan = chan;
   chan->conn = conn;
   connection_or_init_conn_from_address(conn, &addr, port, id_digest, ed_id, 1);
+
+  /* We have a proper OR connection setup; now check whether we may connect,
+   * that is, whether we haven't had a recent failure to this destination.
+   * This avoids constantly retrying relays that we think are unreachable. */
+  if (!should_connect_to_relay(conn)) {
+    log_info(LD_GENERAL, "Can't connect to identity %s at %s:%u because we "
+                         "failed earlier. Refusing.",
+             hex_str(id_digest, DIGEST_LEN), fmt_addr(&TO_CONN(conn)->addr),
+             TO_CONN(conn)->port);
+    connection_free_(TO_CONN(conn));
+    return NULL;
+  }
+
   connection_or_change_state(conn, OR_CONN_STATE_CONNECTING);
   control_event_or_conn_status(conn, OR_CONN_EVENT_LAUNCHED, 0);
 

+ 5 - 0
src/or/connection_or.h

@@ -120,6 +120,11 @@ int connection_or_single_set_badness_(time_t now,
                                       int force);
 void connection_or_group_set_badness_(smartlist_t *group, int force);
 
+#ifdef CONNECTION_OR_PRIVATE
+STATIC int should_connect_to_relay(const or_connection_t *or_conn);
+STATIC void note_or_connect_failed(const or_connection_t *or_conn);
+#endif
+
 #ifdef TOR_UNIT_TESTS
 extern int certs_cell_ed25519_disabled_for_testing;
 #endif

+ 2 - 2
src/or/nodelist.c

@@ -161,8 +161,8 @@ init_nodelist(void)
 }
 
 /** As node_get_by_id, but returns a non-const pointer */
-node_t *
-node_get_mutable_by_id(const char *identity_digest)
+MOCK_IMPL(node_t *,
+node_get_mutable_by_id,(const char *identity_digest))
 {
   node_t search, *node;
   if (PREDICT_UNLIKELY(the_nodelist == NULL))

+ 1 - 1
src/or/nodelist.h

@@ -16,7 +16,7 @@
     tor_assert((n)->ri || (n)->rs);                             \
   } STMT_END
 
-node_t *node_get_mutable_by_id(const char *identity_digest);
+MOCK_DECL(node_t *, node_get_mutable_by_id,(const char *identity_digest));
 MOCK_DECL(const node_t *, node_get_by_id, (const char *identity_digest));
 node_t *node_get_mutable_by_ed25519_id(const ed25519_public_key_t *ed_id);
 MOCK_DECL(const node_t *, node_get_by_ed25519_id,

+ 97 - 0
src/test/test_connection.c

@@ -5,6 +5,7 @@
 
 #define CONNECTION_PRIVATE
 #define MAIN_PRIVATE
+#define CONNECTION_OR_PRIVATE
 
 #include "or.h"
 #include "test.h"
@@ -13,9 +14,11 @@
 #include "hs_common.h"
 #include "main.h"
 #include "microdesc.h"
+#include "nodelist.h"
 #include "networkstatus.h"
 #include "rendcache.h"
 #include "directory.h"
+#include "connection_or.h"
 
 #include "test_connection.h"
 #include "test_helpers.h"
@@ -776,6 +779,99 @@ test_conn_download_status(void *arg)
   /* the teardown function removes all the connections in the global list*/;
 }
 
+static node_t test_node;
+
+/* Mock for node_get_mutable_by_id(): ignore digest and always return the
+ * static test_node, with its identity set to all 'c' bytes and backed by a
+ * freshly zeroed routerinfo whose address is 18.0.0.1 and ORPort is 1. */
+static node_t *
+mock_node_get_mutable_by_id(const char *digest)
+{
+  static routerinfo_t mock_ri;
+  tor_addr_t v4_addr;
+
+  (void) digest;
+
+  /* Rebuild the router info from scratch on every call. */
+  memset(&mock_ri, 0, sizeof(mock_ri));
+  tor_addr_parse(&v4_addr, "18.0.0.1");
+  mock_ri.addr = tor_addr_to_ipv4h(&v4_addr);
+  mock_ri.or_port = 1;
+
+  memset(test_node.identity, 'c', sizeof(test_node.identity));
+  test_node.ri = &mock_ri;
+
+  return &test_node;
+}
+
+/* Mock for node_get_by_id(): ignore digest and always return the static
+ * test_node with its identity set to all 'c' bytes. */
+static const node_t *
+mock_node_get_by_id(const char *digest)
+{
+  const node_t *result = &test_node;
+
+  (void) digest;
+  memset(test_node.identity, 'c', sizeof(test_node.identity));
+
+  return result;
+}
+
+/* Test whether we correctly track failed connections between relays: a
+ * destination is connectable while the failure cache has no fresh entry for
+ * it, refused right after a failure is noted, and connectable again once
+ * enough time has passed. */
+static void
+test_failed_orconn_tracker(void *arg)
+{
+  (void) arg;
+
+  int can_connect;
+  time_t now = 1281533250; /* 2010-08-11 13:27:30 UTC */
+
+  update_approx_time(now);
+
+  /* Prepare the OR connection that will be used in this test. Zero it first
+   * so that no field is ever read with indeterminate stack contents. */
+  or_connection_t or_conn;
+  memset(&or_conn, 0, sizeof(or_conn));
+  tt_int_op(AF_INET,OP_EQ, tor_addr_parse(&or_conn.real_addr, "18.0.0.1"));
+  tt_int_op(AF_INET,OP_EQ, tor_addr_parse(&or_conn.base_.addr, "18.0.0.1"));
+  or_conn.base_.port = 1;
+  memset(or_conn.identity_digest, 'c', sizeof(or_conn.identity_digest));
+
+  /* Check whether we can connect with an empty failure cache:
+   * this should succeed */
+  can_connect = should_connect_to_relay(&or_conn);
+  tt_int_op(can_connect, OP_EQ, 1);
+
+  /* Now add the destination to the failure cache */
+  note_or_connect_failed(&or_conn);
+
+  /* Check again: now it shouldn't connect */
+  can_connect = should_connect_to_relay(&or_conn);
+  tt_int_op(can_connect, OP_EQ, 0);
+
+  /* Move time forward and check again: the cache should have been cleared and
+   * now it should connect */
+  now += 3600;
+  update_approx_time(now);
+  can_connect = should_connect_to_relay(&or_conn);
+  tt_int_op(can_connect, OP_EQ, 1);
+
+  /* Now mock the node_get_*by_id() functions to start using the node subsystem
+   * optimization.
+   * NOTE(review): nothing in the tracker code visible in this change calls
+   * these lookups; confirm that this second half still exercises a distinct
+   * code path. */
+  MOCK(node_get_by_id, mock_node_get_by_id);
+  MOCK(node_get_mutable_by_id, mock_node_get_mutable_by_id);
+
+  /* Since we just started using the node subsystem it will allow connections
+   * now */
+  can_connect = should_connect_to_relay(&or_conn);
+  tt_int_op(can_connect, OP_EQ, 1);
+
+  /* Mark it as failed */
+  note_or_connect_failed(&or_conn);
+
+  /* Check that it shouldn't connect now */
+  can_connect = should_connect_to_relay(&or_conn);
+  tt_int_op(can_connect, OP_EQ, 0);
+
+  /* Move time forward and check again: now it should connect  */
+  now += 3600;
+  update_approx_time(now);
+  can_connect = should_connect_to_relay(&or_conn);
+  tt_int_op(can_connect, OP_EQ, 1);
+
+ done:
+  /* Restore the real node lookups so the mocks cannot leak into other
+   * tests. */
+  UNMOCK(node_get_by_id);
+  UNMOCK(node_get_mutable_by_id);
+}
+
 #define CONNECTION_TESTCASE(name, fork, setup)                           \
   { #name, test_conn_##name, fork, &setup, NULL }
 
@@ -792,6 +888,7 @@ struct testcase_t connection_tests[] = {
   CONNECTION_TESTCASE_ARG(download_status,  TT_FORK,
                           test_conn_download_status_st, FLAV_NS),
 //CONNECTION_TESTCASE(func_suffix, TT_FORK, setup_func_pair),
+  { "failed_orconn_tracker", test_failed_orconn_tracker, TT_FORK, NULL, NULL },
   END_OF_TESTCASES
 };