Browse Source

handle ugly edge case in retrying entrynodes

Specifically, a circ attempt that we'd launched while the network was
down could timeout after we've marked our entrynodes up, marking them
back down again. The fix is to annotate as bad the OR conns that were
around before we did the retry, so if a circuit that's attached to them
times out we don't do anything about it.
Roger Dingledine 13 years ago
parent
commit
9997676802
5 changed files with 30 additions and 10 deletions
  1. 10 1
      src/or/circuitbuild.c
  2. 8 1
      src/or/circuituse.c
  3. 10 6
      src/or/connection_or.c
  4. 1 1
      src/or/connection_or.h
  5. 1 1
      src/or/main.c

+ 10 - 1
src/or/circuitbuild.c

@@ -4604,7 +4604,16 @@ entries_retry_helper(or_options_t *options, int act)
         any_known = 1;
         any_known = 1;
         if (ri->is_running)
         if (ri->is_running)
           any_running = 1; /* some entry is both known and running */
           any_running = 1; /* some entry is both known and running */
-        else if (act) { /* mark it for retry */
+        else if (act) {
+          /* Mark-for-close all TLS connections to this node, since
+           * otherwise there could be one that started 30 seconds
+           * ago, and in 30 seconds it will time out, causing us to mark
+           * the node down and undermine the retry attempt. We mark even
+           * the established conns, since if the network just came back
+           * we'll want to attach circuits to fresh conns. */
+          connection_or_set_bad_connections(ri->cache_info.identity_digest, 1);
+
+          /* mark it for retry */
           router_set_status(ri->cache_info.identity_digest, 1);
           router_set_status(ri->cache_info.identity_digest, 1);
           e->can_retry = 1;
           e->can_retry = 1;
           e->bad_since = 0;
           e->bad_since = 0;

+ 8 - 1
src/or/circuituse.c

@@ -955,8 +955,15 @@ circuit_build_failed(origin_circuit_t *circ)
      * to blame, blame it. Also, avoid this relay for a while, and
      * to blame, blame it. Also, avoid this relay for a while, and
      * fail any one-hop directory fetches destined for it. */
      * fail any one-hop directory fetches destined for it. */
     const char *n_conn_id = circ->cpath->extend_info->identity_digest;
     const char *n_conn_id = circ->cpath->extend_info->identity_digest;
+    int already_marked = 0;
     if (circ->_base.n_conn) {
     if (circ->_base.n_conn) {
       or_connection_t *n_conn = circ->_base.n_conn;
       or_connection_t *n_conn = circ->_base.n_conn;
+      if (n_conn->is_bad_for_new_circs) {
+        /* no need to blow away circuits/streams/etc. Also, don't mark this
+         * router as newly down, since maybe this was just an old circuit
+         * attempt that's finally timing out now. */
+        already_marked = 1;
+      }
       log_info(LD_OR,
       log_info(LD_OR,
                "Our circuit failed to get a response from the first hop "
                "Our circuit failed to get a response from the first hop "
                "(%s:%d). I'm going to try to rotate to a better connection.",
                "(%s:%d). I'm going to try to rotate to a better connection.",
@@ -966,7 +973,7 @@ circuit_build_failed(origin_circuit_t *circ)
       log_info(LD_OR,
       log_info(LD_OR,
                "Our circuit died before the first hop with no connection");
                "Our circuit died before the first hop with no connection");
     }
     }
-    if (n_conn_id) {
+    if (n_conn_id && !already_marked) {
       entry_guard_register_connect_status(n_conn_id, 0, 1, time(NULL));
       entry_guard_register_connect_status(n_conn_id, 0, 1, time(NULL));
       /* if there are any one-hop streams waiting on this circuit, fail
       /* if there are any one-hop streams waiting on this circuit, fail
        * them now so they can retry elsewhere. */
        * them now so they can retry elsewhere. */

+ 10 - 6
src/or/connection_or.c

@@ -610,7 +610,7 @@ connection_or_get_for_extend(const char *digest,
  * appropriate.  Helper for connection_or_set_bad_connections().
  * appropriate.  Helper for connection_or_set_bad_connections().
  */
  */
 static void
 static void
-connection_or_group_set_badness(or_connection_t *head)
+connection_or_group_set_badness(or_connection_t *head, int force)
 {
 {
   or_connection_t *or_conn = NULL, *best = NULL;
   or_connection_t *or_conn = NULL, *best = NULL;
   int n_old = 0, n_inprogress = 0, n_canonical = 0, n_other = 0;
   int n_old = 0, n_inprogress = 0, n_canonical = 0, n_other = 0;
@@ -622,8 +622,9 @@ connection_or_group_set_badness(or_connection_t *head)
     if (or_conn->_base.marked_for_close ||
     if (or_conn->_base.marked_for_close ||
         or_conn->is_bad_for_new_circs)
         or_conn->is_bad_for_new_circs)
       continue;
       continue;
-    if (or_conn->_base.timestamp_created + TIME_BEFORE_OR_CONN_IS_TOO_OLD
-        < now) {
+    if (force ||
+        or_conn->_base.timestamp_created + TIME_BEFORE_OR_CONN_IS_TOO_OLD
+          < now) {
       log_info(LD_OR,
       log_info(LD_OR,
                "Marking OR conn to %s:%d as too old for new circuits "
                "Marking OR conn to %s:%d as too old for new circuits "
                "(fd %d, %d secs old).",
                "(fd %d, %d secs old).",
@@ -718,8 +719,10 @@ connection_or_group_set_badness(or_connection_t *head)
   }
   }
 }
 }
 
 
-/** Go through all the OR connections, and set the is_bad_for_new_circs
+/** Go through all the OR connections (or if <b>digest</b> is non-NULL, just
+ * the OR connections with that digest), and set the is_bad_for_new_circs
  * flag on:
  * flag on:
+ *    - all connections if <b>force</b> is true.
  *    - all connections that are too old.
  *    - all connections that are too old.
  *    - all open non-canonical connections for which a canonical connection
  *    - all open non-canonical connections for which a canonical connection
  *      exists to the same router.
  *      exists to the same router.
@@ -732,13 +735,14 @@ connection_or_group_set_badness(or_connection_t *head)
  * better than another.
  * better than another.
  */
  */
 void
 void
-connection_or_set_bad_connections(void)
+connection_or_set_bad_connections(const char *digest, int force)
 {
 {
   if (!orconn_identity_map)
   if (!orconn_identity_map)
     return;
     return;
 
 
   DIGESTMAP_FOREACH(orconn_identity_map, identity, or_connection_t *, conn) {
   DIGESTMAP_FOREACH(orconn_identity_map, identity, or_connection_t *, conn) {
-    connection_or_group_set_badness(conn);
+    if (!digest || !memcmp(digest, conn->identity_digest, DIGEST_LEN))
+      connection_or_group_set_badness(conn, force);
   } DIGESTMAP_FOREACH_END;
   } DIGESTMAP_FOREACH_END;
 }
 }
 
 

+ 1 - 1
src/or/connection_or.h

@@ -18,7 +18,7 @@ or_connection_t *connection_or_get_for_extend(const char *digest,
                                               const tor_addr_t *target_addr,
                                               const tor_addr_t *target_addr,
                                               const char **msg_out,
                                               const char **msg_out,
                                               int *launch_out);
                                               int *launch_out);
-void connection_or_set_bad_connections(void);
+void connection_or_set_bad_connections(const char *digest, int force);
 
 
 int connection_or_reached_eof(or_connection_t *conn);
 int connection_or_reached_eof(or_connection_t *conn);
 int connection_or_process_inbuf(or_connection_t *conn);
 int connection_or_process_inbuf(or_connection_t *conn);

+ 1 - 1
src/or/main.c

@@ -1173,7 +1173,7 @@ run_scheduled_events(time_t now)
     circuit_expire_old_circuits_serverside(now);
     circuit_expire_old_circuits_serverside(now);
 
 
   /** 5. We do housekeeping for each connection... */
   /** 5. We do housekeeping for each connection... */
-  connection_or_set_bad_connections();
+  connection_or_set_bad_connections(NULL, 0);
   for (i=0;i<smartlist_len(connection_array);i++) {
   for (i=0;i<smartlist_len(connection_array);i++) {
     run_connection_housekeeping(i, now);
     run_connection_housekeeping(i, now);
   }
   }