Преглед на файлове

Start noticing and reporting bootstrapping failures too. It looks like
we never bothered learning why OR conns fail, so next step is to add some
infrastructure for that.


svn:r15091

Roger Dingledine преди 16 години
родител
ревизия
baa3cea213
променени са 6 файла, в които са добавени 94 реда и са изтрити 32 реда
  1. 1 1
      doc/spec/proposals/ideas/xxx-bootstrap-phases.txt
  2. 17 9
      src/or/connection.c
  3. 3 0
      src/or/connection_or.c
  4. 66 20
      src/or/control.c
  5. 2 1
      src/or/directory.c
  6. 5 1
      src/or/or.h

+ 1 - 1
doc/spec/proposals/ideas/xxx-bootstrap-phases.txt

@@ -165,7 +165,7 @@ Status: Open
   tag=circuit_create "Establishing circuits"
 
   Once we've finished our TLS handshake with an entry guard, we will
-  set about rying to make some 3-hop circuits in case we need them soon.
+  set about trying to make some 3-hop circuits in case we need them soon.
 
   Phase 100:
   tag=done summary="Done"

+ 17 - 9
src/or/connection.c

@@ -497,9 +497,18 @@ connection_about_to_close_connection(connection_t *conn)
         if (connection_or_nonopen_was_started_here(or_conn)) {
           rep_hist_note_connect_failed(or_conn->identity_digest, now);
           entry_guard_register_connect_status(or_conn->identity_digest,0,now);
-          router_set_status(or_conn->identity_digest, 0);
-          control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
-                  control_tls_error_to_reason(or_conn->tls_error));
+          if (!get_options()->HttpsProxy)
+            router_set_status(or_conn->identity_digest, 0);
+          if (conn->state == OR_CONN_STATE_CONNECTING) {
+            control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED, 0);
+            control_event_bootstrap_problem(
+              tor_socket_strerror(or_conn->socket_error), 0);
+          } else {
+            int reason = control_tls_error_to_reason(or_conn->tls_error);
+            control_event_or_conn_status(or_conn, OR_CONN_EVENT_FAILED,
+                                         reason);
+            control_event_bootstrap_problem("foo", reason);
+          }
         }
         /* Inform any pending (not attached) circs that they should
          * give up. */
@@ -1842,13 +1851,15 @@ loop_again:
   before = buf_datalen(conn->inbuf);
   if (connection_read_to_buf(conn, &max_to_read) < 0) {
     /* There's a read error; kill the connection.*/
-    connection_close_immediate(conn); /* Don't flush; connection is dead. */
+    if (conn->type == CONN_TYPE_OR)
+      TO_OR_CONN(conn)->socket_error = tor_socket_errno(conn->s);
     if (CONN_IS_EDGE(conn)) {
       edge_connection_t *edge_conn = TO_EDGE_CONN(conn);
       connection_edge_end_errno(edge_conn);
       if (edge_conn->socks_request) /* broken, don't send a socks reply back */
         edge_conn->socks_request->has_finished = 1;
     }
+    connection_close_immediate(conn); /* Don't flush; connection is dead. */
     connection_mark_for_close(conn);
     return -1;
   }
@@ -2145,14 +2156,11 @@ connection_handle_write(connection_t *conn, int force)
         log_info(LD_NET,"in-progress connect failed. Removing.");
         if (CONN_IS_EDGE(conn))
           connection_edge_end_errno(TO_EDGE_CONN(conn));
+        if (conn->type == CONN_TYPE_OR)
+          TO_OR_CONN(conn)->socket_error = e;
 
         connection_close_immediate(conn);
         connection_mark_for_close(conn);
-        /* it's safe to pass OPs to router_set_status(), since it just
-         * ignores unrecognized routers
-         */
-        if (conn->type == CONN_TYPE_OR && !get_options()->HttpsProxy)
-          router_set_status(TO_OR_CONN(conn)->identity_digest, 0);
         return -1;
       } else {
         return 0; /* no change, see if next time is better */

+ 3 - 0
src/or/connection_or.c

@@ -546,6 +546,8 @@ connection_or_connect(uint32_t addr, uint16_t port, const char *id_digest)
       }
       control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
               END_OR_CONN_REASON_TCP_REFUSED);
+      /* XXX connection_connect() can fail for all sorts of other reasons */
+      control_event_bootstrap_problem("foo", END_OR_CONN_REASON_TCP_REFUSED);
       connection_free(TO_CONN(conn));
       return NULL;
     case 0:
@@ -795,6 +797,7 @@ connection_or_check_valid_tls_handshake(or_connection_t *conn,
       router_set_status(conn->identity_digest, 0);
       control_event_or_conn_status(conn, OR_CONN_EVENT_FAILED,
               END_OR_CONN_REASON_OR_IDENTITY);
+      control_event_bootstrap_problem("foo", END_OR_CONN_REASON_OR_IDENTITY);
       as_advertised = 0;
     }
     if (authdir_mode_tests_reachability(options)) {

+ 66 - 20
src/or/control.c

@@ -3825,7 +3825,7 @@ init_cookie_authentication(int enabled)
 
 /** Convert the name of a bootstrapping phase <b>s</b> into strings
  * <b>tag</b> and <b>summary</b> suitable for display by the controller. */
-static void
+static int
 bootstrap_status_to_string(bootstrap_status_t s, const char **tag,
                            const char **summary)
 {
@@ -3887,48 +3887,94 @@ bootstrap_status_to_string(bootstrap_status_t s, const char **tag,
       *summary = "Done";
       break;
     default:
-      log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
+//      log_warn(LD_BUG, "Unrecognized bootstrap status code %d", s);
       *tag = *summary = "unknown";
+      return -1;
   }
+  return 0;
 }
 
+/** What percentage through the bootstrap process are we? We remember
+ * this so we can avoid sending redundant bootstrap status events, and
+ * so we can guess context for the bootstrap messages which are
+ * ambiguous. */
+static int bootstrap_percent = 0;
+
+/** How many problems have we had getting to the next bootstrapping phase?
+ * These include failure to establish a connection to a Tor relay,
+ * failures to finish the TLS handshake, failures to validate the
+ * consensus document, etc. */
+static int bootstrap_problems = 0;
+
+/* We only tell the controller once we've hit a threshold of problems
+ * for the current phase. */
+#define BOOTSTRAP_PROBLEM_THRESHOLD 10
+
 /** Called when Tor has made progress at bootstrapping its directory
- * information and initial circuits. <b>status</b> is the new status,
- * that is, what task we will be doing next. <b>percent</b> is zero if
- * we just started this task, else it represents progress on the task.
- */
-int
-control_event_bootstrap(bootstrap_status_t status, int percent)
+ * information and initial circuits.
+ *
+ * <b>status</b> is the new status, that is, what task we will be doing
+ * next. <b>percent</b> is zero if we just started this task, else it
+ * represents progress on the task. */
+void
+control_event_bootstrap(bootstrap_status_t status, int progress)
 {
-  static int last_percent = 0;
   const char *tag, *summary;
 
-  if (last_percent == 100)
-    return 0; /* already bootstrapped; nothing to be done here. */
+  if (bootstrap_percent == 100)
+    return; /* already bootstrapped; nothing to be done here. */
 
   /* special case for handshaking status, since our TLS handshaking code
    * can't distinguish what the connection is going to be for. */
   if (status == BOOTSTRAP_STATUS_HANDSHAKE) {
-    if (last_percent < BOOTSTRAP_STATUS_CONN_OR) {
+    if (bootstrap_percent < BOOTSTRAP_STATUS_CONN_OR) {
       status =  BOOTSTRAP_STATUS_HANDSHAKE_DIR;
     } else {
       status = BOOTSTRAP_STATUS_HANDSHAKE_OR;
     }
   }
 
-  if (status > last_percent || (percent && percent > last_percent)) {
+  if (status > bootstrap_percent ||
+      (progress && progress > bootstrap_percent)) {
     bootstrap_status_to_string(status, &tag, &summary);
     log_notice(LD_CONTROL, "Bootstrapped %d%%: %s.",
-               percent ? percent : status, summary);
+               progress ? progress : status, summary);
     control_event_client_status(LOG_NOTICE,
         "BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\"",
-        percent ? percent : status, tag, summary);
+        progress ? progress : status, tag, summary);
+    if (status > bootstrap_percent) {
+      bootstrap_percent = status; /* new milestone reached */
+    }
+    if (progress > bootstrap_percent) {
+      /* incremental progress within a milestone */
+      bootstrap_percent = progress;
+    }
+    bootstrap_problems = 0; /* Progress! Reset our problem counter. */
   }
+}
 
-  if (status > last_percent) /* new milestone reached */
-    last_percent = status ;
-  if (percent > last_percent) /* incremental progress within a milestone */
-    last_percent = percent;
-  return 0;
+/* Called when Tor has failed to make bootstrapping progress in a way
+ * that indicates a problem. <b>warn</b> gives a hint as to why, and
+ * <b>reason</b> provides an "or_conn_end_reason" tag).
+ */
+void
+control_event_bootstrap_problem(const char *warn, int reason)
+{
+  int status = bootstrap_percent;
+  const char *tag, *summary;
+
+//  if (++bootstrap_problems != BOOTSTRAP_PROBLEM_THRESHOLD)
+//    return; /* no worries yet */
+
+  while (bootstrap_status_to_string(status, &tag, &summary) < 0)
+    status--; /* find a recognized status string based on current progress */
+
+  log_warn(LD_CONTROL, "Problem bootstrapping. Stuck at %d%%: %s. (%s; %s)",
+           status, summary, warn,
+           or_conn_end_reason_to_string(reason));
+  control_event_client_status(LOG_WARN,
+      "BOOTSTRAP PROGRESS=%d TAG=%s SUMMARY=\"%s\" WARNING=\"%s\" %s",
+      bootstrap_percent, tag, summary, warn,
+      or_conn_end_reason_to_string(reason));
 }
 

+ 2 - 1
src/or/directory.c

@@ -2186,7 +2186,8 @@ note_client_request(int purpose, int compressed, size_t bytes)
     case DIR_PURPOSE_FETCH_CONSENSUS:     kind = "dl/consensus"; break;
     case DIR_PURPOSE_FETCH_CERTIFICATE:   kind = "dl/cert"; break;
     case DIR_PURPOSE_FETCH_STATUS_VOTE:   kind = "dl/vote"; break;
-    case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig"; break;
+    case DIR_PURPOSE_FETCH_DETACHED_SIGNATURES: kind = "dl/detached_sig";
+         break;
     case DIR_PURPOSE_FETCH_SERVERDESC:    kind = "dl/server"; break;
     case DIR_PURPOSE_FETCH_EXTRAINFO:     kind = "dl/extra"; break;
     case DIR_PURPOSE_UPLOAD_DIR:          kind = "dl/ul-dir"; break;

+ 5 - 1
src/or/or.h

@@ -905,6 +905,9 @@ typedef struct or_connection_t {
 
   tor_tls_t *tls; /**< TLS connection state. */
   int tls_error; /**< Last tor_tls error code. */
+  /* XXX either merge this with tls_error, or do all our activity right
+   * when we compute this value so we don't have to store it. */
+  int socket_error; /**< If conn dies, remember why. */
   /** When we last used this conn for any client traffic. If not
    * recent, we can rate limit it further. */
   time_t client_used;
@@ -3035,7 +3038,8 @@ typedef enum {
   BOOTSTRAP_STATUS_DONE=100
 } bootstrap_status_t;
 
-int control_event_bootstrap(bootstrap_status_t status, int percent);
+void control_event_bootstrap(bootstrap_status_t status, int progress);
+void control_event_bootstrap_problem(const char *warn, int reason);
 
 #ifdef CONTROL_PRIVATE
 /* Used only by control.c and test.c */