Browse Source

Merge remote-tracking branch 'ahf-oniongit/bugs/24605'

Nick Mathewson 6 years ago
parent
commit
7ca0a9c4ae
7 changed files with 118 additions and 2 deletions
  1. 4 0
      changes/bug24605
  2. 5 0
      doc/tor.1.txt
  3. 6 1
      src/or/config.c
  4. 82 1
      src/or/main.c
  5. 5 0
      src/or/main.h
  6. 2 0
      src/or/or.h
  7. 14 0
      src/or/status.c

+ 4 - 0
changes/bug24605

@@ -0,0 +1,4 @@
+  o Minor features (instrumentation):
+    - Add the MainloopStats option to Tor that allows developers to get
+      instrumentation information from the main event loop via the heartbeat
+      messages. Closes ticket 24605.

+ 5 - 0
doc/tor.1.txt

@@ -2032,6 +2032,11 @@ is non-zero):
     to 0 will disable the heartbeat. Otherwise, it must be at least 30
     minutes. (Default: 6 hours)
 
+[[MainloopStats]] **MainloopStats** **0**|**1**::
+    Log main loop statistics every **HeartbeatPeriod** seconds. This is a log
+    level __notice__ message designed to help developers instrumenting Tor's
+    main event loop. (Default: 0)
+
 [[AccountingMax]] **AccountingMax** __N__ **bytes**|**KBytes**|**MBytes**|**GBytes**|**TBytes**|**KBits**|**MBits**|**GBits**|**TBits**::
     Limits the max number of bytes sent and received within a set time period
     using a given calculation rule (see: AccountingStart, AccountingRule).

+ 6 - 1
src/or/config.c

@@ -18,7 +18,7 @@
  * inspecting values that are calculated as a result of the
  * configured options.
  *
- * <h3>How to add  new options</h3>
+ * <h3>How to add new options</h3>
  *
  * To add new items to the torrc, there are a minimum of three places to edit:
  * <ul>
@@ -360,6 +360,7 @@ static config_var_t option_vars_[] = {
   V(GuardLifetime,               INTERVAL, "0 minutes"),
   V(HardwareAccel,               BOOL,     "0"),
   V(HeartbeatPeriod,             INTERVAL, "6 hours"),
+  V(MainloopStats,               BOOL,     "0"),
   V(AccelName,                   STRING,   NULL),
   V(AccelDir,                    FILENAME, NULL),
   V(HashedControlPassword,       LINELIST, NULL),
@@ -2157,6 +2158,10 @@ options_act(const or_options_t *old_options)
     if (options->PerConnBWRate != old_options->PerConnBWRate ||
         options->PerConnBWBurst != old_options->PerConnBWBurst)
       connection_or_update_token_buckets(get_connection_array(), options);
+
+    if (options->MainloopStats != old_options->MainloopStats) {
+      reset_main_loop_counters();
+    }
   }
 
   /* Only collect directory-request statistics on relays and bridges. */

+ 82 - 1
src/or/main.c

@@ -179,6 +179,12 @@ static uint64_t stats_n_bytes_written = 0;
 time_t time_of_process_start = 0;
 /** How many seconds have we been running? */
 long stats_n_seconds_working = 0;
+/** How many times have we returned from the main loop successfully? */
+static uint64_t stats_n_main_loop_successes = 0;
+/** How many times have we received an error from the main loop? */
+static uint64_t stats_n_main_loop_errors = 0;
+/** How many times have we returned from the main loop with no events. */
+static uint64_t stats_n_main_loop_idle = 0;
 
 /** How often will we honor SIGNEWNYM requests? */
 #define MAX_SIGNEWNYM_RATE 10
@@ -493,6 +499,57 @@ connection_is_reading(connection_t *conn)
     (conn->read_event && event_pending(conn->read_event, EV_READ, NULL));
 }
 
+/** Reset our main loop counters. */
+void
+reset_main_loop_counters(void)
+{
+  stats_n_main_loop_successes = 0;
+  stats_n_main_loop_errors = 0;
+  stats_n_main_loop_idle = 0;
+}
+
+/** Increment the main loop success counter. */
+static void
+increment_main_loop_success_count(void)
+{
+  ++stats_n_main_loop_successes;
+}
+
+/** Get the main loop success counter. */
+uint64_t
+get_main_loop_success_count(void)
+{
+  return stats_n_main_loop_successes;
+}
+
+/** Increment the main loop error counter. */
+static void
+increment_main_loop_error_count(void)
+{
+  ++stats_n_main_loop_errors;
+}
+
+/** Get the main loop error counter. */
+uint64_t
+get_main_loop_error_count(void)
+{
+  return stats_n_main_loop_errors;
+}
+
+/** Increment the main loop idle counter. */
+static void
+increment_main_loop_idle_count(void)
+{
+  ++stats_n_main_loop_idle;
+}
+
+/** Get the main loop idle counter. */
+uint64_t
+get_main_loop_idle_count(void)
+{
+  return stats_n_main_loop_idle;
+}
+
 /** Check whether <b>conn</b> is correct in having (or not having) a
  * read/write event (passed in <b>ev</b>). On success, return 0. On failure,
  * log a warning and return -1. */
@@ -2693,6 +2750,8 @@ run_main_loop_once(void)
   if (main_loop_should_exit)
     return 0;
 
+  const or_options_t *options = get_options();
+
 #ifndef _WIN32
   /* Make it easier to tell whether libevent failure is our fault or not. */
   errno = 0;
@@ -2702,7 +2761,14 @@ run_main_loop_once(void)
    * so that libevent knows to run their callbacks. */
   SMARTLIST_FOREACH(active_linked_connection_lst, connection_t *, conn,
                     event_active(conn->read_event, EV_READ, 1));
-  called_loop_once = smartlist_len(active_linked_connection_lst) ? 1 : 0;
+
+  if (options->MainloopStats) {
+    /* We always enforce that EVLOOP_ONCE is passed to event_base_loop() if we
+     * are collecting main loop statistics. */
+    called_loop_once = 1;
+  } else {
+    called_loop_once = smartlist_len(active_linked_connection_lst) ? 1 : 0;
+  }
 
   /* Make sure we know (about) what time it is. */
   update_approx_time(time(NULL));
@@ -2714,6 +2780,21 @@ run_main_loop_once(void)
   loop_result = event_base_loop(tor_libevent_get_base(),
                                 called_loop_once ? EVLOOP_ONCE : 0);
 
+  if (options->MainloopStats) {
+    /* Update our main loop counters. */
+    if (loop_result == 0) {
+      // The call was succesful.
+      increment_main_loop_success_count();
+    } else if (loop_result == -1) {
+      // The call was erroneous.
+      increment_main_loop_error_count();
+    } else if (loop_result == 1) {
+      // The call didn't have any active or pending events
+      // to handle.
+      increment_main_loop_idle_count();
+    }
+  }
+
   /* Oh, the loop failed.  That might be an error that we need to
    * catch, but more likely, it's just an interrupted poll() call or something,
    * and we should try again. */

+ 5 - 0
src/or/main.h

@@ -81,6 +81,11 @@ void tor_free_all(int postfork);
 int do_main_loop(void);
 int tor_init(int argc, char **argv);
 
+void reset_main_loop_counters(void);
+uint64_t get_main_loop_success_count(void);
+uint64_t get_main_loop_error_count(void);
+uint64_t get_main_loop_idle_count(void);
+
 extern time_t time_of_process_start;
 extern long stats_n_seconds_working;
 extern int quiet_level;

+ 2 - 0
src/or/or.h

@@ -3972,6 +3972,8 @@ typedef struct {
 
   int HeartbeatPeriod; /**< Log heartbeat messages after this many seconds
                         * have passed. */
+  int MainloopStats; /**< Log main loop statistics as part of the
+                      * heartbeat messages. */
 
   char *HTTPProxy; /**< hostname[:port] to use as http proxy, if any. */
   tor_addr_t HTTPProxyAddr; /**< Parsed IPv4 addr for http proxy, if any. */

+ 14 - 0
src/or/status.c

@@ -157,6 +157,20 @@ log_heartbeat(time_t now)
     tor_free(msg);
   }
 
+  if (options->MainloopStats) {
+    const uint64_t main_loop_success_count = get_main_loop_success_count();
+    const uint64_t main_loop_error_count = get_main_loop_error_count();
+    const uint64_t main_loop_idle_count = get_main_loop_idle_count();
+
+    log_fn(LOG_NOTICE, LD_HEARTBEAT, "Main event loop statistics: "
+         U64_FORMAT " succesful returns, "
+         U64_FORMAT " erroneous returns, and "
+         U64_FORMAT " idle returns.",
+         U64_PRINTF_ARG(main_loop_success_count),
+         U64_PRINTF_ARG(main_loop_error_count),
+         U64_PRINTF_ARG(main_loop_idle_count));
+  }
+
   tor_free(uptime);
   tor_free(bw_sent);
   tor_free(bw_rcvd);