Browse Source

Merge commit 'origin/maint-0.2.1'

Nick Mathewson 16 years ago
parent
commit
9f28cfe86a
7 changed files with 56 additions and 39 deletions
  1. 12 0
      ChangeLog
  2. 3 3
      src/or/dirserv.c
  3. 1 1
      src/or/eventdns.c
  4. 5 4
      src/or/main.c
  5. 3 2
      src/or/or.h
  6. 18 2
      src/or/rephist.c
  7. 14 27
      src/or/routerlist.c

+ 12 - 0
ChangeLog

@@ -40,9 +40,21 @@ Changes in version 0.2.2.1-alpha - 2009-??-??
 
 
 Changes in version 0.2.1.17-?? - 2009-??-??
+  o Major bugfixes:
+    - Directory authorities were neglecting to mark relays down in their
+      internal histories if the relays fell off the routerlist without
+      ever being found unreachable. So there were relays in the histories
+      that hadn't been seen for eight months, and were listed as being
+      up for eight months. This wreaked havoc on the "median wfu"
+      and "median mtbf" calculations, in turn making Guard and Stable
+      flags very wrong, hurting network performance. Fixes bugs 696 and
+      969. Bugfix on 0.2.0.6-alpha.
+
   o Minor bugfixes:
     - Serve the DirPortFrontPage page even when we have been approaching
       our quotas recently.  Fixes bug 1013; bugfix on 0.2.1.8-alpha.
+    - Do not cap bandwidths reported by directory authorities; they are
+      already adjusted to reflect reality.
 
   o Major features:
     - Clients now use the bandwidth values in the consensus, rather than

+ 3 - 3
src/or/dirserv.c

@@ -797,7 +797,7 @@ directory_remove_invalid(void)
     if (r & FP_REJECT) {
       log_info(LD_DIRSERV, "Router '%s' is now rejected: %s",
                ent->nickname, msg?msg:"");
-      routerlist_remove(rl, ent, 0);
+      routerlist_remove(rl, ent, 0, time(NULL));
       i--;
       changed = 1;
       continue;
@@ -951,8 +951,8 @@ dirserv_set_router_is_running(routerinfo_t *router, time_t now)
     answer = get_options()->AssumeReachable ||
              now < router->last_reachable + REACHABLE_TIMEOUT;
 
-  if (router->is_running && !answer) {
-    /* it was running but now it's not. tell rephist. */
+  if (!answer) {
+    /* not considered reachable. tell rephist. */
     rep_hist_note_router_unreachable(router->cache_info.identity_digest, now);
   }
 

+ 1 - 1
src/or/eventdns.c

@@ -1792,7 +1792,7 @@ evdns_server_request_format_response(struct server_request *req, int err)
 	if (j > 512) {
 overflow:
 		j = 512;
-		buf[3] |= 0x02; /* set the truncated bit. */
+		buf[2] |= 0x02; /* set the truncated bit. */
 	}
 
 	req->response_len = (size_t)j;

+ 5 - 4
src/or/main.c

@@ -925,7 +925,7 @@ run_scheduled_events(time_t now)
     time_to_downrate_stability = rep_hist_downrate_old_runs(now);
   if (authdir_mode_tests_reachability(options)) {
     if (time_to_save_stability < now) {
-      if (time_to_save_stability && rep_hist_record_mtbf_data()<0) {
+      if (time_to_save_stability && rep_hist_record_mtbf_data(now, 1)<0) {
         log_warn(LD_GENERAL, "Couldn't store mtbf data.");
       }
 #define SAVE_STABILITY_INTERVAL (30*60)
@@ -1970,14 +1970,15 @@ tor_cleanup(void)
   /* Remove our pid file. We don't care if there was an error when we
    * unlink, nothing we could do about it anyways. */
   if (options->command == CMD_RUN_TOR) {
+    time_t now = time(NULL);
     if (options->PidFile)
       unlink(options->PidFile);
     if (accounting_is_enabled(options))
-      accounting_record_bandwidth_usage(time(NULL), get_or_state());
+      accounting_record_bandwidth_usage(now, get_or_state());
     or_state_mark_dirty(get_or_state(), 0); /* force an immediate save. */
-    or_state_save(time(NULL));
+    or_state_save(now);
     if (authdir_mode_tests_reachability(options))
-      rep_hist_record_mtbf_data();
+      rep_hist_record_mtbf_data(now, 0);
   }
 #ifdef USE_DMALLOC
   dmalloc_log_stats();

+ 3 - 2
src/or/or.h

@@ -3969,7 +3969,7 @@ void rep_history_clean(time_t before);
 
 void rep_hist_note_router_reachable(const char *id, time_t when);
 void rep_hist_note_router_unreachable(const char *id, time_t when);
-int rep_hist_record_mtbf_data(void);
+int rep_hist_record_mtbf_data(time_t now, int missing_means_down);
 int rep_hist_load_mtbf_data(time_t now);
 
 time_t rep_hist_downrate_old_runs(time_t now);
@@ -4405,7 +4405,8 @@ void routerinfo_free(routerinfo_t *router);
 void extrainfo_free(extrainfo_t *extrainfo);
 void routerlist_free(routerlist_t *rl);
 void dump_routerlist_mem_usage(int severity);
-void routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old);
+void routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old,
+                       time_t now);
 void routerlist_free_all(void);
 void routerlist_reset_warnings(void);
 void router_set_status(const char *digest, int up);

+ 18 - 2
src/or/rephist.c

@@ -683,9 +683,13 @@ rep_history_clean(time_t before)
   }
 }
 
-/** Write MTBF data to disk.  Returns 0 on success, negative on failure. */
+/** Write MTBF data to disk. Return 0 on success, negative on failure.
+ *
+ * If <b>missing_means_down</b>, then if we're about to write an entry
+ * that is still considered up but isn't in our routerlist, consider it
+ * to be down. */
 int
-rep_hist_record_mtbf_data(void)
+rep_hist_record_mtbf_data(time_t now, int missing_means_down)
 {
   char time_buf[ISO_TIME_LEN+1];
 
@@ -745,6 +749,18 @@ rep_hist_record_mtbf_data(void)
     hist = (or_history_t*) or_history_p;
 
     base16_encode(dbuf, sizeof(dbuf), digest, DIGEST_LEN);
+
+    if (missing_means_down && hist->start_of_run &&
+        !router_get_by_digest(digest)) {
+      /* We think this relay is running, but it's not listed in our
+       * routerlist. Somehow it fell out without telling us it went
+       * down. Complain and also correct it. */
+      log_info(LD_HIST,
+               "Relay '%s' is listed as up in rephist, but it's not in "
+               "our routerlist. Correcting.", dbuf);
+      rep_hist_note_router_unreachable(digest, now);
+    }
+
     PRINTF((f, "R %s\n", dbuf));
     if (hist->start_of_run > 0) {
       format_iso_time(time_buf, hist->start_of_run);

+ 14 - 27
src/or/routerlist.c

@@ -1523,15 +1523,12 @@ router_get_advertised_bandwidth_capped(routerinfo_t *router)
   return result;
 }
 
-/** Eventually, the number we return will come from the directory
- * consensus, so clients can dynamically update to better numbers.
- *
- * But for now, or in case there is no consensus available, just return
- * a sufficient default. */
-static uint32_t
-get_max_believable_bandwidth(void)
+/** Return bw*1000, unless bw*1000 would overflow, in which case return
+ * INT32_MAX. */
+static INLINE int32_t
+kb_to_bytes(uint32_t bw)
 {
-  return DEFAULT_MAX_BELIEVABLE_BANDWIDTH;
+  return (bw > (INT32_MAX/1000)) ? INT32_MAX : bw*1000;
 }
 
 /** Helper function:
@@ -1568,7 +1565,6 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
   int n_unknown = 0;
   bitarray_t *exit_bits;
   bitarray_t *guard_bits;
-  uint32_t max_believable_bw = get_max_believable_bandwidth();
   int me_idx = -1;
 
   /* Can't choose exit and guard at same time */
@@ -1598,7 +1594,7 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
       is_exit = status->is_exit;
       is_guard = status->is_possible_guard;
       if (status->has_bandwidth) {
-        this_bw = status->bandwidth*1000;
+        this_bw = kb_to_bytes(status->bandwidth);
       } else { /* guess */
         /* XXX022 once consensuses always list bandwidths, we can take
          * this guessing business out. -RD */
@@ -1617,7 +1613,7 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
       is_exit = router->is_exit;
       is_guard = router->is_possible_guard;
       if (rs && rs->has_bandwidth) {
-        this_bw = rs->bandwidth*1000;
+        this_bw = kb_to_bytes(rs->bandwidth);
       } else if (rs) { /* guess; don't trust the descriptor */
         /* XXX022 once consensuses always list bandwidths, we can take
          * this guessing business out. -RD */
@@ -1626,27 +1622,15 @@ smartlist_choose_by_bandwidth(smartlist_t *sl, bandwidth_weight_rule_t rule,
         flags |= is_exit ? 2 : 0;
         flags |= is_guard ? 4 : 0;
       } else /* bridge or other descriptor not in our consensus */
-        this_bw = router_get_advertised_bandwidth(router);
+        this_bw = router_get_advertised_bandwidth_capped(router);
     }
     if (is_exit)
       bitarray_set(exit_bits, i);
     if (is_guard)
       bitarray_set(guard_bits, i);
-    /* if they claim something huge, don't believe it */
-    if (this_bw > max_believable_bw) {
-      char fp[HEX_DIGEST_LEN+1];
-      base16_encode(fp, sizeof(fp), statuses ?
-                      status->identity_digest :
-                      router->cache_info.identity_digest,
-                    DIGEST_LEN);
-      log_fn(LOG_PROTOCOL_WARN, LD_DIR,
-             "Bandwidth %d for router %s (%s) exceeds allowed max %d, capping",
-             this_bw, router ? router->nickname : "(null)",
-             fp, max_believable_bw);
-      this_bw = max_believable_bw;
-    }
     if (is_known) {
       bandwidths[i] = (int32_t) this_bw; // safe since MAX_BELIEVABLE<INT32_MAX
+      tor_assert(bandwidths[i] >= 0);
       if (is_guard)
         total_guard_bw += this_bw;
       else
@@ -2645,7 +2629,7 @@ routerlist_insert_old(routerlist_t *rl, routerinfo_t *ri)
  * If <b>make_old</b> is true, instead of deleting the router, we try adding
  * it to rl-&gt;old_routers. */
 void
-routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old)
+routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old, time_t now)
 {
   routerinfo_t *ri_tmp;
   extrainfo_t *ei_tmp;
@@ -2653,6 +2637,9 @@ routerlist_remove(routerlist_t *rl, routerinfo_t *ri, int make_old)
   tor_assert(0 <= idx && idx < smartlist_len(rl->routers));
   tor_assert(smartlist_get(rl->routers, idx) == ri);
 
+  /* make sure the rephist module knows that it's not running */
+  rep_hist_note_router_unreachable(ri->cache_info.identity_digest, now);
+
   ri->cache_info.routerlist_index = -1;
   smartlist_del(rl->routers, idx);
   if (idx < smartlist_len(rl->routers)) {
@@ -3344,7 +3331,7 @@ routerlist_remove_old_routers(void)
         log_info(LD_DIR,
                  "Forgetting obsolete (too old) routerinfo for router '%s'",
                  router->nickname);
-        routerlist_remove(routerlist, router, 1);
+        routerlist_remove(routerlist, router, 1, now);
         i--;
       }
     }