瀏覽代碼

Merge branch 'bug5956_squashed'

Nick Mathewson 11 年之前
父節點
當前提交
29136bd7e4
共有 7 個文件被更改,包括 219 次插入63 次删除
  1. 8 0
      changes/feature5956
  2. 12 0
      doc/tor.1.txt
  3. 13 0
      src/or/config.c
  4. 104 48
      src/or/nodelist.c
  5. 3 0
      src/or/or.h
  6. 76 14
      src/or/routerlist.c
  7. 3 1
      src/or/routerlist.h

+ 8 - 0
changes/feature5956

@@ -0,0 +1,8 @@
+  o Major features:
+    - When deciding whether we have enough descriptors to build circuits,
+      instead of looking at raw descriptor counts, look at which fraction of
+      (bandwidth-weighted) paths we're able to build. This approach keeps
+      clients from building circuits if their paths are likely to stand out
+      statistically. The default fraction of paths needed is taken from the
+      consensus directory; you can override it with the new
+      PathsNeededToBuildCircuits option.  Fixes issue 5956.

+ 12 - 0
doc/tor.1.txt

@@ -1299,6 +1299,18 @@ The following options are useful only for clients (that is, if
     things may influence the choice. This option breaks a tie to the
     favor of IPv6. (Default: 0)
 
+**PathsNeededToBuildCircuits** __NUM__::
+    Tor clients don't build circuits for user traffic until they know
+    about enough of the network so that they could potentially construct
+    enough of the possible paths through the network. If this option
+    is set to a fraction between 0.25 and 0.95, Tor won't build circuits
+    until it has enough descriptors or microdescriptors to construct
+    that fraction of possible paths. Note that setting this option too low
+    can make your Tor client less anonymous, and setting it too high can
+    prevent your Tor client from bootstrapping.  If this option is negative,
+    Tor will use a default value chosen by the directory
+    authorities. (Default: -1.)
+
 
 SERVER OPTIONS
 --------------

+ 13 - 0
src/or/config.c

@@ -331,6 +331,7 @@ static config_var_t option_vars_[] = {
   V(PathBiasExtremeUseRate,         DOUBLE,   "-1"),
   V(PathBiasScaleUseThreshold,      INT,      "-1"),
 
+  V(PathsNeededToBuildCircuits,  DOUBLE,   "-1"),
   OBSOLETE("PathlenCoinWeight"),
   V(PerConnBWBurst,              MEMUNIT,  "0"),
   V(PerConnBWRate,               MEMUNIT,  "0"),
@@ -2392,6 +2393,18 @@ options_validate(or_options_t *old_options, or_options_t *options,
     return -1;
   }
 
+  /* A negative PathsNeededToBuildCircuits means "use the default chosen by
+   * the directory authorities", so only values the user actually set are
+   * clamped into the supported range [0.25, 0.95]. */
+  if (options->PathsNeededToBuildCircuits >= 0.0) {
+    if (options->PathsNeededToBuildCircuits < 0.25) {
+      log_warn(LD_CONFIG, "PathsNeededToBuildCircuits is too low. Increasing "
+               "to 0.25");
+      options->PathsNeededToBuildCircuits = 0.25;
+    } else if (options->PathsNeededToBuildCircuits > 0.95) {
+      log_warn(LD_CONFIG, "PathsNeededToBuildCircuits is too high. Decreasing "
+               "to 0.95");
+      options->PathsNeededToBuildCircuits = 0.95;
+    }
+  }
+
   if (options->MaxClientCircuitsPending <= 0 ||
       options->MaxClientCircuitsPending > MAX_MAX_CLIENT_CIRCUITS_PENDING) {
     tor_asprintf(msg,

+ 104 - 48
src/or/nodelist.c

@@ -1213,7 +1213,7 @@ static int have_min_dir_info = 0;
 static int need_to_update_have_min_dir_info = 1;
 /** String describing what we're missing before we have enough directory
  * info. */
-static char dir_info_status[128] = "";
+static char dir_info_status[256] = "";
 
 /** Return true iff we have enough networkstatus and router information to
  * start building circuits.  Right now, this means "more than half the
@@ -1253,10 +1253,12 @@ get_dir_info_status_string(void)
  * descriptors for.  Store the former in *<b>num_usable</b> and the latter in
  * *<b>num_present</b>.  If <b>in_set</b> is non-NULL, only consider those
  * routers in <b>in_set</b>.  If <b>exit_only</b> is true, only consider nodes
- * with the Exit flag.
+ * with the Exit flag.  If <b>descs_out</b> is non-NULL, add a node_t for
+ * each usable descriptor to it.
  */
 static void
 count_usable_descriptors(int *num_present, int *num_usable,
+                         smartlist_t *descs_out,
                          const networkstatus_t *consensus,
                          const or_options_t *options, time_t now,
                          routerset_t *in_set, int exit_only)
@@ -1266,6 +1268,10 @@ count_usable_descriptors(int *num_present, int *num_usable,
 
   SMARTLIST_FOREACH_BEGIN(consensus->routerstatus_list, routerstatus_t *, rs)
     {
+       const node_t *node = node_get_by_id(rs->identity_digest);
+       if (!node)
+         continue; /* This would be a bug: every entry in the consensus is
+                    * supposed to have a node. */
        if (exit_only && ! rs->is_exit)
          continue;
        if (in_set && ! routerset_contains_routerstatus(in_set, rs, -1))
@@ -1282,6 +1288,8 @@ count_usable_descriptors(int *num_present, int *num_usable,
            /* we have the descriptor listed in the consensus. */
            ++*num_present;
          }
+         if (descs_out)
+           smartlist_add(descs_out, (node_t*)node);
        }
      }
   SMARTLIST_FOREACH_END(rs);
@@ -1291,6 +1299,66 @@ count_usable_descriptors(int *num_present, int *num_usable,
             md ? "microdesc" : "desc", exit_only ? " exits" : "s");
 }
 
+/** Return an estimate of which fraction of usable paths through the Tor
+ * network we have available for use.
+ *
+ * The estimate is the product of three per-position fractions, each
+ * computed by frac_nodes_with_descriptors(): guards (WEIGHT_FOR_GUARD),
+ * middle nodes (WEIGHT_FOR_MID) and exits (WEIGHT_FOR_EXIT).  Guard
+ * candidates are the nodes matching <b>options</b>->EntryNodes when that is
+ * set, and otherwise the nodes in the middle list with is_possible_guard
+ * set.  The exit fraction is the smaller of the fraction over all exits and
+ * the fraction over the exits selected with <b>options</b>->ExitNodes.
+ *
+ * The counts produced by the unrestricted count_usable_descriptors() call
+ * are stored in *<b>num_present_out</b> and *<b>num_usable_out</b>.
+ * *<b>status_out</b> is set with tor_asprintf() to a string describing the
+ * three fractions; callers are expected to release it with tor_free(), as
+ * update_router_have_minimum_dir_info() does. */
+static double
+compute_frac_paths_available(const networkstatus_t *consensus,
+                             const or_options_t *options, time_t now,
+                             int *num_present_out, int *num_usable_out,
+                             char **status_out)
+{
+  smartlist_t *guards = smartlist_new();
+  smartlist_t *mid    = smartlist_new();
+  smartlist_t *exits  = smartlist_new();
+  smartlist_t *myexits= smartlist_new();
+  double f_guard, f_mid, f_exit, f_myexit;
+  int np, nu; /* Counts from the restricted calls below; ignored. */
+
+  count_usable_descriptors(num_present_out, num_usable_out,
+                           mid, consensus, options, now, NULL, 0);
+  if (options->EntryNodes) {
+    count_usable_descriptors(&np, &nu, guards, consensus, options, now,
+                             options->EntryNodes, 0);
+  } else {
+    SMARTLIST_FOREACH(mid, const node_t *, node, {
+      if (node->is_possible_guard)
+        smartlist_add(guards, (node_t*)node);
+    });
+  }
+
+  count_usable_descriptors(&np, &nu, exits, consensus, options, now,
+                           NULL, 1);
+  count_usable_descriptors(&np, &nu, myexits, consensus, options, now,
+                           options->ExitNodes, 1);
+
+  f_guard = frac_nodes_with_descriptors(guards, WEIGHT_FOR_GUARD);
+  f_mid   = frac_nodes_with_descriptors(mid,    WEIGHT_FOR_MID);
+  f_exit  = frac_nodes_with_descriptors(exits,  WEIGHT_FOR_EXIT);
+  f_myexit= frac_nodes_with_descriptors(myexits,WEIGHT_FOR_EXIT);
+
+  smartlist_free(guards);
+  smartlist_free(mid);
+  smartlist_free(exits);
+  smartlist_free(myexits);
+
+  /* This is a tricky point here: we don't want to make it easy for a
+   * directory to trickle exits to us until it learns which exits we have
+   * configured, so require that we have a threshold both of total exits
+   * and usable exits. */
+  if (f_myexit < f_exit)
+    f_exit = f_myexit;
+
+  tor_asprintf(status_out,
+               "%02d%% of guards bw, "
+               "%02d%% of midpoint bw, and "
+               "%02d%% of exit bw",
+               (int)(f_guard*100),
+               (int)(f_mid*100),
+               (int)(f_exit*100));
+
+  return f_guard * f_mid * f_exit;
+}
+
 /** We just fetched a new set of descriptors. Compute how far through
  * the "loading descriptors" bootstrapping phase we are, so we can inform
  * the controller of our progress. */
@@ -1306,7 +1374,7 @@ count_loading_descriptors_progress(void)
   if (!consensus)
     return 0; /* can't count descriptors if we have no list of them */
 
-  count_usable_descriptors(&num_present, &num_usable,
+  count_usable_descriptors(&num_present, &num_usable, NULL,
                            consensus, get_options(), now, NULL, 0);
 
   if (num_usable == 0)
@@ -1319,14 +1387,28 @@ count_loading_descriptors_progress(void)
                BOOTSTRAP_STATUS_LOADING_DESCRIPTORS));
 }
 
+/** Return the fraction of paths needed before we're willing to build
+ * circuits.
+ *
+ * If <b>options</b>->PathsNeededToBuildCircuits is non-negative, the user
+ * configured it explicitly and that value is returned.  A negative value
+ * means "not set": in that case the "min_paths_for_circs_pct" parameter is
+ * looked up in the consensus <b>ns</b> (passing DFLT_PCT_USABLE_NEEDED and
+ * the bounds 25 and 95 to networkstatus_get_param()) and the resulting
+ * percentage is converted to a fraction. */
+static double
+get_frac_paths_needed_for_circs(const or_options_t *options,
+                                const networkstatus_t *ns)
+{
+#define DFLT_PCT_USABLE_NEEDED 60
+  /* Compare against 0.0, not 1.0: valid configured values are fractions
+   * below 1, and only negative values mean "use the consensus". */
+  if (options->PathsNeededToBuildCircuits >= 0.0) {
+    return options->PathsNeededToBuildCircuits;
+  } else {
+    return networkstatus_get_param(ns, "min_paths_for_circs_pct",
+                                   DFLT_PCT_USABLE_NEEDED,
+                                   25, 95)/100.0;
+  }
+}
+
 /** Change the value of have_min_dir_info, setting it true iff we have enough
  * network and router information to build circuits.  Clear the value of
  * need_to_update_have_min_dir_info. */
 static void
 update_router_have_minimum_dir_info(void)
 {
-  int num_present = 0, num_usable=0;
-  int num_exit_present = 0, num_exit_usable = 0;
   time_t now = time(NULL);
   int res;
   const or_options_t *options = get_options();
@@ -1355,55 +1437,29 @@ update_router_have_minimum_dir_info(void)
 
   using_md = consensus->flavor == FLAV_MICRODESC;
 
-  count_usable_descriptors(&num_present, &num_usable, consensus, options, now,
-                           NULL, 0);
-  count_usable_descriptors(&num_exit_present, &num_exit_usable,
-                           consensus, options, now, options->ExitNodes, 1);
-
-/* What fraction of desired server descriptors do we need before we will
- * build circuits? */
-#define FRAC_USABLE_NEEDED .75
-/* What fraction of desired _exit_ server descriptors do we need before we
- * will build circuits? */
-#define FRAC_EXIT_USABLE_NEEDED .5
-
-  if (num_present < num_usable * FRAC_USABLE_NEEDED) {
-    tor_snprintf(dir_info_status, sizeof(dir_info_status),
-                 "We have only %d/%d usable %sdescriptors.",
-                 num_present, num_usable, using_md ? "micro" : "");
-    res = 0;
-    control_event_bootstrap(BOOTSTRAP_STATUS_REQUESTING_DESCRIPTORS, 0);
-    goto done;
-  } else if (num_present < 2) {
-    tor_snprintf(dir_info_status, sizeof(dir_info_status),
-                 "Only %d %sdescriptor%s here and believed reachable!",
-                 num_present, using_md ? "micro" : "", num_present ? "" : "s");
-    res = 0;
-    goto done;
-  } else if (num_exit_present < num_exit_usable * FRAC_EXIT_USABLE_NEEDED) {
-    tor_snprintf(dir_info_status, sizeof(dir_info_status),
-                 "We have only %d/%d usable exit node descriptors.",
-                 num_exit_present, num_exit_usable);
-    res = 0;
-    control_event_bootstrap(BOOTSTRAP_STATUS_REQUESTING_DESCRIPTORS, 0);
-    goto done;
-  }
-
-  /* Check for entry nodes. */
-  if (options->EntryNodes) {
-    count_usable_descriptors(&num_present, &num_usable, consensus, options,
-                             now, options->EntryNodes, 0);
+  {
+    char *status = NULL;
+    int num_present=0, num_usable=0;
+    double paths = compute_frac_paths_available(consensus, options, now,
+                                                &num_present, &num_usable,
+                                                &status);
 
-    if (!num_usable || !num_present) {
+    if (paths < get_frac_paths_needed_for_circs(options,consensus)) {
       tor_snprintf(dir_info_status, sizeof(dir_info_status),
-                   "We have only %d/%d usable entry node %sdescriptors.",
-                   num_present, num_usable, using_md?"micro":"");
+                   "We need more %sdescriptors: we have %d/%d, and "
+                   "can only build %02d%% of likely paths. (We have %s.)",
+                   using_md?"micro":"", num_present, num_usable,
+                   (int)(paths*100), status);
+      /* log_notice(LD_NET, "%s", dir_info_status); */
+      tor_free(status);
       res = 0;
+      control_event_bootstrap(BOOTSTRAP_STATUS_REQUESTING_DESCRIPTORS, 0);
       goto done;
     }
-  }
 
-  res = 1;
+    tor_free(status);
+    res = 1;
+  }
 
  done:
   if (res && !have_min_dir_info) {

+ 3 - 0
src/or/or.h

@@ -3946,6 +3946,9 @@ typedef struct {
 
   /** Autobool: should we use the ntor handshake if we can? */
   int UseNTorHandshake;
+
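+  /** Fraction of possible paths that we need to be able to construct
+   * before we will build circuits.  A negative value means "use the
+   * default chosen by the directory authorities". */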
+  double PathsNeededToBuildCircuits;
 } or_options_t;
 
 /** Persistent state for an onion router, as saved to disk. */

+ 76 - 14
src/or/routerlist.c

@@ -42,6 +42,9 @@
 /****************************************************************************/
 
 /* static function prototypes */
+static int compute_weighted_bandwidths(const smartlist_t *sl,
+                                       bandwidth_weight_rule_t rule,
+                                       u64_dbl_t **bandwidths_out);
 static const routerstatus_t *router_pick_directory_server_impl(
                                            dirinfo_type_t auth, int flags);
 static const routerstatus_t *router_pick_trusteddirserver_impl(
@@ -1681,8 +1684,34 @@ kb_to_bytes(uint32_t bw)
  * guards proportionally less.
  */
 static const node_t *
-smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
+smartlist_choose_node_by_bandwidth_weights(const smartlist_t *sl,
                                            bandwidth_weight_rule_t rule)
+{
+  u64_dbl_t *bandwidths=NULL;
+
+  if (compute_weighted_bandwidths(sl, rule, &bandwidths) < 0)
+    return NULL;
+
+  scale_array_elements_to_u64(bandwidths, smartlist_len(sl),
+                              &sl_last_total_weighted_bw);
+
+  {
+    int idx = choose_array_element_by_weight(bandwidths,
+                                             smartlist_len(sl));
+    tor_free(bandwidths);
+    return idx < 0 ? NULL : smartlist_get(sl, idx);
+  }
+}
+
+/** Given a list of routers and a weighting rule as in
+ * smartlist_choose_node_by_bandwidth_weights, compute weighted bandwidth
+ * values for each node and store them in a freshly allocated
+ * *<b>bandwidths_out</b> of the same length as <b>sl</b>, and holding results
+ * as doubles. Return 0 on success, -1 on failure. */
+static int
+compute_weighted_bandwidths(const smartlist_t *sl,
+                            bandwidth_weight_rule_t rule,
+                            u64_dbl_t **bandwidths_out)
 {
   int64_t weight_scale;
   double Wg = -1, Wm = -1, We = -1, Wd = -1;
@@ -1702,7 +1731,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
              "Empty routerlist passed in to consensus weight node "
              "selection for rule %s",
              bandwidth_weight_rule_to_string(rule));
-    return NULL;
+    return -1;
   }
 
   weight_scale = circuit_build_times_get_bw_scale(NULL);
@@ -1756,7 +1785,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
     log_debug(LD_CIRC,
               "Got negative bandwidth weights. Defaulting to old selection"
               " algorithm.");
-    return NULL; // Use old algorithm.
+    return -1; // Use old algorithm.
   }
 
   Wg /= weight_scale;
@@ -1786,7 +1815,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
         log_warn(LD_BUG,
                  "Consensus is not listing bandwidths. Defaulting back to "
                  "old router selection algorithm.");
-        return NULL;
+        return -1;
       }
       this_bw = kb_to_bytes(node->rs->bandwidth);
     } else if (node->ri) {
@@ -1819,20 +1848,53 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
       sl_last_weighted_bw_of_me = (uint64_t) bandwidths[node_sl_idx].dbl;
   } SMARTLIST_FOREACH_END(node);
 
-  log_debug(LD_CIRC, "Choosing node for rule %s based on weights "
+  log_debug(LD_CIRC, "Generated weighted bandwidths for rule %s based "
+            "on weights "
             "Wg=%f Wm=%f We=%f Wd=%f with total bw "U64_FORMAT,
             bandwidth_weight_rule_to_string(rule),
             Wg, Wm, We, Wd, U64_PRINTF_ARG(weighted_bw));
 
-  scale_array_elements_to_u64(bandwidths, smartlist_len(sl),
-                              &sl_last_total_weighted_bw);
+  *bandwidths_out = bandwidths;
 
-  {
-    int idx = choose_array_element_by_weight(bandwidths,
-                                             smartlist_len(sl));
-    tor_free(bandwidths);
-    return idx < 0 ? NULL : smartlist_get(sl, idx);
+  return 0;
+}
+
+/** For all nodes in <b>sl</b>, return the fraction of those nodes, weighted
+ * by their weighted bandwidths with rule <b>rule</b>, for which we have
+ * descriptors.
+ *
+ * Return 0.0 if <b>sl</b> is empty.  If compute_weighted_bandwidths() fails
+ * (returns a negative value), fall back to the unweighted fraction: the
+ * number of nodes for which node_has_descriptor() is true, divided by the
+ * length of <b>sl</b>.  If the summed weighted bandwidth is below 1.0,
+ * return 0 instead of dividing by it.  The bandwidth array obtained from
+ * compute_weighted_bandwidths() is freed before returning. */
+double
+frac_nodes_with_descriptors(const smartlist_t *sl,
+                            bandwidth_weight_rule_t rule)
+{
+  u64_dbl_t *bandwidths = NULL;
+  double total, present;
+
+  if (smartlist_len(sl) == 0)
+    return 0.0;
+
+  if (compute_weighted_bandwidths(sl, rule, &bandwidths) < 0) {
+    int n_with_descs = 0;
+    SMARTLIST_FOREACH(sl, const node_t *, node, {
+      if (node_has_descriptor(node))
+        n_with_descs++;
+    });
+    return ((double)n_with_descs) / (double)smartlist_len(sl);
   }
+
+  total = present = 0.0;
+  SMARTLIST_FOREACH_BEGIN(sl, const node_t *, node) {
+    const double bw = bandwidths[node_sl_idx].dbl;
+    total += bw;
+    if (node_has_descriptor(node))
+      present += bw;
+  } SMARTLIST_FOREACH_END(node);
+
+  tor_free(bandwidths);
+
+  if (total < 1.0)
+    return 0;
+
+  return present / total;
 }
 
 /** Helper function:
@@ -1849,7 +1911,7 @@ smartlist_choose_node_by_bandwidth_weights(smartlist_t *sl,
  * guards proportionally less.
  */
 static const node_t *
-smartlist_choose_node_by_bandwidth(smartlist_t *sl,
+smartlist_choose_node_by_bandwidth(const smartlist_t *sl,
                                    bandwidth_weight_rule_t rule)
 {
   unsigned int i;
@@ -2055,7 +2117,7 @@ smartlist_choose_node_by_bandwidth(smartlist_t *sl,
 /** Choose a random element of status list <b>sl</b>, weighted by
  * the advertised bandwidth of each node */
 const node_t *
-node_sl_choose_by_bandwidth(smartlist_t *sl,
+node_sl_choose_by_bandwidth(const smartlist_t *sl,
                             bandwidth_weight_rule_t rule)
 { /*XXXX MOVE */
   const node_t *ret;

+ 3 - 1
src/or/routerlist.h

@@ -47,8 +47,10 @@ const routerinfo_t *routerlist_find_my_routerinfo(void);
 uint32_t router_get_advertised_bandwidth(const routerinfo_t *router);
 uint32_t router_get_advertised_bandwidth_capped(const routerinfo_t *router);
 
-const node_t *node_sl_choose_by_bandwidth(smartlist_t *sl,
+const node_t *node_sl_choose_by_bandwidth(const smartlist_t *sl,
                                           bandwidth_weight_rule_t rule);
+double frac_nodes_with_descriptors(const smartlist_t *sl,
+                                   bandwidth_weight_rule_t rule);
 
 const node_t *router_choose_random_node(smartlist_t *excludedsmartlist,
                                         struct routerset_t *excludedset,