Parcourir la source

Merge remote-tracking branch 'public/bug15942_v2_alternative'

Nick Mathewson il y a 8 ans
Parent
commit
703254a832
9 fichiers modifiés avec 219 ajouts et 20 suppressions
  1. 3 0
      changes/bug15942
  2. 118 11
      src/or/directory.c
  3. 6 0
      src/or/directory.h
  4. 1 0
      src/or/entrynodes.c
  5. 4 4
      src/or/networkstatus.c
  6. 18 0
      src/or/or.h
  7. 3 0
      src/or/routerlist.c
  8. 2 0
      src/or/routerparse.c
  9. 64 5
      src/test/test_dir.c

+ 3 - 0
changes/bug15942

@@ -0,0 +1,3 @@
+  o Bugfixes (downloading):
+    - Use random exponential backoffs when retrying downloads from the dir
+      servers. Fixes bug 15942; bugfix on ?????.

+ 118 - 11
src/or/directory.c

@@ -3763,17 +3763,84 @@ find_dl_schedule(download_status_t *dls, const or_options_t *options)
   return NULL;
 }
 
-/* Find the current delay for dls based on schedule.
- * Set dls->next_attempt_at based on now, and return the delay.
+/** Decide which minimum and maximum delay step we want to use based on
+ * descriptor type in <b>dls</b> and <b>options</b>.
+ * Helper function for download_status_schedule_get_delay(). */
+STATIC void
+find_dl_min_and_max_delay(download_status_t *dls, const or_options_t *options,
+                          int *min, int *max)
+{
+  tor_assert(dls);
+  tor_assert(options);
+  tor_assert(min);
+  tor_assert(max);
+
+  /*
+   * For now, just use the existing schedule config stuff and pick the
+   * first/last entries off to get min/max delay for backoff purposes
+   */
+  const smartlist_t *schedule = find_dl_schedule(dls, options);
+  tor_assert(schedule != NULL && smartlist_len(schedule) >= 2);
+  *min = *((int *)(smartlist_get(schedule, 0)));
+  *max = *((int *)((smartlist_get(schedule, smartlist_len(schedule) - 1))));
+}
+
+/** Advance one delay step.  The algorithm is to use the previous delay to
+ * compute an increment, we construct a value uniformly at random between
+ * delay and MAX(delay*2,delay+1).  We then clamp that value to be no larger
+ * than max_delay, and return it.
+ *
+ * Requires that delay is less than INT_MAX, and delay is in [0,max_delay].
+ */
+STATIC int
+next_random_exponential_delay(int delay, int max_delay)
+{
+  /* Check preconditions */
+  if (BUG(delay > max_delay))
+    delay = max_delay;
+  if (BUG(delay == INT_MAX))
+    delay -= 1; /* prevent overflow */
+  if (BUG(delay < 0))
+    delay = 0;
+
+  /* How much are we willing to add to the delay? */
+  int max_increment;
+
+  if (delay)
+    max_increment = delay; /* no more than double. */
+  else
+    max_increment = 1; /* we're always willing to slow down a little. */
+
+  /* the + 1 here is so that we include the end of the interval */
+  int increment = crypto_rand_int(max_increment+1);
+
+  if (increment < max_delay - delay)
+    return delay + increment;
+  else
+    return max_delay;
+}
+
+/** Find the current delay for dls based on schedule or min_delay/
+ * max_delay if we're using exponential backoff.  If dls->backoff is
+ * DL_SCHED_RANDOM_EXPONENTIAL, we must have 0 <= min_delay <= max_delay <=
+ * INT_MAX, but schedule may be set to NULL; otherwise schedule is required.
+ * This function sets dls->next_attempt_at based on now, and returns the delay.
  * Helper for download_status_increment_failure and
  * download_status_increment_attempt. */
 STATIC int
 download_status_schedule_get_delay(download_status_t *dls,
                                    const smartlist_t *schedule,
+                                   int min_delay, int max_delay,
                                    time_t now)
 {
   tor_assert(dls);
-  tor_assert(schedule);
+  /* We don't need a schedule if we're using random exponential backoff */
+  tor_assert(dls->backoff == DL_SCHED_RANDOM_EXPONENTIAL ||
+             schedule != NULL);
+  /* If we're using random exponential backoff, we do need min/max delay */
+  tor_assert(dls->backoff != DL_SCHED_RANDOM_EXPONENTIAL ||
+             (min_delay >= 0 && max_delay >= min_delay &&
+              max_delay <= INT_MAX));
 
   int delay = INT_MAX;
   uint8_t dls_schedule_position = (dls->increment_on
@@ -3781,12 +3848,42 @@ download_status_schedule_get_delay(download_status_t *dls,
                                    ? dls->n_download_attempts
                                    : dls->n_download_failures);
 
-  if (dls_schedule_position < smartlist_len(schedule))
-    delay = *(int *)smartlist_get(schedule, dls_schedule_position);
-  else if (dls_schedule_position == IMPOSSIBLE_TO_DOWNLOAD)
-    delay = INT_MAX;
-  else
-    delay = *(int *)smartlist_get(schedule, smartlist_len(schedule) - 1);
+  if (dls->backoff == DL_SCHED_DETERMINISTIC) {
+    if (dls_schedule_position < smartlist_len(schedule))
+      delay = *(int *)smartlist_get(schedule, dls_schedule_position);
+    else if (dls_schedule_position == IMPOSSIBLE_TO_DOWNLOAD)
+      delay = INT_MAX;
+    else
+      delay = *(int *)smartlist_get(schedule, smartlist_len(schedule) - 1);
+  } else if (dls->backoff == DL_SCHED_RANDOM_EXPONENTIAL) {
+    /* Check if we missed a reset somehow */
+    if (dls->last_backoff_position > dls_schedule_position) {
+      dls->last_backoff_position = 0;
+      dls->last_delay_used = 0;
+    }
+
+    if (dls_schedule_position > 0) {
+      delay = dls->last_delay_used;
+
+      while (dls->last_backoff_position < dls_schedule_position) {
+        /* Do one increment step */
+        delay = next_random_exponential_delay(delay, max_delay);
+        /* Update our position */
+        ++(dls->last_backoff_position);
+      }
+    } else {
+      /* If we're just starting out, use the minimum delay */
+      delay = min_delay;
+    }
+
+    /* Clamp it within min/max if we have them */
+    if (min_delay >= 0 && delay < min_delay) delay = min_delay;
+    if (max_delay != INT_MAX && delay > max_delay) delay = max_delay;
+
+    /* Store it for next time */
+    dls->last_backoff_position = dls_schedule_position;
+    dls->last_delay_used = delay;
+  }
 
   /* A negative delay makes no sense. Knowing that delay is
    * non-negative allows us to safely do the wrapping check below. */
@@ -3847,6 +3944,8 @@ download_status_increment_failure(download_status_t *dls, int status_code,
                                   const char *item, int server, time_t now)
 {
   int increment = -1;
+  int min_delay = 0, max_delay = INT_MAX;
+
   tor_assert(dls);
 
   /* only count the failure if it's permanent, or we're a server */
@@ -3867,7 +3966,9 @@ download_status_increment_failure(download_status_t *dls, int status_code,
     /* only return a failure retry time if this schedule increments on failures
      */
     const smartlist_t *schedule = find_dl_schedule(dls, get_options());
-    increment = download_status_schedule_get_delay(dls, schedule, now);
+    find_dl_min_and_max_delay(dls, get_options(), &min_delay, &max_delay);
+    increment = download_status_schedule_get_delay(dls, schedule,
+                                                   min_delay, max_delay, now);
   }
 
   download_status_log_helper(item, !dls->increment_on, "failed",
@@ -3896,6 +3997,8 @@ download_status_increment_attempt(download_status_t *dls, const char *item,
                                   time_t now)
 {
   int delay = -1;
+  int min_delay = 0, max_delay = INT_MAX;
+
   tor_assert(dls);
 
   if (dls->increment_on == DL_SCHED_INCREMENT_FAILURE) {
@@ -3910,7 +4013,9 @@ download_status_increment_attempt(download_status_t *dls, const char *item,
     ++dls->n_download_attempts;
 
   const smartlist_t *schedule = find_dl_schedule(dls, get_options());
-  delay = download_status_schedule_get_delay(dls, schedule, now);
+  find_dl_min_and_max_delay(dls, get_options(), &min_delay, &max_delay);
+  delay = download_status_schedule_get_delay(dls, schedule,
+                                             min_delay, max_delay, now);
 
   download_status_log_helper(item, dls->increment_on, "attempted",
                              "on failure", dls->n_download_attempts,
@@ -3942,6 +4047,8 @@ download_status_reset(download_status_t *dls)
   dls->n_download_failures = 0;
   dls->n_download_attempts = 0;
   dls->next_attempt_at = time(NULL) + *(int *)smartlist_get(schedule, 0);
+  dls->last_backoff_position = 0;
+  dls->last_delay_used = 0;
   /* Don't reset dls->want_authority or dls->increment_on */
 }
 

+ 6 - 0
src/or/directory.h

@@ -146,6 +146,7 @@ STATIC int directory_handle_command_get(dir_connection_t *conn,
                                         size_t req_body_len);
 STATIC int download_status_schedule_get_delay(download_status_t *dls,
                                               const smartlist_t *schedule,
+                                              int min_delay, int max_delay,
                                               time_t now);
 
 STATIC char* authdir_type_to_string(dirinfo_type_t auth);
@@ -154,6 +155,11 @@ STATIC int should_use_directory_guards(const or_options_t *options);
 STATIC zlib_compression_level_t choose_compression_level(ssize_t n_bytes);
 STATIC const smartlist_t *find_dl_schedule(download_status_t *dls,
                                            const or_options_t *options);
+STATIC void find_dl_min_and_max_delay(download_status_t *dls,
+                                      const or_options_t *options,
+                                      int *min, int *max);
+STATIC int next_random_exponential_delay(int delay, int max_delay);
+
 #endif
 
 #endif

+ 1 - 0
src/or/entrynodes.c

@@ -2033,6 +2033,7 @@ bridge_add_from_config(bridge_line_t *bridge_line)
   if (bridge_line->transport_name)
     b->transport_name = bridge_line->transport_name;
   b->fetch_status.schedule = DL_SCHED_BRIDGE;
+  b->fetch_status.backoff = DL_SCHED_RANDOM_EXPONENTIAL;
   b->socks_args = bridge_line->socks_args;
   if (!bridge_list)
     bridge_list = smartlist_new();

+ 4 - 4
src/or/networkstatus.c

@@ -86,9 +86,9 @@ static time_t time_to_download_next_consensus[N_CONSENSUS_FLAVORS];
 static download_status_t consensus_dl_status[N_CONSENSUS_FLAVORS] =
   {
     { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER,
-                                   DL_SCHED_INCREMENT_FAILURE },
+      DL_SCHED_INCREMENT_FAILURE, DL_SCHED_RANDOM_EXPONENTIAL, 0, 0 },
     { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER,
-                                   DL_SCHED_INCREMENT_FAILURE },
+      DL_SCHED_INCREMENT_FAILURE, DL_SCHED_RANDOM_EXPONENTIAL, 0, 0 },
   };
 
 #define N_CONSENSUS_BOOTSTRAP_SCHEDULES 2
@@ -105,10 +105,10 @@ static download_status_t
               consensus_bootstrap_dl_status[N_CONSENSUS_BOOTSTRAP_SCHEDULES] =
   {
     { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_AUTHORITY,
-                                   DL_SCHED_INCREMENT_ATTEMPT },
+      DL_SCHED_INCREMENT_ATTEMPT, DL_SCHED_RANDOM_EXPONENTIAL, 0, 0 },
     /* During bootstrap, DL_WANT_ANY_DIRSERVER means "use fallbacks". */
     { 0, 0, 0, DL_SCHED_CONSENSUS, DL_WANT_ANY_DIRSERVER,
-                                   DL_SCHED_INCREMENT_ATTEMPT },
+      DL_SCHED_INCREMENT_ATTEMPT, DL_SCHED_RANDOM_EXPONENTIAL, 0, 0 },
   };
 
 /** True iff we have logged a warning about this OR's version being older than

+ 18 - 0
src/or/or.h

@@ -1987,6 +1987,15 @@ typedef enum {
 #define download_schedule_increment_bitfield_t \
                                         ENUM_BF(download_schedule_increment_t)
 
+/** Enumeration: do we want to use the random exponential backoff
+ * mechanism? */
+typedef enum {
+  DL_SCHED_DETERMINISTIC = 0,
+  DL_SCHED_RANDOM_EXPONENTIAL = 1,
+} download_schedule_backoff_t;
+#define download_schedule_backoff_bitfield_t \
+                                        ENUM_BF(download_schedule_backoff_t)
+
 /** Information about our plans for retrying downloads for a downloadable
  * directory object.
  * Each type of downloadable directory object has a corresponding retry
@@ -2033,6 +2042,15 @@ typedef struct download_status_t {
   download_schedule_increment_bitfield_t increment_on : 1; /**< does this
                                         * schedule increment on each attempt,
                                         * or after each failure? */
+  download_schedule_backoff_bitfield_t backoff : 1; /**< do we use the
+                                        * deterministic schedule, or random
+                                        * exponential backoffs? */
+  uint8_t last_backoff_position; /**< number of attempts/failures, depending
+                                  * on increment_on, when we last recalculated
+                                  * the delay.  Only updated if backoff
+                                  * == 1. */
+  int last_delay_used; /**< last delay used for random exponential backoff;
+                        * only updated if backoff == 1 */
 } download_status_t;
 
 /** If n_download_failures is this high, the download can never happen. */

+ 3 - 0
src/or/routerlist.c

@@ -159,6 +159,9 @@ download_status_cert_init(download_status_t *dlstatus)
   dlstatus->schedule = DL_SCHED_CONSENSUS;
   dlstatus->want_authority = DL_WANT_ANY_DIRSERVER;
   dlstatus->increment_on = DL_SCHED_INCREMENT_FAILURE;
+  dlstatus->backoff = DL_SCHED_RANDOM_EXPONENTIAL;
+  dlstatus->last_backoff_position = 0;
+  dlstatus->last_delay_used = 0;
 
   /* Use the new schedule to set next_attempt_at */
   download_status_reset(dlstatus);

+ 2 - 0
src/or/routerparse.c

@@ -3209,6 +3209,8 @@ networkstatus_parse_vote_from_string(const char *s, const char **eos_out,
                                                      NULL, NULL,
                                                      ns->consensus_method,
                                                      flav)))
+        /* Use exponential-backoff scheduling when downloading microdescs */
+        rs->dl_status.backoff = DL_SCHED_RANDOM_EXPONENTIAL;
         smartlist_add(ns->routerstatus_list, rs);
     }
   }

+ 64 - 5
src/test/test_dir.c

@@ -3333,13 +3333,16 @@ test_dir_download_status_schedule(void *arg)
   (void)arg;
   download_status_t dls_failure = { 0, 0, 0, DL_SCHED_GENERIC,
                                              DL_WANT_AUTHORITY,
-                                             DL_SCHED_INCREMENT_FAILURE };
+                                             DL_SCHED_INCREMENT_FAILURE,
+                                             DL_SCHED_DETERMINISTIC, 0, 0 };
   download_status_t dls_attempt = { 0, 0, 0, DL_SCHED_CONSENSUS,
                                              DL_WANT_ANY_DIRSERVER,
-                                             DL_SCHED_INCREMENT_ATTEMPT};
+                                             DL_SCHED_INCREMENT_ATTEMPT,
+                                             DL_SCHED_DETERMINISTIC, 0, 0 };
   download_status_t dls_bridge  = { 0, 0, 0, DL_SCHED_BRIDGE,
                                              DL_WANT_AUTHORITY,
-                                             DL_SCHED_INCREMENT_FAILURE};
+                                             DL_SCHED_INCREMENT_FAILURE,
+                                             DL_SCHED_DETERMINISTIC, 0, 0 };
   int increment = -1;
   int expected_increment = -1;
   time_t current_time = time(NULL);
@@ -3355,6 +3358,7 @@ test_dir_download_status_schedule(void *arg)
   delay1 = 1000;
   increment = download_status_schedule_get_delay(&dls_failure,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  TIME_MIN);
   expected_increment = delay1;
   tt_assert(increment == expected_increment);
@@ -3363,6 +3367,7 @@ test_dir_download_status_schedule(void *arg)
   delay1 = INT_MAX;
   increment =  download_status_schedule_get_delay(&dls_failure,
                                                   schedule,
+                                                  0, INT_MAX,
                                                   -1);
   expected_increment = delay1;
   tt_assert(increment == expected_increment);
@@ -3371,6 +3376,7 @@ test_dir_download_status_schedule(void *arg)
   delay1 = 0;
   increment = download_status_schedule_get_delay(&dls_attempt,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  0);
   expected_increment = delay1;
   tt_assert(increment == expected_increment);
@@ -3379,6 +3385,7 @@ test_dir_download_status_schedule(void *arg)
   delay1 = 1000;
   increment = download_status_schedule_get_delay(&dls_attempt,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  1);
   expected_increment = delay1;
   tt_assert(increment == expected_increment);
@@ -3387,6 +3394,7 @@ test_dir_download_status_schedule(void *arg)
   delay1 = INT_MAX;
   increment = download_status_schedule_get_delay(&dls_bridge,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  current_time);
   expected_increment = delay1;
   tt_assert(increment == expected_increment);
@@ -3395,6 +3403,7 @@ test_dir_download_status_schedule(void *arg)
   delay1 = 1;
   increment = download_status_schedule_get_delay(&dls_bridge,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  TIME_MAX);
   expected_increment = delay1;
   tt_assert(increment == expected_increment);
@@ -3407,6 +3416,7 @@ test_dir_download_status_schedule(void *arg)
   delay2 = 100;
   increment = download_status_schedule_get_delay(&dls_attempt,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  current_time);
   expected_increment = delay2;
   tt_assert(increment == expected_increment);
@@ -3415,6 +3425,7 @@ test_dir_download_status_schedule(void *arg)
   delay2 = 1;
   increment = download_status_schedule_get_delay(&dls_bridge,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  current_time);
   expected_increment = delay2;
   tt_assert(increment == expected_increment);
@@ -3427,6 +3438,7 @@ test_dir_download_status_schedule(void *arg)
   delay2 = 5;
   increment = download_status_schedule_get_delay(&dls_attempt,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  current_time);
   expected_increment = delay2;
   tt_assert(increment == expected_increment);
@@ -3435,6 +3447,7 @@ test_dir_download_status_schedule(void *arg)
   delay2 = 17;
   increment = download_status_schedule_get_delay(&dls_bridge,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  current_time);
   expected_increment = delay2;
   tt_assert(increment == expected_increment);
@@ -3447,6 +3460,7 @@ test_dir_download_status_schedule(void *arg)
   delay2 = 35;
   increment = download_status_schedule_get_delay(&dls_attempt,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  current_time);
   expected_increment = INT_MAX;
   tt_assert(increment == expected_increment);
@@ -3455,6 +3469,7 @@ test_dir_download_status_schedule(void *arg)
   delay2 = 99;
   increment = download_status_schedule_get_delay(&dls_bridge,
                                                  schedule,
+                                                 0, INT_MAX,
                                                  current_time);
   expected_increment = INT_MAX;
   tt_assert(increment == expected_increment);
@@ -3465,16 +3480,59 @@ test_dir_download_status_schedule(void *arg)
   smartlist_free(schedule);
 }
 
+static void
+test_dir_download_status_random_backoff(void *arg)
+{
+  download_status_t dls_random =
+    { 0, 0, 0, DL_SCHED_GENERIC, DL_WANT_AUTHORITY,
+               DL_SCHED_INCREMENT_FAILURE, DL_SCHED_RANDOM_EXPONENTIAL, 0, 0 };
+  int increment = -1;
+  int old_increment;
+  time_t current_time = time(NULL);
+  const int min_delay = 0;
+  const int max_delay = 1000000;
+
+  (void)arg;
+
+  /* Check the random backoff cases */
+  old_increment = 0;
+  do {
+    increment = download_status_schedule_get_delay(&dls_random,
+                                                   NULL,
+                                                   min_delay, max_delay,
+                                                   current_time);
+    /* Test */
+    tt_int_op(increment, OP_GE, min_delay);
+    tt_int_op(increment, OP_LE, max_delay);
+    tt_int_op(increment, OP_GE, old_increment);
+    /* We at most double, and maybe add one */
+    tt_int_op(increment, OP_LE, 2 * old_increment + 1);
+
+    /* Advance */
+    current_time += increment;
+    ++(dls_random.n_download_attempts);
+    ++(dls_random.n_download_failures);
+
+    /* Try another maybe */
+    old_increment = increment;
+  } while (increment < max_delay);
+
+ done:
+  return;
+}
+
 static void
 test_dir_download_status_increment(void *arg)
 {
   (void)arg;
   download_status_t dls_failure = { 0, 0, 0, DL_SCHED_GENERIC,
     DL_WANT_AUTHORITY,
-    DL_SCHED_INCREMENT_FAILURE };
+    DL_SCHED_INCREMENT_FAILURE,
+    DL_SCHED_DETERMINISTIC, 0, 0 };
   download_status_t dls_attempt = { 0, 0, 0, DL_SCHED_BRIDGE,
     DL_WANT_ANY_DIRSERVER,
-    DL_SCHED_INCREMENT_ATTEMPT};
+    DL_SCHED_INCREMENT_ATTEMPT,
+    DL_SCHED_DETERMINISTIC, 0, 0 };
   int delay0 = -1;
   int delay1 = -1;
   int delay2 = -1;
@@ -4243,6 +4301,7 @@ struct testcase_t dir_tests[] = {
   DIR(fetch_type, 0),
   DIR(packages, 0),
   DIR(download_status_schedule, 0),
+  DIR(download_status_random_backoff, 0),
   DIR(download_status_increment, 0),
   DIR(authdir_type_to_string, 0),
   DIR(conn_purpose_to_string, 0),