Browse Source

Replace our random-exponential-delay algorithm.

This patch has implementations of the "decorrelated" and "full"
algorithms from https://www.awsarchitectureblog.com/2015/03/backoff.html
Nick Mathewson 6 years ago
parent
commit
cb29687e93
4 changed files with 98 additions and 37 deletions
  1. 6 0
      changes/bug23816
  2. 46 31
      src/or/directory.c
  3. 8 2
      src/or/directory.h
  4. 38 4
      src/test/test_dir.c

+ 6 - 0
changes/bug23816

@@ -0,0 +1,6 @@
+  o Minor bugfixes (directory client):
+    - On failure to download directory information, delay retry attempts
+      by a random amount based on the "decorrelated jitter" algorithm.
+      Our previous delay algorithm tended to produce extra-long delays too
+      easily. Fixes bug 23816; bugfix on 0.2.9.1-alpha.
+

+ 46 - 31
src/or/directory.c

@@ -3776,53 +3776,68 @@ find_dl_min_and_max_delay(download_status_t *dls, const or_options_t *options,
     *max = INT_MAX;
 }
 
-/** Advance one delay step.  The algorithm is to use the previous delay to
- * compute an increment, we construct a value uniformly at random between
- * delay and MAX(delay*2,delay+1).  We then clamp that value to be no larger
- * than max_delay, and return it.
+/** As next_random_exponential_delay() below, but does not compute a random
+ * value. Instead, compute the range of values that
+ * next_random_exponential_delay() should use when computing its random value.
+ * Store the low bound into *<b>low_bound_out</b>, and the high bound into
+ * *<b>high_bound_out</b>.  Guarantees that the low bound is strictly less
+ * than the high bound. */
+STATIC void
+next_random_exponential_delay_range(int *low_bound_out,
+                                    int *high_bound_out,
+                                    int delay,
+                                    int base_delay)
+{
+  // This is the "decorrelated jitter" approach, from
+  //    https://www.awsarchitectureblog.com/2015/03/backoff.html
+  // The formula is
+  //    sleep = min(cap, random_between(base, sleep * 3))
+
+  const int delay_times_3 = delay < INT_MAX/3 ? delay * 3 : INT_MAX;
+  *low_bound_out = base_delay;
+  if (delay_times_3 > base_delay) {
+    *high_bound_out = delay_times_3;
+  } else {
+    *high_bound_out = base_delay+1;
+  }
+}
+
+/** Advance one delay step.  The algorithm will generate a random delay,
+ * such that each failure is possibly (random) longer than the ones before.
+ *
+ * We then clamp that value to be no larger than max_delay, and return it.
+ *
+ * The <b>base_delay</b> parameter is lowest possible delay time (can't be
+ * zero); the <b>backoff_position</b> parameter is the number of times we've
+ * generated a delay; and the <b>delay</b> argument is the most recently used
+ * delay.
  *
  * Requires that delay is less than INT_MAX, and delay is in [0,max_delay].
  */
 STATIC int
-next_random_exponential_delay(int delay, int max_delay)
+next_random_exponential_delay(int delay,
+                              int base_delay,
+                              int max_delay)
 {
   /* Check preconditions */
   if (BUG(max_delay < 0))
     max_delay = 0;
   if (BUG(delay > max_delay))
     delay = max_delay;
-  if (delay == INT_MAX)
-    return INT_MAX; /* prevent overflow */
   if (BUG(delay < 0))
     delay = 0;
 
-  /* How much are we willing to add to the delay? */
-  int max_increment;
-  int multiplier = 3; /* no more than quadruple the previous delay */
-  if (get_options()->TestingTorNetwork) {
-    /* Decrease the multiplier in testing networks. This reduces the variance,
-     * so that bootstrap is more reliable. */
-    multiplier = 2; /* no more than triple the previous delay */
-  }
+  if (base_delay < 1)
+    base_delay = 1;
 
-  if (delay && delay < (INT_MAX-1) / multiplier) {
-    max_increment = delay * multiplier;
-  } else if (delay) {
-    max_increment = INT_MAX-1;
-  } else {
-    max_increment = 1;
-  }
+  int low_bound=0, high_bound=max_delay;
 
-  if (BUG(max_increment < 1))
-    max_increment = 1;
+  next_random_exponential_delay_range(&low_bound, &high_bound,
+                                      delay, base_delay);
 
-  /* the + 1 here is so that we always wait longer than last time. */
-  int increment = crypto_rand_int(max_increment)+1;
+  int rand_delay = crypto_rand_int_range(low_bound, high_bound);
 
-  if (increment < max_delay - delay)
-    return delay + increment;
-  else
-    return max_delay;
+  return MIN(rand_delay, max_delay);
 }
 
 /** Find the current delay for dls based on schedule or min_delay/
@@ -3871,7 +3886,7 @@ download_status_schedule_get_delay(download_status_t *dls,
 
       while (dls->last_backoff_position < dls_schedule_position) {
         /* Do one increment step */
-        delay = next_random_exponential_delay(delay, max_delay);
+        delay = next_random_exponential_delay(delay, min_delay, max_delay);
         /* Update our position */
         ++(dls->last_backoff_position);
       }

+ 8 - 2
src/or/directory.h

@@ -167,8 +167,14 @@ STATIC const smartlist_t *find_dl_schedule(download_status_t *dls,
 STATIC void find_dl_min_and_max_delay(download_status_t *dls,
                                       const or_options_t *options,
                                       int *min, int *max);
-STATIC int next_random_exponential_delay(int delay, int max_delay);
-
+STATIC int next_random_exponential_delay(int delay,
+                                         int base_delay,
+                                         int max_delay);
+
+STATIC void next_random_exponential_delay_range(int *low_bound_out,
+                                                int *high_bound_out,
+                                                int delay,
+                                                int base_delay);
 #endif
 
 #endif

+ 38 - 4
src/test/test_dir.c

@@ -3617,6 +3617,7 @@ test_dir_download_status_random_backoff(void *arg)
 
   /* Check the random backoff cases */
   old_increment = 0;
+  int n_attempts = 0;
   do {
     increment = download_status_schedule_get_delay(&dls_random,
                                                    NULL,
@@ -3625,9 +3626,9 @@ test_dir_download_status_random_backoff(void *arg)
     /* Test */
     tt_int_op(increment, OP_GE, min_delay);
     tt_int_op(increment, OP_LE, max_delay);
-    tt_int_op(increment, OP_GE, old_increment);
-    /* We at most quadruple, and maybe add one */
-    tt_int_op(increment, OP_LE, 4 * old_increment + 1);
+
+    if (old_increment)
+      tt_int_op(increment, OP_LE, old_increment * 3);
 
     /* Advance */
     current_time += increment;
@@ -3636,12 +3637,44 @@ test_dir_download_status_random_backoff(void *arg)
 
     /* Try another maybe */
     old_increment = increment;
-  } while (increment < max_delay);
+  } while (increment < max_delay && ++n_attempts < 1000);
 
  done:
   return;
 }
 
+static void
+test_dir_download_status_random_backoff_ranges(void *arg)
+{
+  (void)arg;
+  int lo, hi;
+  next_random_exponential_delay_range(&lo, &hi, 0, 10);
+  tt_int_op(lo, OP_EQ, 10);
+  tt_int_op(hi, OP_EQ, 11);
+
+  next_random_exponential_delay_range(&lo, &hi, 6, 10);
+  tt_int_op(lo, OP_EQ, 10);
+  tt_int_op(hi, OP_EQ, 6*3);
+
+  next_random_exponential_delay_range(&lo, &hi, 13, 10);
+  tt_int_op(lo, OP_EQ, 10);
+  tt_int_op(hi, OP_EQ, 13 * 3);
+
+  next_random_exponential_delay_range(&lo, &hi, 37, 10);
+  tt_int_op(lo, OP_EQ, 10);
+  tt_int_op(hi, OP_EQ, 111);
+
+  next_random_exponential_delay_range(&lo, &hi, 123, 10);
+  tt_int_op(lo, OP_EQ, 10);
+  tt_int_op(hi, OP_EQ, 369);
+
+  next_random_exponential_delay_range(&lo, &hi, INT_MAX-5, 10);
+  tt_int_op(lo, OP_EQ, 10);
+  tt_int_op(hi, OP_EQ, INT_MAX);
+ done:
+  ;
+}
+
 static void
 test_dir_download_status_increment(void *arg)
 {
@@ -5475,6 +5508,7 @@ struct testcase_t dir_tests[] = {
   DIR(packages, 0),
   DIR(download_status_schedule, 0),
   DIR(download_status_random_backoff, 0),
+  DIR(download_status_random_backoff_ranges, 0),
   DIR(download_status_increment, 0),
   DIR(authdir_type_to_string, 0),
   DIR(conn_purpose_to_string, 0),