Browse Source

Merge remote-tracking branch 'tor-github/pr/705'

Nick Mathewson 5 years ago
parent
commit
58fd864a85
4 changed files with 326 additions and 179 deletions
  1. 6 0
      changes/bug29298
  2. 162 96
      src/core/or/circuitpadding.c
  3. 74 29
      src/core/or/circuitpadding.h
  4. 84 54
      src/test/test_circuitpadding.c

+ 6 - 0
changes/bug29298

@@ -0,0 +1,6 @@
+  o Minor features (circuit padding):
+    - Allow the padding machine designer to pick the edges of their histogram
+      instead of trying to compute them automatically using an exponential
+      formula. Resolves some undefined behavior in the case of small histograms
+      and allows greater flexibility on machine design. Closes ticket 29298;
+      bugfix on 0.4.0.1-alpha.

+ 162 - 96
src/core/or/circuitpadding.c

@@ -224,25 +224,20 @@ circpad_machine_current_state(const circpad_machine_state_t *mi)
 }
 
 /**
- * Calculate the lower bound of a histogram bin. The upper bound
- * is obtained by calling this function with bin+1, and subtracting 1.
+ * Get the lower bound of a histogram bin.
  *
- * The 0th bin has a special value -- it only represents start_usec.
- * This is so we can specify a probability on 0-delay values.
+ * You can obtain the upper bound using histogram_get_bin_upper_bound().
  *
- * After bin 0, bins are exponentially spaced, so that each subsequent
- * bin is twice as large as the previous. This is done so that higher
- * time resolution is given to lower time values.
- *
- * The infinity bin is a the last bin in the array (histogram_len-1).
- * It has a usec value of CIRCPAD_DELAY_INFINITE (UINT32_MAX).
+ * This function can also be called with 'bin' set to a value equal or greater
+ * than histogram_len in which case the infinity bin is chosen and
+ * CIRCPAD_DELAY_INFINITE is returned.
  */
 STATIC circpad_delay_t
 circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi,
                               circpad_hist_index_t bin)
 {
   const circpad_state_t *state = circpad_machine_current_state(mi);
-  circpad_delay_t start_usec;
+  circpad_delay_t rtt_add_usec = 0;
 
   /* Our state should have been checked to be non-null by the caller
    * (circpad_machine_remove_token()) */
@@ -250,27 +245,29 @@ circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi,
     return CIRCPAD_DELAY_INFINITE;
   }
 
-  if (state->use_rtt_estimate)
-    start_usec = mi->rtt_estimate_usec+state->start_usec;
-  else
-    start_usec = state->start_usec;
-
-  if (bin >= CIRCPAD_INFINITY_BIN(state))
+  /* The infinity bin has an upper bound of infinity, so make sure we return
+   * that if they ask for it. */
+  if (bin > CIRCPAD_INFINITY_BIN(mi)) {
     return CIRCPAD_DELAY_INFINITE;
+  }
 
-  if (bin == 0)
-    return start_usec;
+  /* If we are using an RTT estimate, consider it as well. */
+  if (state->use_rtt_estimate) {
+    rtt_add_usec = mi->rtt_estimate_usec;
+  }
 
-  if (bin == 1)
-    return start_usec+1;
+  return state->histogram_edges[bin] + rtt_add_usec;
+}
 
-  /* The bin widths double every index, so that we can have more resolution
-   * for lower time values in the histogram. */
-  const circpad_time_t bin_width_exponent =
-        1 << (CIRCPAD_INFINITY_BIN(state) - bin);
-  return (circpad_delay_t)MIN(start_usec +
-                              state->range_usec/bin_width_exponent,
-                              CIRCPAD_DELAY_INFINITE);
+/**
+ * Like circpad_histogram_bin_to_usec() but return the upper bound of bin.
+ * (The upper bound is included in the bin.)
+ */
+STATIC circpad_delay_t
+histogram_get_bin_upper_bound(const circpad_machine_state_t *mi,
+                              circpad_hist_index_t bin)
+{
+  return circpad_histogram_bin_to_usec(mi, bin+1) - 1;
 }
 
 /** Return the midpoint of the histogram bin <b>bin_index</b>. */
@@ -279,8 +276,7 @@ circpad_get_histogram_bin_midpoint(const circpad_machine_state_t *mi,
                            int bin_index)
 {
   circpad_delay_t left_bound = circpad_histogram_bin_to_usec(mi, bin_index);
-  circpad_delay_t right_bound =
-    circpad_histogram_bin_to_usec(mi, bin_index+1)-1;
+  circpad_delay_t right_bound = histogram_get_bin_upper_bound(mi, bin_index);
 
   return left_bound + (right_bound - left_bound)/2;
 }
@@ -289,19 +285,17 @@ circpad_get_histogram_bin_midpoint(const circpad_machine_state_t *mi,
  * Return the bin that contains the usec argument.
  * "Contains" is defined as us in [lower, upper).
  *
- * This function will never return the infinity bin (histogram_len-1),
- * in order to simplify the rest of the code.
- *
- * This means that technically the last bin (histogram_len-2)
- * has range [start_usec+range_usec, CIRCPAD_DELAY_INFINITE].
+ * This function will never return the infinity bin (histogram_len-1), in order
+ * to simplify the rest of the code, so if a usec is provided that falls above
+ * the highest non-infinity bin, that bin index will be returned.
  */
 STATIC circpad_hist_index_t
 circpad_histogram_usec_to_bin(const circpad_machine_state_t *mi,
                               circpad_delay_t usec)
 {
   const circpad_state_t *state = circpad_machine_current_state(mi);
-  circpad_delay_t start_usec;
-  int32_t bin; /* Larger than return type to properly clamp overflow */
+  circpad_delay_t rtt_add_usec = 0;
+  circpad_hist_index_t bin;
 
   /* Our state should have been checked to be non-null by the caller
    * (circpad_machine_remove_token()) */
@@ -309,34 +303,25 @@ circpad_histogram_usec_to_bin(const circpad_machine_state_t *mi,
     return 0;
   }
 
-  if (state->use_rtt_estimate)
-    start_usec = mi->rtt_estimate_usec+state->start_usec;
-  else
-    start_usec = state->start_usec;
-
-  /* The first bin (#0) has zero width and starts (and ends) at start_usec. */
-  if (usec <= start_usec)
-    return 0;
-
-  if (usec == start_usec+1)
-    return 1;
+  /* If we are using an RTT estimate, consider it as well. */
+  if (state->use_rtt_estimate) {
+    rtt_add_usec = mi->rtt_estimate_usec;
+  }
 
-  const circpad_time_t histogram_range_usec = state->range_usec;
-  /* We need to find the bin corresponding to our position in the range.
-   * Since bins are exponentially spaced in powers of two, we need to
-   * take the log2 of our position in histogram_range_usec. However,
-   * since tor_log2() returns the floor(log2(u64)), we have to adjust
-   * it to behave like ceil(log2(u64)). This is verified in our tests
-   * to properly invert the operation done in
-   * circpad_histogram_bin_to_usec(). */
-  bin = CIRCPAD_INFINITY_BIN(state) -
-    tor_log2(2*histogram_range_usec/(usec-start_usec+1));
+  /* Walk through the bins and check the upper bound of each bin, if 'usec' is
+   * less-or-equal to that, return that bin. If rtt_estimate is enabled then
+   * add that to the upper bound of each bin.
+   *
+   * We don't want to return the infinity bin here, so don't go there. */
+  for (bin = 0 ; bin < CIRCPAD_INFINITY_BIN(state) ; bin++) {
+    if (usec <= histogram_get_bin_upper_bound(mi, bin) + rtt_add_usec) {
+      return bin;
+    }
+  }
 
-  /* Clamp the return value to account for timevals before the start
-   * of bin 0, or after the last bin. Don't return the infinity bin
-   * index. */
-  bin = MIN(MAX(bin, 1), CIRCPAD_INFINITY_BIN(state)-1);
-  return bin;
+  /* We don't want to return the infinity bin here, so if we still didn't find
+   * the right bin, return the highest non-infinity bin */
+  return CIRCPAD_INFINITY_BIN(state)-1;
 }
 
 /**
@@ -398,20 +383,22 @@ circpad_choose_state_length(circpad_machine_state_t *mi)
 /**
  * Sample a value from our iat_dist, and clamp it safely
  * to circpad_delay_t.
+ *
+ * Before returning, add <b>delay_shift</b> (can be zero) to the sampled value.
  */
 static circpad_delay_t
 circpad_distribution_sample_iat_delay(const circpad_state_t *state,
-                                      circpad_delay_t start_usec)
+                                      circpad_delay_t delay_shift)
 {
   double val = circpad_distribution_sample(state->iat_dist);
   /* These comparisons are safe, because the output is in the range
    * [0, 2**32), and double has a precision of 53 bits. */
   val = MAX(0, val);
-  val = MIN(val, state->range_usec);
+  val = MIN(val, state->dist_max_sample_usec);
 
-  /* This addition is exact: val is at most 2**32-1, start_usec
+  /* This addition is exact: val is at most 2**32-1, min_delay
    * is at most 2**32-1, and doubles have a precision of 53 bits. */
-  val += start_usec;
+  val += delay_shift;
 
   /* Clamp the distribution at infinite delay val */
   return (circpad_delay_t)MIN(tor_llround(val), CIRCPAD_DELAY_INFINITE);
@@ -431,7 +418,6 @@ circpad_machine_sample_delay(circpad_machine_state_t *mi)
   const circpad_hist_token_t *histogram = NULL;
   circpad_hist_index_t curr_bin = 0;
   circpad_delay_t bin_start, bin_end;
-  circpad_delay_t start_usec;
   /* These three must all be larger than circpad_hist_token_t, because
    * we sum several circpad_hist_token_t values across the histogram */
   uint64_t curr_weight = 0;
@@ -440,14 +426,12 @@ circpad_machine_sample_delay(circpad_machine_state_t *mi)
 
   tor_assert(state);
 
-  if (state->use_rtt_estimate)
-    start_usec = mi->rtt_estimate_usec+state->start_usec;
-  else
-    start_usec = state->start_usec;
-
   if (state->iat_dist.type != CIRCPAD_DIST_NONE) {
     /* Sample from a fixed IAT distribution and return */
-    return circpad_distribution_sample_iat_delay(state, start_usec);
+    circpad_delay_t iat_delay_shift = state->use_rtt_estimate ?
+      mi->rtt_estimate_usec + state->dist_added_shift_usec :
+      state->dist_added_shift_usec;
+    return circpad_distribution_sample_iat_delay(state, iat_delay_shift);
   } else if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) {
     /* We have a mutable histogram. Do basic sanity check and apply: */
     if (BUG(!mi->histogram) ||
@@ -512,16 +496,13 @@ circpad_machine_sample_delay(circpad_machine_state_t *mi)
    * function below samples from [bin_start, bin_end) */
   bin_end = circpad_histogram_bin_to_usec(mi, curr_bin+1);
 
-  /* Truncate the high bin in case it's the infinity bin:
-   * Don't actually schedule an "infinite"-1 delay */
-  bin_end = MIN(bin_end, start_usec+state->range_usec);
-
-  // Sample uniformly between histogram[i] to histogram[i+1]-1,
-  // but no need to sample if they are the same timeval (aka bin 0 or bin 1).
-  if (bin_end <= bin_start+1)
+  /* Bin edges are monotonically increasing so this is a bug. Handle it. */
+  if (BUG(bin_start > bin_end)) {
     return bin_start;
-  else
-    return (circpad_delay_t)crypto_rand_uint64_range(bin_start, bin_end);
+  }
+
+  /* Sample randomly from within the bin width */
+  return (circpad_delay_t)crypto_rand_uint64_range(bin_start, bin_end);
 }
 
 /**
@@ -627,7 +608,7 @@ circpad_machine_first_higher_index(const circpad_machine_state_t *mi,
   /* Don't remove from the infinity bin */
   for (; bin < CIRCPAD_INFINITY_BIN(mi); bin++) {
     if (mi->histogram[bin] &&
-        circpad_histogram_bin_to_usec(mi, bin+1) > target_bin_usec) {
+        histogram_get_bin_upper_bound(mi, bin) >= target_bin_usec) {
       return bin;
     }
   }
@@ -2083,16 +2064,92 @@ circpad_setup_machine_on_circ(circuit_t *on_circ,
   on_circ->padding_machine[machine->machine_index] = machine;
 }
 
-/* These padding machines are only used for tests pending #28634. */
 #ifdef TOR_UNIT_TESTS
+/** Validate a single state of a padding machine */
+static bool
+padding_machine_state_is_valid(const circpad_state_t *state)
+{
+  int b;
+  uint32_t tokens_count = 0;
+  circpad_delay_t prev_bin_edge = 0;
+
+  /* We only validate histograms */
+  if (!state->histogram_len) {
+    return true;
+  }
+
+  /* We need at least two bins in a histogram */
+  if (state->histogram_len < 2) {
+    log_warn(LD_GENERAL, "You can't have a histogram with less than 2 bins");
+    return false;
+  }
+
+  /* For each machine state, if it's a histogram, make sure all the
+   * histogram edges are well defined (i.e. are strictly monotonic). */
+  for (b = 0 ; b < state->histogram_len ; b++) {
+    /* Check that histogram edges are strictly increasing. Ignore the first
+     * edge since it can be zero. */
+    if (prev_bin_edge >= state->histogram_edges[b] && b > 0) {
+      log_warn(LD_GENERAL, "Histogram edges are not increasing [%u/%u]",
+               prev_bin_edge, state->histogram_edges[b]);
+      return false;
+    }
+
+    prev_bin_edge = state->histogram_edges[b];
+
+    /* Also count the number of tokens as we go through the histogram states */
+    tokens_count += state->histogram[b];
+  }
+  /* Verify that the total number of tokens is correct */
+  if (tokens_count != state->histogram_total_tokens) {
+    log_warn(LD_GENERAL, "Histogram token count is wrong [%u/%u]",
+             tokens_count, state->histogram_total_tokens);
+    return false;
+  }
+
+  return true;
+}
+
+/** Basic validation of padding machine */
+static bool
+padding_machine_is_valid(const circpad_machine_spec_t *machine)
+{
+  int i;
+
+  /* Validate the histograms of the padding machine */
+  for (i = 0 ; i < machine->num_states ; i++) {
+    if (!padding_machine_state_is_valid(&machine->states[i])) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/* Validate and register <b>machine</b> into <b>machine_list</b>. If
+ * <b>machine_list</b> is NULL, then just validate. */
+STATIC void
+register_padding_machine(circpad_machine_spec_t *machine,
+                         smartlist_t *machine_list)
+{
+  if (!padding_machine_is_valid(machine)) {
+    log_warn(LD_GENERAL, "Machine #%u is invalid. Ignoring.",
+             machine->machine_num);
+    return;
+  }
+
+  if (machine_list) {
+    smartlist_add(machine_list, machine);
+  }
+}
+
+/* These padding machines are only used for tests pending #28634. */
 static void
 circpad_circ_client_machine_init(void)
 {
   circpad_machine_spec_t *circ_client_machine
       = tor_malloc_zero(sizeof(circpad_machine_spec_t));
 
-  // XXX: Better conditions for merge.. Or disable this machine in
-  // merge?
   circ_client_machine->conditions.min_hops = 2;
   circ_client_machine->conditions.state_mask =
       CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED|CIRCPAD_CIRC_HAS_RELAY_EARLY;
@@ -2124,19 +2181,20 @@ circpad_circ_client_machine_init(void)
   circ_client_machine->states[CIRCPAD_STATE_BURST].token_removal =
       CIRCPAD_TOKEN_REMOVAL_CLOSEST;
 
-  // FIXME: Tune this histogram
   circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2;
-  circ_client_machine->states[CIRCPAD_STATE_BURST].start_usec = 500;
-  circ_client_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_edges[0]= 500;
+  circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_edges[1]= 1000000;
+
   /* We have 5 tokens in the histogram, which means that all circuits will look
    * like they have 7 hops (since we start this machine after the second hop,
    * and tokens are decremented for any valid hops, and fake extends are
    * used after that -- 2+5==7). */
   circ_client_machine->states[CIRCPAD_STATE_BURST].histogram[0] = 5;
+
   circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 5;
 
   circ_client_machine->machine_num = smartlist_len(origin_padding_machines);
-  smartlist_add(origin_padding_machines, circ_client_machine);
+  register_padding_machine(circ_client_machine, origin_padding_machines);
 }
 
 static void
@@ -2183,8 +2241,9 @@ circpad_circ_responder_machine_init(void)
   circ_responder_machine->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1;
   /* The histogram is 2 bins: an empty one, and infinity */
   circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2;
-  circ_responder_machine->states[CIRCPAD_STATE_BURST].start_usec = 5000;
-  circ_responder_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram_edges[0]= 500;
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram_edges[1] =
+    1000000;
   /* During burst state we wait forever for padding to arrive.
 
      We are waiting for a padding cell from the client to come in, so that we
@@ -2215,8 +2274,14 @@ circpad_circ_responder_machine_init(void)
      before you send a padding response */
   circ_responder_machine->states[CIRCPAD_STATE_GAP].use_rtt_estimate = 1;
   circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_len = 6;
-  circ_responder_machine->states[CIRCPAD_STATE_GAP].start_usec = 5000;
-  circ_responder_machine->states[CIRCPAD_STATE_GAP].range_usec = 1000000;
+  /* Specify histogram bins */
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_edges[0]= 500;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_edges[1]= 1000;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_edges[2]= 2000;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_edges[3]= 4000;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_edges[4]= 8000;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_edges[5]= 16000;
+  /* Specify histogram tokens */
   circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[0] = 0;
   circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[1] = 1;
   circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[2] = 2;
@@ -2224,11 +2289,12 @@ circpad_circ_responder_machine_init(void)
   circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[4] = 1;
   /* Total number of tokens */
   circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_total_tokens = 6;
+
   circ_responder_machine->states[CIRCPAD_STATE_GAP].token_removal =
       CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC;
 
   circ_responder_machine->machine_num = smartlist_len(relay_padding_machines);
-  smartlist_add(relay_padding_machines, circ_responder_machine);
+  register_padding_machine(circ_responder_machine, relay_padding_machines);
 }
 #endif
 

+ 74 - 29
src/core/or/circuitpadding.h

@@ -228,13 +228,18 @@ typedef uint16_t circpad_statenum_t;
 #define  CIRCPAD_STATENUM_MAX   (UINT16_MAX)
 
 /** A histogram is used to sample padding delays given a machine state.  This
- *  constant defines the maximum histogram width (i.e. the max number of bins)
+ *  constant defines the maximum histogram width (i.e. the max number of bins).
  *
- *  Each histogram bin is twice as large as the previous. Two exceptions: The
- *  first bin has zero width (which means that minimum delay is applied to the
- *  next padding cell), and the last bin (infinity bin) has infinite width
- *  (which means that the next padding cell will be delayed infinitely). */
-#define CIRCPAD_MAX_HISTOGRAM_LEN (sizeof(circpad_delay_t)*8 + 1)
+ * The current limit is arbitrary and could be raised if there is a need,
+ * however too many bins will be hard to serialize in the future.
+ *
+ * Memory concerns are not so great here since the corresponding histogram and
+ * histogram_edges arrays are global and not per-circuit.
+ *
+ * If we ever upgrade this to a value that can't be represented by 8-bits we
+ * also need to upgrade circpad_hist_index_t.
+ */
+#define CIRCPAD_MAX_HISTOGRAM_LEN (100)
 
 /**
  * A state of a padding state machine. The information here are immutable and
@@ -248,38 +253,60 @@ typedef uint16_t circpad_statenum_t;
  * or the consensus.
  */
 typedef struct circpad_state_t {
-  /** If a histogram is used for this state, this specifies the number of bins
-   *  of this histogram. Histograms must have at least 2 bins.
+  /**
+   * If a histogram is used for this state, this specifies the number of bins
+   * of this histogram. Histograms must have at least 2 bins.
+   *
+   * In particular, the following histogram:
+   *
+   * Tokens
+   *         +
+   *      10 |    +----+
+   *       9 |    |    |           +---------+
+   *       8 |    |    |           |         |
+   *       7 |    |    |     +-----+         |
+   *       6 +----+ Bin+-----+     |         +---------------+
+   *       5 |    | #1 |     |     |         |               |
+   *         | Bin|    | Bin | Bin |  Bin #4 |    Bin #5     |
+   *         | #0 |    | #2  | #3  |         | (infinity bin)|
+   *         |    |    |     |     |         |               |
+   *         |    |    |     |     |         |               |
+   *       0 +----+----+-----+-----+---------+---------------+
+   *         0   100  200   350   500      1000              ∞  microseconds
    *
-   *  If a delay probability distribution is used for this state, this is set
-   *  to 0. */
+   * would be specified the following way:
+   *    histogram_len = 6;
+   *    histogram[] =        {   6,  10,   6,  7,    9,     6 }
+   *    histogram_edges[] =  { 0, 100, 200, 350, 500, 1000 }
+   *
+   * The final bin is called the "infinity bin" and if it's chosen we don't
+   * schedule any padding. The infinity bin is strange because its lower edge
+   * is the max value of possible non-infinite delay allowed by this histogram,
+   * and its upper edge is CIRCPAD_DELAY_INFINITE. You can tell if the infinity
+   * bin is chosen by inspecting its bin index or inspecting its upper edge.
+   *
+   * If a delay probability distribution is used for this state, this is set
+   * to 0. */
   circpad_hist_index_t histogram_len;
   /** The histogram itself: an array of uint16s of tokens, whose
-   *  widths are exponentially spaced, in microseconds */
+   *  widths are exponentially spaced, in microseconds.
+   *
+   *  This array must have histogram_len elements that are strictly
+   *  monotonically increasing. */
   circpad_hist_token_t histogram[CIRCPAD_MAX_HISTOGRAM_LEN];
+  /* The histogram bin edges in usec.
+   *
+   * Each element of this array specifies the left edge of the corresponding
+   * bin. The rightmost edge is always infinity and is not specified in this
+   * array.
+   *
+   * This array must have histogram_len elements. */
+  circpad_delay_t histogram_edges[CIRCPAD_MAX_HISTOGRAM_LEN+1];
   /** Total number of tokens in this histogram. This is a constant and is *not*
    *  decremented every time we spend a token. It's used for initializing and
    *  refilling the histogram. */
   uint32_t histogram_total_tokens;
 
-  /** Minimum padding delay of this state in microseconds.
-   *
-   *  If histograms are used, this is the left (and right) bound of the first
-   *  bin (since it has zero width).
-   *
-   *  If a delay probability distribution is used, this represents the minimum
-   *  delay we can sample from the distribution.
-   */
-  circpad_delay_t start_usec;
-
-  /** If histograms are used, this is the width of the whole histogram in
-   *  microseconds, and it's used to calculate individual bin width.
-   *
-   *  If a delay probability distribution is used, this is used as the max
-   *  delay we can sample from the distribution.
-   */
-  circpad_delay_t range_usec;
-
   /**
    * Represents a delay probability distribution (aka IAT distribution). It's a
    * parametrized way of encoding inter-packet delay information in
@@ -292,6 +319,16 @@ typedef struct circpad_state_t {
    * results of sampling from this distribution (range_sec is used as a max).
    */
   circpad_distribution_t iat_dist;
+  /*  If a delay probability distribution is used, this is used as the max
+   *  value we can sample from the distribution. However, RTT measurements and
+   *  dist_added_shift gets applied on top of this value to derive the final
+   *  padding delay. */
+  circpad_delay_t dist_max_sample_usec;
+  /*  If a delay probability distribution is used and this is set, we will add
+   *  this value on top of the value sampled from the IAT distribution to
+   *  derive the final padding delay (We also add the RTT measurement if it's
+   *  enabled.). */
+  circpad_delay_t dist_added_shift_usec;
 
   /**
    * The length dist is a parameterized way of encoding how long this
@@ -686,9 +723,17 @@ circpad_send_command_to_hop,(struct origin_circuit_t *circ, uint8_t hopnum,
                              uint8_t relay_command, const uint8_t *payload,
                              ssize_t payload_len));
 
+STATIC circpad_delay_t
+histogram_get_bin_upper_bound(const circpad_machine_state_t *mi,
+                              circpad_hist_index_t bin);
+
 #ifdef TOR_UNIT_TESTS
 extern smartlist_t *origin_padding_machines;
 extern smartlist_t *relay_padding_machines;
+
+STATIC void
+register_padding_machine(circpad_machine_spec_t *machine,
+                         smartlist_t *machine_list);
 #endif
 
 #endif

+ 84 - 54
src/test/test_circuitpadding.c

@@ -333,7 +333,7 @@ test_circuitpadding_rtt(void *arg)
             OP_EQ,
             relay_side->padding_info[0]->rtt_estimate_usec+
             circpad_machine_current_state(
-             relay_side->padding_info[0])->start_usec);
+             relay_side->padding_info[0])->histogram_edges[0]);
 
   circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
   circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
@@ -349,7 +349,7 @@ test_circuitpadding_rtt(void *arg)
             OP_EQ,
             relay_side->padding_info[0]->rtt_estimate_usec+
             circpad_machine_current_state(
-             relay_side->padding_info[0])->start_usec);
+             relay_side->padding_info[0])->histogram_edges[0]);
 
   /* Test 2: Termination of RTT measurement (from the previous test) */
   tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1);
@@ -367,7 +367,7 @@ test_circuitpadding_rtt(void *arg)
             OP_EQ,
             relay_side->padding_info[0]->rtt_estimate_usec+
             circpad_machine_current_state(
-             relay_side->padding_info[0])->start_usec);
+             relay_side->padding_info[0])->histogram_edges[0]);
 
   /* Test 3: Make sure client side machine properly ignores RTT */
   circpad_cell_event_nonpadding_received((circuit_t*)client_side);
@@ -383,7 +383,7 @@ test_circuitpadding_rtt(void *arg)
   tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0),
             OP_EQ,
             circpad_machine_current_state(
-                client_side->padding_info[0])->start_usec);
+                client_side->padding_info[0])->histogram_edges[0]);
  done:
   free_fake_orcirc(relay_side);
   circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
@@ -414,19 +414,23 @@ helper_create_basic_machine(void)
   circ_client_machine.states[CIRCPAD_STATE_BURST].
       next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL;
 
-  // FIXME: Is this what we want?
   circ_client_machine.states[CIRCPAD_STATE_BURST].token_removal =
       CIRCPAD_TOKEN_REMOVAL_HIGHER;
 
-  // FIXME: Tune this histogram
   circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_len = 5;
-  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 500;
-  circ_client_machine.states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[0] = 500;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[1] = 2500;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[2] = 5000;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[3] = 10000;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[4] = 20000;
+
   circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[0] = 1;
   circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[1] = 0;
   circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[2] = 2;
   circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[3] = 2;
   circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[4] = 2;
+
   circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_total_tokens = 7;
   circ_client_machine.states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1;
 
@@ -458,15 +462,25 @@ helper_create_machine_with_big_histogram(circpad_removal_t removal_strategy)
   burst_state->token_removal = CIRCPAD_TOKEN_REMOVAL_HIGHER;
 
   burst_state->histogram_len = BIG_HISTOGRAM_LEN;
-  burst_state->start_usec = 0;
-  burst_state->range_usec = 1000;
 
   int n_tokens = 0;
-  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+  int i;
+  for (i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
     burst_state->histogram[i] = tokens_per_bin;
     n_tokens += tokens_per_bin;
   }
 
+  burst_state->histogram_edges[0] = 0;
+  burst_state->histogram_edges[1] = 1;
+  burst_state->histogram_edges[2] = 7;
+  burst_state->histogram_edges[3] = 15;
+  burst_state->histogram_edges[4] = 31;
+  burst_state->histogram_edges[5] = 62;
+  burst_state->histogram_edges[6] = 125;
+  burst_state->histogram_edges[7] = 250;
+  burst_state->histogram_edges[8] = 500;
+  burst_state->histogram_edges[9] = 1000;
+
   burst_state->histogram_total_tokens = n_tokens;
   burst_state->length_dist.type = CIRCPAD_DIST_UNIFORM;
   burst_state->length_dist.param1 = n_tokens;
@@ -527,12 +541,20 @@ test_circuitpadding_token_removal_higher(void *arg)
 
   /* Test left boundaries of each histogram bin: */
   const circpad_delay_t bin_left_bounds[] =
-    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
-  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    {0, 1,    7,  15,  31,  62,  125, 250, 500, 1000, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i <= BIG_HISTOGRAM_LEN ; i++) {
     tt_uint_op(bin_left_bounds[i], OP_EQ,
                circpad_histogram_bin_to_usec(mi, i));
   }
 
+  /* Test right boundaries of each histogram bin: */
+  const circpad_delay_t bin_right_bounds[] =
+    {0,    6,  14,  30,  61,  124, 249, 499, 999, CIRCPAD_DELAY_INFINITE-1};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_right_bounds[i], OP_EQ,
+               histogram_get_bin_upper_bound(mi, i));
+  }
+
   /* Check that all bins have two tokens right now */
   for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
     tt_int_op(mi->histogram[i], OP_EQ, 2);
@@ -576,8 +598,8 @@ test_circuitpadding_token_removal_higher(void *arg)
   /* Test below the lowest bin, for coverage */
   tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
             CIRCPAD_STATE_BURST);
-  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
-  mi->padding_scheduled_at_usec = current_time - 1;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[0] = 100;
+  mi->padding_scheduled_at_usec = current_time;
   circpad_machine_remove_token(mi);
   tt_int_op(mi->histogram[0], OP_EQ, 1);
 
@@ -624,8 +646,8 @@ test_circuitpadding_token_removal_lower(void *arg)
 
   /* Test left boundaries of each histogram bin: */
   const circpad_delay_t bin_left_bounds[] =
-    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
-  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    {0, 1,    7,  15,  31,  62,  125, 250, 500, 1000, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i <= BIG_HISTOGRAM_LEN ; i++) {
     tt_uint_op(bin_left_bounds[i], OP_EQ,
                circpad_histogram_bin_to_usec(mi, i));
   }
@@ -673,7 +695,8 @@ test_circuitpadding_token_removal_lower(void *arg)
   /* Test above the highest bin, for coverage */
   tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
             CIRCPAD_STATE_BURST);
-  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+    histogram_edges[BIG_HISTOGRAM_LEN-2] = 100;
   mi->padding_scheduled_at_usec = current_time - 29202;
   circpad_machine_remove_token(mi);
   tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
@@ -721,8 +744,8 @@ test_circuitpadding_closest_token_removal(void *arg)
 
   /* Test left boundaries of each histogram bin: */
   const circpad_delay_t bin_left_bounds[] =
-    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
-  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    {0, 1,    7,  15,  31,  62,  125, 250, 500, 1000, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i <= BIG_HISTOGRAM_LEN ; i++) {
     tt_uint_op(bin_left_bounds[i], OP_EQ,
                circpad_histogram_bin_to_usec(mi, i));
   }
@@ -769,7 +792,9 @@ test_circuitpadding_closest_token_removal(void *arg)
   /* Test below the lowest bin, for coverage */
   tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
             CIRCPAD_STATE_BURST);
-  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[0] = 100;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[1] = 101;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[2] = 120;
   mi->padding_scheduled_at_usec = current_time - 102;
   mi->histogram[0] = 0;
   circpad_machine_remove_token(mi);
@@ -778,7 +803,6 @@ test_circuitpadding_closest_token_removal(void *arg)
   /* Test above the highest bin, for coverage */
   tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
             CIRCPAD_STATE_BURST);
-  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
   mi->padding_scheduled_at_usec = current_time - 29202;
   circpad_machine_remove_token(mi);
   tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
@@ -826,8 +850,8 @@ test_circuitpadding_closest_token_removal_usec(void *arg)
 
   /* Test left boundaries of each histogram bin: */
   const circpad_delay_t bin_left_bounds[] =
-    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
-  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    {0, 1,    7,  15,  31,  62,  125, 250, 500, 1000, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i <= BIG_HISTOGRAM_LEN ; i++) {
     tt_uint_op(bin_left_bounds[i], OP_EQ,
                circpad_histogram_bin_to_usec(mi, i));
   }
@@ -877,7 +901,9 @@ test_circuitpadding_closest_token_removal_usec(void *arg)
   /* Test below the lowest bin, for coverage */
   tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
             CIRCPAD_STATE_BURST);
-  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[0] = 100;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[1] = 101;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_edges[2] = 120;
   mi->padding_scheduled_at_usec = current_time - 102;
   mi->histogram[0] = 0;
   circpad_machine_remove_token(mi);
@@ -886,7 +912,8 @@ test_circuitpadding_closest_token_removal_usec(void *arg)
   /* Test above the highest bin, for coverage */
   tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
             CIRCPAD_STATE_BURST);
-  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+    histogram_edges[BIG_HISTOGRAM_LEN-2] = 100;
   mi->padding_scheduled_at_usec = current_time - 29202;
   circpad_machine_remove_token(mi);
   tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
@@ -1043,7 +1070,7 @@ test_circuitpadding_tokens(void *arg)
 
   // Test 1: converting usec->bin->usec->bin
   // Bin 0+1 have different semantics.
-  for (circpad_delay_t i = 0; i <= state->start_usec+1; i++) {
+  for (circpad_delay_t i = 0; i <= state->histogram_edges[0]; i++) {
     int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0],
                                             i);
     circpad_delay_t usec =
@@ -1053,8 +1080,9 @@ test_circuitpadding_tokens(void *arg)
     tt_int_op(bin, OP_EQ, bin2);
     tt_int_op(i, OP_LE, usec);
   }
-  for (circpad_delay_t i = state->start_usec+1;
-           i <= state->start_usec + state->range_usec; i++) {
+  for (circpad_delay_t i = state->histogram_edges[0]+1;
+       i <= state->histogram_edges[0] +
+         state->histogram_edges[state->histogram_len-2]; i++) {
     int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0],
                                             i);
     circpad_delay_t usec =
@@ -1106,8 +1134,7 @@ test_circuitpadding_tokens(void *arg)
   /* 2.c. Bin 0 */
   {
     tt_int_op(mi->histogram[0], OP_EQ, 1);
-    circpad_machine_remove_higher_token(mi,
-         state->start_usec/2);
+    circpad_machine_remove_higher_token(mi, state->histogram_edges[0]/2);
     tt_int_op(mi->histogram[0], OP_EQ, 0);
   }
 
@@ -1126,8 +1153,7 @@ test_circuitpadding_tokens(void *arg)
   /* 3.a. Bin 0 */
   {
     tt_int_op(mi->histogram[0], OP_EQ, 1);
-    circpad_machine_remove_higher_token(mi,
-         state->start_usec/2);
+    circpad_machine_remove_higher_token(mi, state->histogram_edges[0]/2);
     tt_int_op(mi->histogram[0], OP_EQ, 0);
   }
 
@@ -1615,15 +1641,20 @@ helper_create_conditional_machine(void)
   ret->states[CIRCPAD_STATE_BURST].
       next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
 
+  /* Use EXACT removal strategy, otherwise setup_tokens() does not work */
   ret->states[CIRCPAD_STATE_BURST].token_removal =
-      CIRCPAD_TOKEN_REMOVAL_NONE;
+      CIRCPAD_TOKEN_REMOVAL_EXACT;
 
   ret->states[CIRCPAD_STATE_BURST].histogram_len = 3;
-  ret->states[CIRCPAD_STATE_BURST].start_usec = 0;
-  ret->states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+
+  ret->states[CIRCPAD_STATE_BURST].histogram_edges[0] = 0;
+  ret->states[CIRCPAD_STATE_BURST].histogram_edges[1] = 1;
+  ret->states[CIRCPAD_STATE_BURST].histogram_edges[2] = 1000000;
+
   ret->states[CIRCPAD_STATE_BURST].histogram[0] = 6;
   ret->states[CIRCPAD_STATE_BURST].histogram[1] = 0;
-  ret->states[CIRCPAD_STATE_BURST].histogram[1] = 0;
+  ret->states[CIRCPAD_STATE_BURST].histogram[2] = 0;
+
   ret->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 6;
   ret->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 0;
   ret->states[CIRCPAD_STATE_BURST].length_includes_nonpadding = 1;
@@ -1654,8 +1685,7 @@ helper_create_conditional_machines(void)
   add->conditions.state_mask = CIRCPAD_CIRC_BUILDING|
            CIRCPAD_CIRC_NO_STREAMS|CIRCPAD_CIRC_HAS_RELAY_EARLY;
   add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
-
-  smartlist_add(origin_padding_machines, add);
+  register_padding_machine(add, origin_padding_machines);
 
   add = helper_create_conditional_machine();
   add->machine_num = 3;
@@ -1674,15 +1704,15 @@ helper_create_conditional_machines(void)
   add->conditions.state_mask = CIRCPAD_CIRC_OPENED|
            CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY;
   add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
-  smartlist_add(origin_padding_machines, add);
+  register_padding_machine(add, origin_padding_machines);
 
   add = helper_create_conditional_machine();
   add->machine_num = 2;
-  smartlist_add(relay_padding_machines, add);
+  register_padding_machine(add, relay_padding_machines);
 
   add = helper_create_conditional_machine();
   add->machine_num = 3;
-  smartlist_add(relay_padding_machines, add);
+  register_padding_machine(add, relay_padding_machines);
 }
 
 void
@@ -2069,48 +2099,48 @@ helper_circpad_circ_distribution_machine_setup(int min, int max)
   zero_st->iat_dist.type = CIRCPAD_DIST_UNIFORM;
   zero_st->iat_dist.param1 = min;
   zero_st->iat_dist.param2 = max;
-  zero_st->start_usec = min;
-  zero_st->range_usec = max;
+  zero_st->dist_added_shift_usec = min;
+  zero_st->dist_max_sample_usec = max;
 
   circpad_state_t *first_st = &circ_client_machine.states[1];
   first_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 2;
   first_st->iat_dist.type = CIRCPAD_DIST_LOGISTIC;
   first_st->iat_dist.param1 = min;
   first_st->iat_dist.param2 = max;
-  first_st->start_usec = min;
-  first_st->range_usec = max;
+  first_st->dist_added_shift_usec = min;
+  first_st->dist_max_sample_usec = max;
 
   circpad_state_t *second_st = &circ_client_machine.states[2];
   second_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 3;
   second_st->iat_dist.type = CIRCPAD_DIST_LOG_LOGISTIC;
   second_st->iat_dist.param1 = min;
   second_st->iat_dist.param2 = max;
-  second_st->start_usec = min;
-  second_st->range_usec = max;
+  second_st->dist_added_shift_usec = min;
+  second_st->dist_max_sample_usec = max;
 
   circpad_state_t *third_st = &circ_client_machine.states[3];
   third_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 4;
   third_st->iat_dist.type = CIRCPAD_DIST_GEOMETRIC;
   third_st->iat_dist.param1 = min;
   third_st->iat_dist.param2 = max;
-  third_st->start_usec = min;
-  third_st->range_usec = max;
+  third_st->dist_added_shift_usec = min;
+  third_st->dist_max_sample_usec = max;
 
   circpad_state_t *fourth_st = &circ_client_machine.states[4];
   fourth_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 5;
   fourth_st->iat_dist.type = CIRCPAD_DIST_WEIBULL;
   fourth_st->iat_dist.param1 = min;
   fourth_st->iat_dist.param2 = max;
-  fourth_st->start_usec = min;
-  fourth_st->range_usec = max;
+  fourth_st->dist_added_shift_usec = min;
+  fourth_st->dist_max_sample_usec = max;
 
   circpad_state_t *fifth_st = &circ_client_machine.states[5];
   fifth_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 6;
   fifth_st->iat_dist.type = CIRCPAD_DIST_PARETO;
   fifth_st->iat_dist.param1 = min;
   fifth_st->iat_dist.param2 = max;
-  fifth_st->start_usec = min;
-  fifth_st->range_usec = max;
+  fifth_st->dist_added_shift_usec = min;
+  fifth_st->dist_max_sample_usec = max;
 }
 
 /** Simple test that the padding delays sampled from a uniform distribution