Selaa lähdekoodia

Merge remote-tracking branch 'asn-github/adaptive_padding-final'

Nick Mathewson 6 vuotta sitten
vanhempi
commit
b169c8c14f
53 muutettua tiedostoa jossa 10102 lisäystä ja 40 poistoa
  1. 28 7
      doc/tor.1.txt
  2. 6 0
      src/app/config/config.c
  3. 3 0
      src/app/config/or_options_st.h
  4. 8 2
      src/app/main/main.c
  5. 2 0
      src/core/include.am
  6. 26 0
      src/core/or/circuit_st.h
  7. 22 1
      src/core/or/circuitbuild.c
  8. 4 0
      src/core/or/circuitlist.c
  9. 2562 0
      src/core/or/circuitpadding.c
  10. 696 0
      src/core/or/circuitpadding.h
  11. 14 0
      src/core/or/circuituse.c
  12. 5 0
      src/core/or/connection_edge.c
  13. 7 0
      src/core/or/or.h
  14. 4 0
      src/core/or/origin_circuit_st.h
  15. 6 1
      src/core/or/protover.c
  16. 1 0
      src/core/or/protover.h
  17. 25 11
      src/core/or/relay.c
  18. 5 0
      src/core/or/relay.h
  19. 2 0
      src/core/or/versions.c
  20. 1 2
      src/feature/hibernate/hibernate.c
  21. 2 0
      src/feature/nodelist/networkstatus.c
  22. 2 2
      src/feature/nodelist/nodelist.c
  23. 2 0
      src/feature/nodelist/routerlist.c
  24. 11 0
      src/lib/crypt_ops/crypto_rand.c
  25. 1 0
      src/lib/crypt_ops/crypto_rand.h
  26. 1 0
      src/lib/defs/include.am
  27. 23 0
      src/lib/defs/time.h
  28. 2 0
      src/lib/math/.may_include
  29. 25 0
      src/lib/math/fp.c
  30. 1 0
      src/lib/math/fp.h
  31. 4 2
      src/lib/math/include.am
  32. 1717 0
      src/lib/math/prob_distr.c
  33. 158 0
      src/lib/math/prob_distr.h
  34. 13 0
      src/lib/smartlist_core/smartlist_foreach.h
  35. 1 0
      src/lib/time/.may_include
  36. 2 2
      src/lib/time/compat_time.c
  37. 1 1
      src/lib/time/compat_time.h
  38. 1 2
      src/lib/time/tvdiff.c
  39. 6 2
      src/rust/protover/protover.rs
  40. 1 0
      src/test/Makefile.nmake
  41. 3 0
      src/test/include.am
  42. 64 0
      src/test/prob_distr_mpfr_ref.c
  43. 2 0
      src/test/test.c
  44. 3 0
      src/test/test.h
  45. 2356 0
      src/test/test_circuitpadding.c
  46. 25 0
      src/test/test_containers.c
  47. 1428 0
      src/test/test_prob_distr.c
  48. 1 0
      src/test/test_slow.c
  49. 24 1
      src/test/test_util.c
  50. 549 0
      src/trunnel/circpad_negotiation.c
  51. 195 0
      src/trunnel/circpad_negotiation.h
  52. 44 0
      src/trunnel/circpad_negotiation.trunnel
  53. 7 4
      src/trunnel/include.am

+ 28 - 7
doc/tor.1.txt

@@ -1021,6 +1021,26 @@ The following options are useful only for clients (that is, if
     The .exit address notation, if enabled via MapAddress, overrides
     The .exit address notation, if enabled via MapAddress, overrides
     this option.
     this option.
 
 
+[[MiddleNodes]] **MiddleNodes** __node__,__node__,__...__::
+    A list of identity fingerprints and country codes of nodes
+    to use for "middle" hops in your normal circuits.
+    Normal circuits include all circuits except for direct connections
+    to directory servers. Middle hops are all hops other than exit and entry. +
++
+    This is an **experimental** feature that is meant to be used by researchers
+    and developers to test new features in the Tor network safely. Using it
+    without care will strongly influence your anonymity. This feature might get
+    removed in the future.
++
+    The HSLayer2Node and HSLayer3Node options override this option for onion
+    service circuits, if they are set. The vanguards addon will read this
+    option, and if set, it will set HSLayer2Nodes and HSLayer3Nodes to nodes
+    from this set.
++
+    The ExcludeNodes option overrides this option: any node listed in both
+    MiddleNodes and ExcludeNodes is treated as excluded. See
+    the **ExcludeNodes** option for more information on how to specify nodes.
+
 [[EntryNodes]] **EntryNodes** __node__,__node__,__...__::
 [[EntryNodes]] **EntryNodes** __node__,__node__,__...__::
     A list of identity fingerprints and country codes of nodes
     A list of identity fingerprints and country codes of nodes
     to use for the first hop in your normal circuits.
     to use for the first hop in your normal circuits.
@@ -1037,13 +1057,14 @@ The following options are useful only for clients (that is, if
     If StrictNodes is set to 1, Tor will treat solely the ExcludeNodes option
     If StrictNodes is set to 1, Tor will treat solely the ExcludeNodes option
     as a requirement to follow for all the circuits you generate, even if
     as a requirement to follow for all the circuits you generate, even if
     doing so will break functionality for you (StrictNodes applies to neither
     doing so will break functionality for you (StrictNodes applies to neither
-    ExcludeExitNodes nor to ExitNodes).  If StrictNodes is set to 0, Tor will
-    still try to avoid nodes in the ExcludeNodes list, but it will err on the
-    side of avoiding unexpected errors.  Specifically, StrictNodes 0 tells Tor
-    that it is okay to use an excluded node when it is *necessary* to perform
-    relay reachability self-tests, connect to a hidden service, provide a
-    hidden service to a client, fulfill a .exit request, upload directory
-    information, or download directory information.  (Default: 0)
+    ExcludeExitNodes nor to ExitNodes, nor to MiddleNodes).  If StrictNodes
+    is set to 0, Tor will still try to avoid nodes in the ExcludeNodes list,
+    but it will err on the side of avoiding unexpected errors.
+    Specifically, StrictNodes 0 tells Tor that it is okay to use an excluded
+    node when it is *necessary* to perform relay reachability self-tests,
+    connect to a hidden service, provide a hidden service to a client,
+    fulfill a .exit request, upload directory information, or download
+    directory information.  (Default: 0)
 
 
 [[FascistFirewall]] **FascistFirewall** **0**|**1**::
 [[FascistFirewall]] **FascistFirewall** **0**|**1**::
     If 1, Tor will only create outgoing connections to ORs running on ports
     If 1, Tor will only create outgoing connections to ORs running on ports

+ 6 - 0
src/app/config/config.c

@@ -421,6 +421,10 @@ static config_var_t option_vars_[] = {
   V(ExcludeExitNodes,            ROUTERSET, NULL),
   V(ExcludeExitNodes,            ROUTERSET, NULL),
   OBSOLETE("ExcludeSingleHopRelays"),
   OBSOLETE("ExcludeSingleHopRelays"),
   V(ExitNodes,                   ROUTERSET, NULL),
   V(ExitNodes,                   ROUTERSET, NULL),
+  /* Researchers need a way to tell their clients to use specific
+   * middles that they also control, to allow safe live-network
+   * experimentation with new padding machines. */
+  V(MiddleNodes,                 ROUTERSET, NULL),
   V(ExitPolicy,                  LINELIST, NULL),
   V(ExitPolicy,                  LINELIST, NULL),
   V(ExitPolicyRejectPrivate,     BOOL,     "1"),
   V(ExitPolicyRejectPrivate,     BOOL,     "1"),
   V(ExitPolicyRejectLocalInterfaces, BOOL, "0"),
   V(ExitPolicyRejectLocalInterfaces, BOOL, "0"),
@@ -1693,6 +1697,7 @@ options_need_geoip_info(const or_options_t *options, const char **reason_out)
   int routerset_usage =
   int routerset_usage =
     routerset_needs_geoip(options->EntryNodes) ||
     routerset_needs_geoip(options->EntryNodes) ||
     routerset_needs_geoip(options->ExitNodes) ||
     routerset_needs_geoip(options->ExitNodes) ||
+    routerset_needs_geoip(options->MiddleNodes) ||
     routerset_needs_geoip(options->ExcludeExitNodes) ||
     routerset_needs_geoip(options->ExcludeExitNodes) ||
     routerset_needs_geoip(options->ExcludeNodes) ||
     routerset_needs_geoip(options->ExcludeNodes) ||
     routerset_needs_geoip(options->HSLayer2Nodes) ||
     routerset_needs_geoip(options->HSLayer2Nodes) ||
@@ -2132,6 +2137,7 @@ options_act(const or_options_t *old_options)
                          options->HSLayer2Nodes) ||
                          options->HSLayer2Nodes) ||
         !routerset_equal(old_options->HSLayer3Nodes,
         !routerset_equal(old_options->HSLayer3Nodes,
                          options->HSLayer3Nodes) ||
                          options->HSLayer3Nodes) ||
+        !routerset_equal(old_options->MiddleNodes, options->MiddleNodes) ||
         options->StrictNodes != old_options->StrictNodes) {
         options->StrictNodes != old_options->StrictNodes) {
       log_info(LD_CIRC,
       log_info(LD_CIRC,
                "Changed to using entry guards or bridges, or changed "
                "Changed to using entry guards or bridges, or changed "

+ 3 - 0
src/app/config/or_options_st.h

@@ -72,6 +72,9 @@ struct or_options_t {
   routerset_t *ExitNodes; /**< Structure containing nicknames, digests,
   routerset_t *ExitNodes; /**< Structure containing nicknames, digests,
                            * country codes and IP address patterns of ORs to
                            * country codes and IP address patterns of ORs to
                            * consider as exits. */
                            * consider as exits. */
+  routerset_t *MiddleNodes; /**< Structure containing nicknames, digests,
+                             * country codes and IP address patterns of ORs to
+                             * consider as middles. */
   routerset_t *EntryNodes;/**< Structure containing nicknames, digests,
   routerset_t *EntryNodes;/**< Structure containing nicknames, digests,
                            * country codes and IP address patterns of ORs to
                            * country codes and IP address patterns of ORs to
                            * consider as entry points. */
                            * consider as entry points. */

+ 8 - 2
src/app/main/main.c

@@ -22,6 +22,7 @@
 #include "core/mainloop/netstatus.h"
 #include "core/mainloop/netstatus.h"
 #include "core/or/channel.h"
 #include "core/or/channel.h"
 #include "core/or/channelpadding.h"
 #include "core/or/channelpadding.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/channeltls.h"
 #include "core/or/channeltls.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuitmux_ewma.h"
 #include "core/or/circuitmux_ewma.h"
@@ -645,9 +646,13 @@ tor_init(int argc, char *argv[])
   /* The options are now initialised */
   /* The options are now initialised */
   const or_options_t *options = get_options();
   const or_options_t *options = get_options();
 
 
-  /* Initialize channelpadding parameters to defaults until we get
-   * a consensus */
+  /* Initialize channelpadding and circpad parameters to defaults
+   * until we get a consensus */
   channelpadding_new_consensus_params(NULL);
   channelpadding_new_consensus_params(NULL);
+  circpad_new_consensus_params(NULL);
+
+  /* Initialize circuit padding to defaults+torrc until we get a consensus */
+  circpad_machines_init();
 
 
   /* Initialize predicted ports list after loading options */
   /* Initialize predicted ports list after loading options */
   predicted_ports_init();
   predicted_ports_init();
@@ -766,6 +771,7 @@ tor_free_all(int postfork)
   dns_free_all();
   dns_free_all();
   clear_pending_onions();
   clear_pending_onions();
   circuit_free_all();
   circuit_free_all();
+  circpad_machines_free();
   entry_guards_free_all();
   entry_guards_free_all();
   pt_free_all();
   pt_free_all();
   channel_tls_free_all();
   channel_tls_free_all();

+ 2 - 0
src/core/include.am

@@ -32,6 +32,7 @@ LIBTOR_APP_A_SOURCES = 				\
 	src/core/or/circuitlist.c		\
 	src/core/or/circuitlist.c		\
 	src/core/or/circuitmux.c		\
 	src/core/or/circuitmux.c		\
 	src/core/or/circuitmux_ewma.c		\
 	src/core/or/circuitmux_ewma.c		\
+	src/core/or/circuitpadding.c		\
 	src/core/or/circuitstats.c		\
 	src/core/or/circuitstats.c		\
 	src/core/or/circuituse.c		\
 	src/core/or/circuituse.c		\
 	src/core/or/command.c			\
 	src/core/or/command.c			\
@@ -227,6 +228,7 @@ noinst_HEADERS +=					\
 	src/core/or/circuitmux.h			\
 	src/core/or/circuitmux.h			\
 	src/core/or/circuitmux_ewma.h			\
 	src/core/or/circuitmux_ewma.h			\
 	src/core/or/circuitstats.h			\
 	src/core/or/circuitstats.h			\
+	src/core/or/circuitpadding.h			\
 	src/core/or/circuituse.h			\
 	src/core/or/circuituse.h			\
 	src/core/or/command.h				\
 	src/core/or/command.h				\
 	src/core/or/connection_edge.h			\
 	src/core/or/connection_edge.h			\

+ 26 - 0
src/core/or/circuit_st.h

@@ -12,6 +12,11 @@
 #include "core/or/cell_queue_st.h"
 #include "core/or/cell_queue_st.h"
 
 
 struct hs_token_t;
 struct hs_token_t;
+struct circpad_machine_spec_t;
+struct circpad_machine_state_t;
+
+/** Number of padding state machines on a circuit. */
+#define CIRCPAD_MAX_MACHINES (2)
 
 
 /** "magic" value for an origin_circuit_t */
 /** "magic" value for an origin_circuit_t */
 #define ORIGIN_CIRCUIT_MAGIC 0x35315243u
 #define ORIGIN_CIRCUIT_MAGIC 0x35315243u
@@ -177,6 +182,27 @@ struct circuit_t {
   /** Hashtable node: used to look up the circuit by its HS token using the HS
   /** Hashtable node: used to look up the circuit by its HS token using the HS
       circuitmap. */
       circuitmap. */
   HT_ENTRY(circuit_t) hs_circuitmap_node;
   HT_ENTRY(circuit_t) hs_circuitmap_node;
+
+  /** Adaptive Padding state machines: these are immutable. The state machines
+   *  that come from the consensus are saved to a global structure, to avoid
+   *  per-circuit allocations. This merely points to the global copy in
+   *  origin_padding_machines or relay_padding_machines that should never
+   *  change or get deallocated.
+   *
+   *  Each element of this array corresponds to a different padding machine,
+   *  and we can have up to CIRCPAD_MAX_MACHINES such machines. */
+  const struct circpad_machine_spec_t *padding_machine[CIRCPAD_MAX_MACHINES];
+
+  /** Adaptive Padding machine info for above machines. This is the
+   *  per-circuit mutable information, such as the current state and
+   *  histogram token counts. Some of it is optional (aka NULL).
+   *  If a machine is being shut down, these indexes can be NULL
+   *  without the corresponding padding_machine being NULL, while we
+   *  wait for the other end to respond to our shutdown request.
+   *
+   *  Each element of this array corresponds to a different padding machine,
+   *  and we can have up to CIRCPAD_MAX_MACHINES such machines. */
+  struct circpad_machine_state_t *padding_info[CIRCPAD_MAX_MACHINES];
 };
 };
 
 
 #endif
 #endif

+ 22 - 1
src/core/or/circuitbuild.c

@@ -43,6 +43,7 @@
 #include "core/or/circuitlist.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuituse.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/command.h"
 #include "core/or/command.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_or.h"
 #include "core/or/connection_or.h"
@@ -950,12 +951,15 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
   crypt_path_t *hop = onion_next_hop_in_cpath(circ->cpath);
   crypt_path_t *hop = onion_next_hop_in_cpath(circ->cpath);
   circuit_build_times_handle_completed_hop(circ);
   circuit_build_times_handle_completed_hop(circ);
 
 
+  circpad_machine_event_circ_added_hop(circ);
+
   if (hop) {
   if (hop) {
     /* Case two: we're on a hop after the first. */
     /* Case two: we're on a hop after the first. */
     return circuit_send_intermediate_onion_skin(circ, hop);
     return circuit_send_intermediate_onion_skin(circ, hop);
   }
   }
 
 
   /* Case three: the circuit is finished. Do housekeeping tasks on it. */
   /* Case three: the circuit is finished. Do housekeeping tasks on it. */
+  circpad_machine_event_circ_built(circ);
   return circuit_build_no_more_hops(circ);
   return circuit_build_no_more_hops(circ);
 }
 }
 
 
@@ -2606,7 +2610,24 @@ choose_good_middle_server(uint8_t purpose,
     return choice;
     return choice;
   }
   }
 
 
-  choice = router_choose_random_node(excluded, options->ExcludeNodes, flags);
+  if (options->MiddleNodes) {
+    smartlist_t *sl = smartlist_new();
+    routerset_get_all_nodes(sl, options->MiddleNodes,
+                            options->ExcludeNodes, 1);
+
+    smartlist_subtract(sl, excluded);
+
+    choice = node_sl_choose_by_bandwidth(sl, WEIGHT_FOR_MID);
+    smartlist_free(sl);
+    if (choice) {
+      log_fn(LOG_INFO, LD_CIRC, "Chose fixed middle node: %s",
+          hex_str(choice->identity, DIGEST_LEN));
+    } else {
+      log_fn(LOG_NOTICE, LD_CIRC, "Restricted middle not available");
+    }
+  } else {
+    choice = router_choose_random_node(excluded, options->ExcludeNodes, flags);
+  }
   smartlist_free(excluded);
   smartlist_free(excluded);
   return choice;
   return choice;
 }
 }

+ 4 - 0
src/core/or/circuitlist.c

@@ -62,6 +62,7 @@
 #include "core/or/circuitlist.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuituse.h"
 #include "core/or/circuituse.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuitstats.h"
+#include "core/or/circuitpadding.h"
 #include "core/mainloop/connection.h"
 #include "core/mainloop/connection.h"
 #include "app/config/config.h"
 #include "app/config/config.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_edge.h"
@@ -1231,6 +1232,9 @@ circuit_free_(circuit_t *circ)
            CIRCUIT_IS_ORIGIN(circ) ?
            CIRCUIT_IS_ORIGIN(circ) ?
               TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0);
               TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0);
 
 
+  /* Free any circuit padding structures */
+  circpad_circuit_free_all_machineinfos(circ);
+
   if (should_free) {
   if (should_free) {
     memwipe(mem, 0xAA, memlen); /* poison memory */
     memwipe(mem, 0xAA, memlen); /* poison memory */
     tor_free(mem);
     tor_free(mem);

+ 2562 - 0
src/core/or/circuitpadding.c

@@ -0,0 +1,2562 @@
+/* Copyright (c) 2017 The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file circuitpadding.c
+ * \brief Circuit-level padding implementation
+ *
+ * \details
+ *
+ * This file implements Tor proposal 254 "Padding Negotiation" which is heavily
+ * inspired by the paper "Toward an Efficient Website Fingerprinting Defense"
+ * by M. Juarez, M. Imani, M. Perry, C. Diaz, M. Wright.
+ *
+ * In particular the code in this file describes mechanisms for clients to
+ * negotiate various types of circuit-level padding from relays.
+ *
+ * Each padding type is described by a state machine (circpad_machine_spec_t),
+ * which is also referred as a "padding machine" in this file.  Currently,
+ * these state machines are hardcoded in the source code (e.g. see
+ * circpad_circ_client_machine_init()), but in the future we will be able to
+ * serialize them in the torrc or the consensus.
+ *
+ * As specified by prop#254, clients can negotiate padding with relays by using
+ * PADDING_NEGOTIATE cells. After successful padding negotiation, padding
+ * machines are assigned to the circuit in their mutable form as a
+ * circpad_machine_state_t.
+ *
+ * Each state of a padding state machine can be either:
+ * - A histogram that specifies inter-arrival padding delays.
+ * - Or a parametrized probability distribution that specifies inter-arrival
+ *   delays (see circpad_distribution_type_t).
+ *
+ * Padding machines start from the START state and finish with the END
+ * state. They can transition between states using the events in
+ * circpad_event_t.
+ *
+ * When a padding machine reaches the END state, it gets wiped from the circuit
+ * so that other padding machines can take over if needed (see
+ * circpad_machine_spec_transitioned_to_end()).
+ **/
+
+#define CIRCUITPADDING_PRIVATE
+
+#include <math.h>
+#include "lib/math/fp.h"
+#include "lib/math/prob_distr.h"
+#include "core/or/or.h"
+#include "core/or/circuitpadding.h"
+#include "core/or/circuitlist.h"
+#include "core/or/circuituse.h"
+#include "core/or/relay.h"
+#include "feature/stats/rephist.h"
+#include "feature/nodelist/networkstatus.h"
+
+#include "core/or/channel.h"
+
+#include "lib/time/compat_time.h"
+#include "lib/defs/time.h"
+#include "lib/crypt_ops/crypto_rand.h"
+
+#include "core/or/crypt_path_st.h"
+#include "core/or/circuit_st.h"
+#include "core/or/origin_circuit_st.h"
+#include "feature/nodelist/routerstatus_st.h"
+#include "feature/nodelist/node_st.h"
+#include "core/or/cell_st.h"
+#include "core/or/extend_info_st.h"
+#include "core/crypto/relay_crypto.h"
+#include "feature/nodelist/nodelist.h"
+
+#include "app/config/config.h"
+
+static inline circpad_purpose_mask_t circpad_circ_purpose_to_mask(uint8_t
+                                          circ_purpose);
+static inline circpad_circuit_state_t circpad_circuit_state(
+                                        origin_circuit_t *circ);
+static void circpad_setup_machine_on_circ(circuit_t *on_circ,
+                                        const circpad_machine_spec_t *machine);
+static double circpad_distribution_sample(circpad_distribution_t dist);
+
+/** Cached consensus params */
+static uint8_t circpad_global_max_padding_percent;
+static uint16_t circpad_global_allowed_cells;
+
+/** Global cell counts, for rate limiting */
+static uint64_t circpad_global_padding_sent;
+static uint64_t circpad_global_nonpadding_sent;
+
+/** This is the list of circpad_machine_spec_t's parsed from consensus and
+ *  torrc that have origin_side == 1 (ie: are for client side).
+ *
+ *  The machines in this smartlist are considered immutable and they are used
+ *  as-is by circuits so they should not change or get deallocated in Tor's
+ *  runtime and as long as circuits are alive. */
+STATIC smartlist_t *origin_padding_machines = NULL;
+
+/** This is the list of circpad_machine_spec_t's parsed from consensus and
+ *  torrc that have origin_side == 0 (ie: are for relay side).
+ *
+ *  The machines in this smartlist are considered immutable and they are used
+ *  as-is by circuits so they should not change or get deallocated in Tor's
+ *  runtime and as long as circuits are alive. */
+STATIC smartlist_t *relay_padding_machines = NULL;
+
+/** Loop over the current padding state machines using <b>loop_var</b> as the
+ *  loop variable. */
+#define FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var)                         \
+  STMT_BEGIN                                                             \
+  for (int loop_var = 0; loop_var < CIRCPAD_MAX_MACHINES; loop_var++) {
+#define FOR_EACH_CIRCUIT_MACHINE_END } STMT_END ;
+
+/** Loop over the current active padding state machines using <b>loop_var</b>
+ *  as the loop variable. If a machine is not active, skip it. */
+#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(loop_var, circ)            \
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(loop_var)                               \
+  if (!(circ)->padding_info[loop_var])                           \
+    continue;
+#define FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END } STMT_END ;
+
+/**
+ * Return a human-readable description for a circuit padding state.
+ */
+static const char *
+circpad_state_to_string(circpad_statenum_t state)
+{
+  const char *descr;
+
+  switch (state) {
+  case CIRCPAD_STATE_START:
+    descr = "START";
+    break;
+  case CIRCPAD_STATE_BURST:
+    descr = "BURST";
+    break;
+  case CIRCPAD_STATE_GAP:
+    descr = "GAP";
+    break;
+  case CIRCPAD_STATE_END:
+    descr = "END";
+    break;
+  default:
+    descr = "CUSTOM"; // XXX: Just return # in static char buf?
+  }
+
+  return descr;
+}
+
+/**
+ * Free the machineinfo at an index
+ */
+static void
+circpad_circuit_machineinfo_free_idx(circuit_t *circ, int idx)
+{
+  if (circ->padding_info[idx]) {
+    tor_free(circ->padding_info[idx]->histogram);
+    timer_free(circ->padding_info[idx]->padding_timer);
+    tor_free(circ->padding_info[idx]);
+  }
+}
+
+/** Free all the machineinfos in <b>circ</b> that match <b>machine_num</b>. */
+static void
+free_circ_machineinfos_with_machine_num(circuit_t *circ, int machine_num)
+{
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) {
+    if (circ->padding_machine[i] &&
+        circ->padding_machine[i]->machine_num == machine_num) {
+      circpad_circuit_machineinfo_free_idx(circ, i);
+      circ->padding_machine[i] = NULL;
+    }
+  } FOR_EACH_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * Free all padding machines and mutable info associated with circuit
+ */
+void
+circpad_circuit_free_all_machineinfos(circuit_t *circ)
+{
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) {
+    circpad_circuit_machineinfo_free_idx(circ, i);
+  } FOR_EACH_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * Allocate a new mutable machineinfo structure.
+ */
+STATIC circpad_machine_state_t *
+circpad_circuit_machineinfo_new(circuit_t *on_circ, int machine_index)
+{
+  circpad_machine_state_t *mi =
+    tor_malloc_zero(sizeof(circpad_machine_state_t));
+  mi->machine_index = machine_index;
+  mi->on_circ = on_circ;
+
+  return mi;
+}
+
+/**
+ * Return the circpad_state_t for the current state based on the
+ * mutable info.
+ *
+ * This function returns NULL when the machine is in the end state or in an
+ * invalid state.
+ */
+STATIC const circpad_state_t *
+circpad_machine_current_state(const circpad_machine_state_t *mi)
+{
+  const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi);
+
+  if (mi->current_state == CIRCPAD_STATE_END) {
+    return NULL;
+  } else if (BUG(mi->current_state >= machine->num_states)) {
+    log_fn(LOG_WARN,LD_CIRC,
+           "Invalid circuit padding state %d",
+           mi->current_state);
+
+    return NULL;
+  }
+
+  return &machine->states[mi->current_state];
+}
+
+/**
+ * Calculate the lower bound of a histogram bin. The upper bound
+ * is obtained by calling this function with bin+1, and subtracting 1.
+ *
+ * The 0th bin has a special value -- it only represents start_usec.
+ * This is so we can specify a probability on 0-delay values.
+ *
+ * After bin 0, bins are exponentially spaced, so that each subsequent
+ * bin is twice as large as the previous. This is done so that higher
+ * time resolution is given to lower time values.
+ *
+ * The infinity bin is a the last bin in the array (histogram_len-1).
+ * It has a usec value of CIRCPAD_DELAY_INFINITE (UINT32_MAX).
+ */
+STATIC circpad_delay_t
+circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi,
+                              circpad_hist_index_t bin)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  circpad_delay_t start_usec;
+
+  /* Our state should have been checked to be non-null by the caller
+   * (circpad_machine_remove_token()) */
+  if (BUG(state == NULL)) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  if (bin >= CIRCPAD_INFINITY_BIN(state))
+    return CIRCPAD_DELAY_INFINITE;
+
+  if (bin == 0)
+    return start_usec;
+
+  if (bin == 1)
+    return start_usec+1;
+
+  /* The bin widths double every index, so that we can have more resolution
+   * for lower time values in the histogram. */
+  const circpad_time_t bin_width_exponent =
+        1 << (CIRCPAD_INFINITY_BIN(state) - bin);
+  return (circpad_delay_t)MIN(start_usec +
+                              state->range_usec/bin_width_exponent,
+                              CIRCPAD_DELAY_INFINITE);
+}
+
+/** Return the midpoint of the histogram bin <b>bin_index</b>. */
+static circpad_delay_t
+circpad_get_histogram_bin_midpoint(const circpad_machine_state_t *mi,
+                           int bin_index)
+{
+  circpad_delay_t left_bound = circpad_histogram_bin_to_usec(mi, bin_index);
+  circpad_delay_t right_bound =
+    circpad_histogram_bin_to_usec(mi, bin_index+1)-1;
+
+  return left_bound + (right_bound - left_bound)/2;
+}
+
+/**
+ * Return the bin that contains the usec argument.
+ * "Contains" is defined as us in [lower, upper).
+ *
+ * This function will never return the infinity bin (histogram_len-1),
+ * in order to simplify the rest of the code.
+ *
+ * This means that technically the last bin (histogram_len-2)
+ * has range [start_usec+range_usec, CIRCPAD_DELAY_INFINITE].
+ */
+STATIC circpad_hist_index_t
+circpad_histogram_usec_to_bin(const circpad_machine_state_t *mi,
+                              circpad_delay_t usec)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  circpad_delay_t start_usec;
+  int32_t bin; /* Larger than return type to properly clamp overflow */
+
+  /* Our state should have been checked to be non-null by the caller
+   * (circpad_machine_remove_token()) */
+  if (BUG(state == NULL)) {
+    return 0;
+  }
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  /* The first bin (#0) has zero width and starts (and ends) at start_usec. */
+  if (usec <= start_usec)
+    return 0;
+
+  if (usec == start_usec+1)
+    return 1;
+
+  const circpad_time_t histogram_range_usec = state->range_usec;
+  /* We need to find the bin corresponding to our position in the range.
+   * Since bins are exponentially spaced in powers of two, we need to
+   * take the log2 of our position in histogram_range_usec. However,
+   * since tor_log2() returns the floor(log2(u64)), we have to adjust
+   * it to behave like ceil(log2(u64)). This is verified in our tests
+   * to properly invert the operation done in
+   * circpad_histogram_bin_to_usec(). */
+  bin = CIRCPAD_INFINITY_BIN(state) -
+    tor_log2(2*histogram_range_usec/(usec-start_usec+1));
+
+  /* Clamp the return value to account for timevals before the start
+   * of bin 0, or after the last bin. Don't return the infinity bin
+   * index. */
+  bin = MIN(MAX(bin, 1), CIRCPAD_INFINITY_BIN(state)-1);
+  return bin;
+}
+
+/**
+ * This function frees any token bins allocated from a previous state
+ *
+ * Called after a state transition, or if the bins are empty.
+ */
+STATIC void
+circpad_machine_setup_tokens(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+
+  /* If this state doesn't exist, or doesn't have token removal,
+   * free any previous state's histogram, and bail */
+  if (!state || state->token_removal == CIRCPAD_TOKEN_REMOVAL_NONE) {
+    if (mi->histogram) {
+      tor_free(mi->histogram);
+      mi->histogram = NULL;
+      mi->histogram_len = 0;
+    }
+    return;
+  }
+
+  /* Try to avoid re-mallocing if we don't really need to */
+  if (!mi->histogram || (mi->histogram
+          && mi->histogram_len != state->histogram_len)) {
+    tor_free(mi->histogram); // null ok
+    mi->histogram = tor_malloc_zero(sizeof(circpad_hist_token_t)
+                                    *state->histogram_len);
+  }
+  mi->histogram_len = state->histogram_len;
+
+  memcpy(mi->histogram, state->histogram,
+         sizeof(circpad_hist_token_t)*state->histogram_len);
+}
+
+/**
+ * Choose a length for this state (in cells), if specified.
+ */
+static void
+circpad_choose_state_length(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  double length;
+
+  if (!state || state->length_dist.type == CIRCPAD_DIST_NONE) {
+    mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE;
+    return;
+  }
+
+  length = circpad_distribution_sample(state->length_dist);
+  length = MAX(0, length);
+  length += state->start_length;
+  length = MIN(length, state->max_length);
+
+  mi->state_length = clamp_double_to_int64(length);
+}
+
+/**
+ * Sample a value from our iat_dist, and clamp it safely
+ * to circpad_delay_t.
+ */
+static circpad_delay_t
+circpad_distribution_sample_iat_delay(const circpad_state_t *state,
+                                      circpad_delay_t start_usec)
+{
+  double val = circpad_distribution_sample(state->iat_dist);
+  /* These comparisons are safe, because the output is in the range
+   * [0, 2**32), and double has a precision of 53 bits. */
+  val = MAX(0, val);
+  val = MIN(val, state->range_usec);
+
+  /* This addition is exact: val is at most 2**32-1, start_usec
+   * is at most 2**32-1, and doubles have a precision of 53 bits. */
+  val += start_usec;
+
+  /* Clamp the distribution at infinite delay val */
+  return (circpad_delay_t)MIN(tor_llround(val), CIRCPAD_DELAY_INFINITE);
+}
+
+/**
+ * Sample an expected time-until-next-packet delay from the histogram.
+ *
+ * The bin is chosen with probability proportional to the number
+ * of tokens in each bin, and then a time value is chosen uniformly from
+ * that bin's [start,end) time range.
+ */
+STATIC circpad_delay_t
+circpad_machine_sample_delay(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = circpad_machine_current_state(mi);
+  const circpad_hist_token_t *histogram = NULL;
+  circpad_hist_index_t curr_bin = 0;
+  circpad_delay_t bin_start, bin_end;
+  circpad_delay_t start_usec;
+  /* These three must all be larger than circpad_hist_token_t, because
+   * we sum several circpad_hist_token_t values across the histogram */
+  uint64_t curr_weight = 0;
+  uint64_t histogram_total_tokens = 0;
+  uint64_t bin_choice;
+
+  tor_assert(state);
+
+  if (state->use_rtt_estimate)
+    start_usec = mi->rtt_estimate_usec+state->start_usec;
+  else
+    start_usec = state->start_usec;
+
+  if (state->iat_dist.type != CIRCPAD_DIST_NONE) {
+    /* Sample from a fixed IAT distribution and return */
+    return circpad_distribution_sample_iat_delay(state, start_usec);
+  } else if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) {
+    /* We have a mutable histogram. Do basic sanity check and apply: */
+    if (BUG(!mi->histogram) ||
+        BUG(mi->histogram_len != state->histogram_len)) {
+      return CIRCPAD_DELAY_INFINITE;
+    }
+
+    histogram = mi->histogram;
+    for (circpad_hist_index_t b = 0; b < state->histogram_len; b++)
+      histogram_total_tokens += histogram[b];
+  } else {
+    /* We have a histogram, but it's immutable */
+    histogram = state->histogram;
+    histogram_total_tokens = state->histogram_total_tokens;
+  }
+
+  bin_choice = crypto_rand_uint64(histogram_total_tokens);
+
+  /* Skip all the initial zero bins */
+  while (!histogram[curr_bin]) {
+    curr_bin++;
+  }
+  curr_weight = histogram[curr_bin];
+
+  // TODO: This is not constant-time. Pretty sure we don't
+  // really need it to be, though.
+  while (curr_weight < bin_choice) {
+    curr_bin++;
+    /* It should be impossible to run past the end of the histogram */
+    if (BUG(curr_bin >= state->histogram_len)) {
+      return CIRCPAD_DELAY_INFINITE;
+    }
+    curr_weight += histogram[curr_bin];
+  }
+
+  /* Do some basic checking of the current bin we are in */
+  if (BUG(curr_bin >= state->histogram_len) ||
+      BUG(histogram[curr_bin] == 0)) {
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  // Store this index to remove the token upon callback.
+  if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE) {
+    mi->chosen_bin = curr_bin;
+  }
+
+  if (curr_bin >= CIRCPAD_INFINITY_BIN(state)) {
+    if (state->token_removal != CIRCPAD_TOKEN_REMOVAL_NONE &&
+        mi->histogram[curr_bin] > 0) {
+      mi->histogram[curr_bin]--;
+    }
+
+    // Infinity: Don't send a padding packet. Wait for a real packet
+    // and then see if our bins are empty or what else we should do.
+    return CIRCPAD_DELAY_INFINITE;
+  }
+
+  tor_assert(curr_bin < CIRCPAD_INFINITY_BIN(state));
+
+  bin_start = circpad_histogram_bin_to_usec(mi, curr_bin);
+  /* We don't need to reduct 1 from the upper bound because the random range
+   * function below samples from [bin_start, bin_end) */
+  bin_end = circpad_histogram_bin_to_usec(mi, curr_bin+1);
+
+  /* Truncate the high bin in case it's the infinity bin:
+   * Don't actually schedule an "infinite"-1 delay */
+  bin_end = MIN(bin_end, start_usec+state->range_usec);
+
+  // Sample uniformly between histogram[i] to histogram[i+1]-1,
+  // but no need to sample if they are the same timeval (aka bin 0 or bin 1).
+  if (bin_end <= bin_start+1)
+    return bin_start;
+  else
+    return (circpad_delay_t)crypto_rand_uint64_range(bin_start, bin_end);
+}
+
+/**
+ * Sample a value from the specified probability distribution.
+ *
+ * This performs inverse transform sampling
+ * (https://en.wikipedia.org/wiki/Inverse_transform_sampling).
+ *
+ * XXX: These formulas were taken verbatim. Need a floating wizard
+ * to check them for catastropic cancellation and other issues (teor?).
+ * Also: is 32bits of double from [0.0,1.0) enough?
+ */
+static double
+circpad_distribution_sample(circpad_distribution_t dist)
+{
+  log_fn(LOG_DEBUG,LD_CIRC, "Sampling delay with distribution %d",
+         dist.type);
+
+  switch (dist.type) {
+    case CIRCPAD_DIST_NONE:
+      {
+        /* We should not get in here like this */
+        tor_assert_nonfatal_unreached();
+        return 0;
+      }
+    case CIRCPAD_DIST_UNIFORM:
+      {
+        // param2 is upper bound, param1 is lower
+        const struct uniform my_uniform = {
+          .base = UNIFORM(my_uniform),
+          .a = dist.param1,
+          .b = dist.param2,
+        };
+        return dist_sample(&my_uniform.base);
+      }
+    case CIRCPAD_DIST_LOGISTIC:
+      {
+      /* param1 is Mu, param2 is sigma. */
+        const struct logistic my_logistic = {
+          .base = LOGISTIC(my_logistic),
+          .mu = dist.param1,
+          .sigma = dist.param2,
+        };
+        return dist_sample(&my_logistic.base);
+      }
+    case CIRCPAD_DIST_LOG_LOGISTIC:
+      {
+        /* param1 is Alpha, param2 is 1.0/Beta */
+        const struct log_logistic my_log_logistic = {
+          .base = LOG_LOGISTIC(my_log_logistic),
+          .alpha = dist.param1,
+          .beta = dist.param2,
+        };
+        return dist_sample(&my_log_logistic.base);
+      }
+    case CIRCPAD_DIST_GEOMETRIC:
+      {
+        /* param1 is 'p' (success probability) */
+        const struct geometric my_geometric = {
+          .base = GEOMETRIC(my_geometric),
+          .p = dist.param1,
+        };
+        return dist_sample(&my_geometric.base);
+      }
+    case CIRCPAD_DIST_WEIBULL:
+      {
+        /* param1 is k, param2 is Lambda */
+        const struct weibull my_weibull = {
+          .base = WEIBULL(my_weibull),
+          .k = dist.param1,
+          .lambda = dist.param2,
+        };
+        return dist_sample(&my_weibull.base);
+      }
+    case CIRCPAD_DIST_PARETO:
+      {
+        /* param1 is sigma, param2 is xi, no more params for mu so we use 0 */
+        const struct genpareto my_genpareto = {
+          .base = GENPARETO(my_genpareto),
+          .mu = 0,
+          .sigma = dist.param1,
+          .xi = dist.param2,
+        };
+        return dist_sample(&my_genpareto.base);
+      }
+  }
+
+  tor_assert_nonfatal_unreached();
+  return 0;
+}
+
+/**
+ * Find the index of the first bin whose upper bound is
+ * greater than the target, and that has tokens remaining.
+ */
+static circpad_hist_index_t
+circpad_machine_first_higher_index(const circpad_machine_state_t *mi,
+                                   circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi,
+                                                           target_bin_usec);
+
+  /* Don't remove from the infinity bin */
+  for (; bin < CIRCPAD_INFINITY_BIN(mi); bin++) {
+    if (mi->histogram[bin] &&
+        circpad_histogram_bin_to_usec(mi, bin+1) > target_bin_usec) {
+      return bin;
+    }
+  }
+
+  return mi->histogram_len;
+}
+
+/**
+ * Find the index of the first bin whose lower bound is lower or equal to
+ * <b>target_bin_usec</b>, and that still has tokens remaining.
+ */
+static circpad_hist_index_t
+circpad_machine_first_lower_index(const circpad_machine_state_t *mi,
+                                  circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi,
+                                                           target_bin_usec);
+
+  for (; bin >= 0; bin--) {
+    if (mi->histogram[bin] &&
+        circpad_histogram_bin_to_usec(mi, bin) <= target_bin_usec) {
+      return bin;
+    }
+  }
+
+  return -1;
+}
+
+/**
+ * Remove a token from the first non-empty bin whose upper bound is
+ * greater than the target.
+ */
+STATIC void
+circpad_machine_remove_higher_token(circpad_machine_state_t *mi,
+                                    circpad_delay_t target_bin_usec)
+{
+  /* We need to remove the token from the first bin
+   * whose upper bound is greater than the target, and that
+   * has tokens remaining. */
+  circpad_hist_index_t bin = circpad_machine_first_higher_index(mi,
+                                                     target_bin_usec);
+
+  if (bin >= 0 && bin < CIRCPAD_INFINITY_BIN(mi)) {
+    if (!BUG(mi->histogram[bin] == 0)) {
+      mi->histogram[bin]--;
+    }
+  }
+}
+
+/**
+ * Remove a token from the first non-empty bin whose upper bound is
+ * lower than the target.
+ */
+STATIC void
+circpad_machine_remove_lower_token(circpad_machine_state_t *mi,
+                                   circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t bin = circpad_machine_first_lower_index(mi,
+          target_bin_usec);
+
+  if (bin >= 0 && bin < CIRCPAD_INFINITY_BIN(mi)) {
+    if (!BUG(mi->histogram[bin] == 0)) {
+      mi->histogram[bin]--;
+    }
+  }
+}
+
+/* Helper macro: Ensure that the bin has tokens available, and BUG out of the
+ * function if it's not the case. */
+#define ENSURE_BIN_CAPACITY(bin_index) \
+  if (BUG(mi->histogram[bin_index] == 0)) {                   \
+    return;                                                   \
+  }
+
+/**
+ * Remove a token from the closest non-empty bin to the target.
+ *
+ * If use_usec is true, measure "closest" in terms of the next closest bin
+ * midpoint.
+ *
+ * If it is false, use bin index distance only.
+ */
+STATIC void
+circpad_machine_remove_closest_token(circpad_machine_state_t *mi,
+                                     circpad_delay_t target_bin_usec,
+                                     bool use_usec)
+{
+  circpad_hist_index_t lower, higher, current;
+  circpad_hist_index_t bin_to_remove = -1;
+
+  lower = circpad_machine_first_lower_index(mi, target_bin_usec);
+  higher = circpad_machine_first_higher_index(mi, target_bin_usec);
+  current = circpad_histogram_usec_to_bin(mi, target_bin_usec);
+
+  /* Sanity check the results */
+  if (BUG(lower > current) || BUG(higher < current)) {
+    return;
+  }
+
+  /* Take care of edge cases first */
+  if (higher == mi->histogram_len && lower == -1) {
+    /* All bins are empty */
+    return;
+  } else if (higher == mi->histogram_len) {
+    /* All higher bins are empty */
+    ENSURE_BIN_CAPACITY(lower);
+    mi->histogram[lower]--;
+    return;
+  } else if (lower == -1) {
+    /* All lower bins are empty */
+    ENSURE_BIN_CAPACITY(higher);
+    mi->histogram[higher]--;
+    return;
+  }
+
+  /* Now handle the intermediate cases */
+  if (use_usec) {
+    /* Find the closest bin midpoint to the target */
+    circpad_delay_t lower_usec = circpad_get_histogram_bin_midpoint(mi, lower);
+    circpad_delay_t higher_usec =
+      circpad_get_histogram_bin_midpoint(mi, higher);
+
+    if (target_bin_usec < lower_usec) {
+      // Lower bin is closer
+      ENSURE_BIN_CAPACITY(lower);
+      bin_to_remove = lower;
+    } else if (target_bin_usec > higher_usec) {
+      // Higher bin is closer
+      ENSURE_BIN_CAPACITY(higher);
+      bin_to_remove = higher;
+    } else if (target_bin_usec-lower_usec > higher_usec-target_bin_usec) {
+      // Higher bin is closer
+      ENSURE_BIN_CAPACITY(higher);
+      bin_to_remove = higher;
+    } else {
+      // Lower bin is closer
+      ENSURE_BIN_CAPACITY(lower);
+      bin_to_remove = lower;
+    }
+    mi->histogram[bin_to_remove]--;
+    log_debug(LD_GENERAL, "Removing token from bin %d", bin_to_remove);
+    return;
+  } else {
+    if (current - lower > higher - current) {
+      // Higher bin is closer
+      ENSURE_BIN_CAPACITY(higher);
+      mi->histogram[higher]--;
+      return;
+    } else {
+      // Lower bin is closer
+      ENSURE_BIN_CAPACITY(lower);
+      mi->histogram[lower]--;
+      return;
+    }
+  }
+}
+
+#undef ENSURE_BIN_CAPACITY
+
+/**
+ * Remove a token from the exact bin corresponding to the target.
+ *
+ * If it is empty, do nothing.
+ */
+static void
+circpad_machine_remove_exact(circpad_machine_state_t *mi,
+                             circpad_delay_t target_bin_usec)
+{
+  circpad_hist_index_t bin = circpad_histogram_usec_to_bin(mi,
+          target_bin_usec);
+
+  if (mi->histogram[bin] > 0)
+    mi->histogram[bin]--;
+}
+
+/**
+ * Check our state's cell limit count and tokens.
+ *
+ * Returns 1 if either limits are hit and we decide to change states,
+ * otherwise returns 0.
+ */
+static circpad_decision_t
+check_machine_token_supply(circpad_machine_state_t *mi)
+{
+  uint32_t histogram_total_tokens = 0;
+
+  /* Check if bins empty. This requires summing up the current mutable
+   * machineinfo histogram token total and checking if it is zero.
+   * Machineinfo does not keep a running token count. We're assuming the
+   * extra space is not worth this short loop iteration.
+   *
+   * We also do not count infinity bin in histogram totals.
+   */
+  if (mi->histogram_len && mi->histogram) {
+    for (circpad_hist_index_t b = 0; b < CIRCPAD_INFINITY_BIN(mi); b++)
+      histogram_total_tokens += mi->histogram[b];
+
+    /* If we change state, we're done */
+    if (histogram_total_tokens == 0) {
+      if (circpad_internal_event_bins_empty(mi) == CIRCPAD_STATE_CHANGED)
+        return CIRCPAD_STATE_CHANGED;
+    }
+  }
+
+  if (mi->state_length == 0) {
+    return circpad_internal_event_state_length_up(mi);
+  }
+
+  return CIRCPAD_STATE_UNCHANGED;
+}
+
+/**
+ * Remove a token from the bin corresponding to the delta since
+ * last packet. If that bin is empty, choose a token based on
+ * the specified removal strategy in the state machine.
+ *
+ * This function also updates and checks rate limit and state
+ * limit counters.
+ *
+ * Returns 1 if we transition states, 0 otherwise.
+ */
+STATIC circpad_decision_t
+circpad_machine_remove_token(circpad_machine_state_t *mi)
+{
+  const circpad_state_t *state = NULL;
+  circpad_time_t current_time;
+  circpad_delay_t target_bin_usec;
+
+  /* Update non-padding counts for rate limiting: We scale at UINT16_MAX
+   * because we only use this for a percentile limit of 2 sig figs, and
+   * space is scare in the machineinfo struct. */
+  mi->nonpadding_sent++;
+  if (mi->nonpadding_sent == UINT16_MAX) {
+    mi->padding_sent /= 2;
+    mi->nonpadding_sent /= 2;
+  }
+
+  /* Dont remove any tokens if there was no padding scheduled */
+  if (!mi->padding_scheduled_at_usec) {
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  state = circpad_machine_current_state(mi);
+  current_time = monotime_absolute_usec();
+
+  /* If we have scheduled padding some time in the future, we want to see what
+     bin we are in at the current time */
+  target_bin_usec = (circpad_delay_t)
+                  MIN((current_time - mi->padding_scheduled_at_usec),
+                      CIRCPAD_DELAY_INFINITE-1);
+
+  /* We are treating this non-padding cell as a padding cell, so we cancel
+     padding timer, if present. */
+  mi->padding_scheduled_at_usec = 0;
+  if (mi->is_padding_timer_scheduled) {
+    mi->is_padding_timer_scheduled = 0;
+    timer_disable(mi->padding_timer);
+  }
+
+  /* If we are not in a padding state (like start or end), we're done */
+  if (!state)
+    return CIRCPAD_STATE_UNCHANGED;
+
+  /* If we're enforcing a state length on non-padding packets,
+   * decrement it */
+  if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE &&
+      state->length_includes_nonpadding &&
+      mi->state_length > 0) {
+    mi->state_length--;
+  }
+
+  /* Perform the specified token removal strategy */
+  switch (state->token_removal) {
+    case CIRCPAD_TOKEN_REMOVAL_NONE:
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC:
+      circpad_machine_remove_closest_token(mi, target_bin_usec, 1);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_CLOSEST:
+      circpad_machine_remove_closest_token(mi, target_bin_usec, 0);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_LOWER:
+      circpad_machine_remove_lower_token(mi, target_bin_usec);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_HIGHER:
+      circpad_machine_remove_higher_token(mi, target_bin_usec);
+      break;
+    case CIRCPAD_TOKEN_REMOVAL_EXACT:
+      circpad_machine_remove_exact(mi, target_bin_usec);
+      break;
+  }
+
+  /* Check our token and state length limits */
+  return check_machine_token_supply(mi);
+}
+
+/**
+ * Send a relay command with a relay cell payload on a circuit to
+ * the particular hopnum.
+ *
+ * Hopnum starts at 1 (1=guard, 2=middle, 3=exit, etc).
+ *
+ * Payload may be null.
+ *
+ * Returns negative on error, 0 on success.
+ */
+MOCK_IMPL(STATIC signed_error_t,
+circpad_send_command_to_hop,(origin_circuit_t *circ, uint8_t hopnum,
+                             uint8_t relay_command, const uint8_t *payload,
+                             ssize_t payload_len))
+{
+  crypt_path_t *target_hop = circuit_get_cpath_hop(circ, hopnum);
+  signed_error_t ret;
+
+  /* Check that the cpath has the target hop */
+  if (!target_hop) {
+    log_fn(LOG_WARN, LD_BUG, "Padding circuit %u has %d hops, not %d",
+           circ->global_identifier, circuit_get_cpath_len(circ), hopnum);
+    return -1;
+  }
+
+  /* Check that the target hop is opened */
+  if (target_hop->state != CPATH_STATE_OPEN) {
+    log_fn(LOG_WARN,LD_CIRC,
+           "Padding circuit %u has %d hops, not %d",
+           circ->global_identifier,
+           circuit_get_cpath_opened_len(circ), hopnum);
+    return -1;
+  }
+
+  /* Send the drop command to the second hop */
+  ret = relay_send_command_from_edge(0, TO_CIRCUIT(circ), relay_command,
+                                     (const char*)payload, payload_len,
+                                     target_hop);
+  return ret;
+}
+
+/**
+ * Callback helper to send a padding cell.
+ *
+ * This helper is called after our histogram-sampled delay period passes
+ * without another packet being sent first. If a packet is sent before this
+ * callback happens, it is canceled. So when we're called here, send padding
+ * right away.
+ *
+ * If sending this padding cell forced us to transition states return
+ * CIRCPAD_STATE_CHANGED. Otherwise return CIRCPAD_STATE_UNCHANGED.
+ */
+circpad_decision_t
+circpad_send_padding_cell_for_callback(circpad_machine_state_t *mi)
+{
+  circuit_t *circ = mi->on_circ;
+  int machine_idx = mi->machine_index;
+  mi->padding_scheduled_at_usec = 0;
+  circpad_statenum_t state = mi->current_state;
+
+  // Make sure circuit didn't close on us
+  if (mi->on_circ->marked_for_close) {
+    log_fn(LOG_INFO,LD_CIRC,
+           "Padding callback on a circuit marked for close. Ignoring.");
+    return CIRCPAD_STATE_CHANGED;
+  }
+
+  /* If it's a histogram, reduce the token count */
+  if (mi->histogram && mi->histogram_len) {
+    /* Basic sanity check on the histogram before removing anything */
+    if (BUG(mi->chosen_bin >= mi->histogram_len) ||
+        BUG(mi->histogram[mi->chosen_bin] == 0)) {
+      return CIRCPAD_STATE_CHANGED;
+    }
+
+    mi->histogram[mi->chosen_bin]--;
+  }
+
+  /* If we have a valid state length bound, consider it */
+  if (mi->state_length != CIRCPAD_STATE_LENGTH_INFINITE &&
+      !BUG(mi->state_length <= 0)) {
+    mi->state_length--;
+  }
+
+  /*
+   * Update non-padding counts for rate limiting: We scale at UINT16_MAX
+   * because we only use this for a percentile limit of 2 sig figs, and
+   * space is scare in the machineinfo struct.
+   */
+  mi->padding_sent++;
+  if (mi->padding_sent == UINT16_MAX) {
+    mi->padding_sent /= 2;
+    mi->nonpadding_sent /= 2;
+  }
+  circpad_global_padding_sent++;
+
+  if (CIRCUIT_IS_ORIGIN(mi->on_circ)) {
+    circpad_send_command_to_hop(TO_ORIGIN_CIRCUIT(mi->on_circ),
+                                CIRCPAD_GET_MACHINE(mi)->target_hopnum,
+                                RELAY_COMMAND_DROP, NULL, 0);
+    log_fn(LOG_INFO,LD_CIRC, "Callback: Sending padding to origin circuit %u.",
+           TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier);
+  } else {
+    // If we're a non-origin circ, we can just send from here as if we're the
+    // edge.
+    log_fn(LOG_INFO,LD_CIRC,
+          "Callback: Sending padding to non-origin circuit.");
+    relay_send_command_from_edge(0, mi->on_circ, RELAY_COMMAND_DROP, NULL,
+                                 0, NULL);
+  }
+
+  rep_hist_padding_count_write(PADDING_TYPE_DROP);
+  /* This is a padding cell sent from the client or from the middle node,
+   * (because it's invoked from circuitpadding.c) */
+  circpad_cell_event_padding_sent(circ);
+
+  /* The circpad_cell_event_padding_sent() could cause us to transition.
+   * Check that we still have a padding machineinfo, and then check our token
+   * supply. */
+  if (circ->padding_info[machine_idx] != NULL) {
+    if (state != circ->padding_info[machine_idx]->current_state)
+      return CIRCPAD_STATE_CHANGED;
+    else
+      return check_machine_token_supply(circ->padding_info[machine_idx]);
+  } else {
+    return CIRCPAD_STATE_CHANGED;
+  }
+}
+
+/**
+ * Tor-timer compatible callback that tells us to send a padding cell.
+ *
+ * Timers are associated with circpad_machine_state_t's. When the machineinfo
+ * is freed on a circuit, the timers are cancelled. Since the lifetime
+ * of machineinfo is always longer than the timers, handles are not
+ * needed.
+ */
+static void
+circpad_send_padding_callback(tor_timer_t *timer, void *args,
+                              const struct monotime_t *time)
+{
+  circpad_machine_state_t *mi = ((circpad_machine_state_t*)args);
+  (void)timer; (void)time;
+
+  if (mi && mi->on_circ) {
+    assert_circuit_ok(mi->on_circ);
+    circpad_send_padding_cell_for_callback(mi);
+  } else {
+    // This shouldn't happen (represents a timer leak)
+    log_fn(LOG_WARN,LD_CIRC,
+            "Circuit closed while waiting for padding timer.");
+    tor_fragile_assert();
+  }
+
+  // TODO-MP-AP: Unify this counter with channelpadding for rephist stats
+  //total_timers_pending--;
+}
+
+/**
+ * Cache our consensus parameters upon consensus update.
+ */
+void
+circpad_new_consensus_params(const networkstatus_t *ns)
+{
+  circpad_global_allowed_cells =
+      networkstatus_get_param(ns, "circpad_global_allowed_cells",
+         0, 0, UINT16_MAX-1);
+
+  circpad_global_max_padding_percent =
+      networkstatus_get_param(ns, "circpad_global_max_padding_pct",
+         0, 0, 100);
+}
+
+/**
+ * Check this machine against its padding limits, as well as global
+ * consensus limits.
+ *
+ * We have two limits: a percent and a cell count. The cell count
+ * limit must be reached before the percent is enforced (this is to
+ * optionally allow very light padding of things like circuit setup
+ * while there is no other traffic on the circuit).
+ *
+ * TODO: Don't apply limits to machines form torrc.
+ *
+ * Returns 1 if limits are set and we've hit them. Otherwise returns 0.
+ */
+STATIC bool
+circpad_machine_reached_padding_limit(circpad_machine_state_t *mi)
+{
+  const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi);
+
+  /* If machine_padding_pct is non-zero, and we've sent more
+   * than the allowed count of padding cells, then check our
+   * percent limits for this machine. */
+   if (machine->max_padding_percent &&
+      mi->padding_sent >= machine->allowed_padding_count) {
+    uint32_t total_cells = mi->padding_sent + mi->nonpadding_sent;
+
+    /* Check the percent */
+    if ((100*(uint32_t)mi->padding_sent) / total_cells >
+        machine->max_padding_percent) {
+      return 1; // limit is reached. Stop.
+    }
+  }
+
+  /* If circpad_max_global_padding_pct is non-zero, and we've
+   * sent more than the global padding cell limit, then check our
+   * gloabl tor process percentage limit on padding. */
+  if (circpad_global_max_padding_percent &&
+      circpad_global_padding_sent >= circpad_global_allowed_cells) {
+    uint64_t total_cells = circpad_global_padding_sent +
+              circpad_global_nonpadding_sent;
+
+    /* Check the percent */
+    if ((100*circpad_global_padding_sent) / total_cells >
+        circpad_global_max_padding_percent) {
+      return 1; // global limit reached. Stop.
+    }
+  }
+
+  return 0; // All good!
+}
+
+/**
+ * Schedule the next padding time according to the machineinfo on a
+ * circuit.
+ *
+ * The histograms represent inter-packet-delay. Whenever you get an packet
+ * event you should be scheduling your next timer (after cancelling any old
+ * ones and updating tokens accordingly).
+ *
+ * Returns 1 if we decide to transition states (due to infinity bin),
+ * 0 otherwise.
+ */
+MOCK_IMPL(circpad_decision_t,
+circpad_machine_schedule_padding,(circpad_machine_state_t *mi))
+{
+  circpad_delay_t in_usec = 0;
+  struct timeval timeout;
+  tor_assert(mi);
+
+  // Don't pad in end (but  also don't cancel any previously
+  // scheduled padding either).
+  if (mi->current_state == CIRCPAD_STATE_END) {
+    log_fn(LOG_INFO, LD_CIRC, "Padding end state");
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  /* Check our padding limits */
+  if (circpad_machine_reached_padding_limit(mi)) {
+   if (CIRCUIT_IS_ORIGIN(mi->on_circ)) {
+      log_fn(LOG_INFO, LD_CIRC,
+           "Padding machine has reached padding limit on circuit %u",
+             TO_ORIGIN_CIRCUIT(mi->on_circ)->global_identifier);
+    } else {
+      log_fn(LOG_INFO, LD_CIRC,
+           "Padding machine has reached padding limit on circuit %"PRIu64
+           ", %d",
+           mi->on_circ->n_chan ? mi->on_circ->n_chan->global_identifier : 0,
+           mi->on_circ->n_circ_id);
+    }
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  if (mi->is_padding_timer_scheduled) {
+    /* Cancel current timer (if any) */
+    timer_disable(mi->padding_timer);
+    mi->is_padding_timer_scheduled = 0;
+  }
+
+  /* in_usec = in microseconds */
+  in_usec = circpad_machine_sample_delay(mi);
+  mi->padding_scheduled_at_usec = monotime_absolute_usec();
+  log_fn(LOG_INFO,LD_CIRC,"\tPadding in %u usec", in_usec);
+
+  // Don't schedule if we have infinite delay.
+  if (in_usec == CIRCPAD_DELAY_INFINITE) {
+    return circpad_internal_event_infinity(mi);
+  }
+
+  if (mi->state_length == 0) {
+    /* If we're at length 0, that means we hit 0 after sending
+     * a cell earlier, and emitted an event for it, but
+     * for whatever reason we did not decide to change states then.
+     * So maybe the machine is waiting for bins empty, or for an
+     * infinity event later? That would be a strange machine,
+     * but there's no reason to make it impossible. */
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  if (in_usec <= 0) {
+    return circpad_send_padding_cell_for_callback(mi);
+  }
+
+  timeout.tv_sec = in_usec/TOR_USEC_PER_SEC;
+  timeout.tv_usec = (in_usec%TOR_USEC_PER_SEC);
+
+  log_fn(LOG_INFO, LD_CIRC, "\tPadding in %u sec, %u usec",
+          (unsigned)timeout.tv_sec, (unsigned)timeout.tv_usec);
+
+  if (mi->padding_timer) {
+    timer_set_cb(mi->padding_timer, circpad_send_padding_callback, mi);
+  } else {
+    mi->padding_timer =
+        timer_new(circpad_send_padding_callback, mi);
+  }
+  timer_schedule(mi->padding_timer, &timeout);
+  mi->is_padding_timer_scheduled = 1;
+
+  // TODO-MP-AP: Unify with channelpadding counter
+  //rep_hist_padding_count_timers(++total_timers_pending);
+
+  return CIRCPAD_STATE_UNCHANGED;
+}
+
+/**
+ * If the machine transitioned to the END state, we need
+ * to check to see if it wants us to shut it down immediately.
+ * If it does, then we need to send the appropate negotation commands
+ * depending on which side it is.
+ *
+ * After this function is called, mi may point to freed memory. Do
+ * not access it.
+ */
+static void
+circpad_machine_spec_transitioned_to_end(circpad_machine_state_t *mi)
+{
+  const circpad_machine_spec_t *machine = CIRCPAD_GET_MACHINE(mi);
+
+  /*
+   * We allow machines to shut down and delete themselves as opposed
+   * to just going back to START or waiting forever in END so that
+   * we can handle the case where this machine started while it was
+   * the only machine that matched conditions, but *since* then more
+   * "higher ranking" machines now match the conditions, and would
+   * be given a chance to take precidence over this one in
+   * circpad_add_matching_machines().
+   *
+   * Returning to START or waiting forever in END would not give those
+   * other machines a chance to be launched, where as shutting down
+   * here does.
+   */
+  if (machine->should_negotiate_end) {
+    circuit_t *on_circ = mi->on_circ;
+    if (machine->is_origin_side) {
+      /* We free the machine info here so that we can be replaced
+       * by a different machine. But we must leave the padding_machine
+       * in place to wait for the negotiated response */
+      circpad_circuit_machineinfo_free_idx(on_circ,
+                                           machine->machine_index);
+      circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(on_circ),
+                                machine->machine_num,
+                                machine->target_hopnum,
+                                CIRCPAD_COMMAND_STOP);
+    } else {
+      circpad_circuit_machineinfo_free_idx(on_circ,
+                                           machine->machine_index);
+      circpad_padding_negotiated(on_circ,
+                                machine->machine_num,
+                                CIRCPAD_COMMAND_STOP,
+                                CIRCPAD_RESPONSE_OK);
+      on_circ->padding_machine[machine->machine_index] = NULL;
+    }
+  }
+}
+
+/**
+ * Generic state transition function for padding state machines.
+ *
+ * Given an event and our mutable machine info, decide if/how to
+ * transition to a different state, and perform actions accordingly.
+ *
+ * Returns 1 if we transition states, 0 otherwise.
+ */
+MOCK_IMPL(circpad_decision_t,
+circpad_machine_spec_transition,(circpad_machine_state_t *mi,
+                            circpad_event_t event))
+{
+  const circpad_state_t *state =
+      circpad_machine_current_state(mi);
+
+  /* If state is null we are in the end state. */
+  if (!state) {
+    /* If we in end state we don't pad no matter what. */
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+
+  /* Check if this event is ignored or causes a cancel */
+  if (state->next_state[event] == CIRCPAD_STATE_IGNORE) {
+    return CIRCPAD_STATE_UNCHANGED;
+  } else if (state->next_state[event] == CIRCPAD_STATE_CANCEL) {
+    /* Check cancel events and cancel any pending padding */
+    mi->padding_scheduled_at_usec = 0;
+    if (mi->is_padding_timer_scheduled) {
+      mi->is_padding_timer_scheduled = 0;
+      /* Cancel current timer (if any) */
+      timer_disable(mi->padding_timer);
+    }
+    return CIRCPAD_STATE_UNCHANGED;
+  } else {
+    circpad_statenum_t s = state->next_state[event];
+    /* See if we need to transition to any other states based on this event.
+     * Whenever a transition happens, even to our own state, we schedule
+     * padding.
+     *
+     * So if a state only wants to schedule padding for an event, it specifies
+     * a transition to itself. All non-specified events are ignored.
+     */
+    log_fn(LOG_INFO, LD_CIRC,
+           "Circpad machine %d transitioning from %s to %s",
+            mi->machine_index, circpad_state_to_string(mi->current_state),
+            circpad_state_to_string(s));
+
+    /* If this is not the same state, switch and init tokens,
+     * otherwise just reschedule padding. */
+    if (mi->current_state != s) {
+      mi->current_state = s;
+      circpad_machine_setup_tokens(mi);
+      circpad_choose_state_length(mi);
+
+      /* If we transition to the end state, check to see
+       * if this machine wants to be shut down at end */
+      if (s == CIRCPAD_STATE_END) {
+        circpad_machine_spec_transitioned_to_end(mi);
+        /* We transitioned but we don't pad in end. Also, mi
+         * may be freed. Returning STATE_CHANGED prevents us
+         * from accessing it in any callers of this function. */
+        return CIRCPAD_STATE_CHANGED;
+      }
+
+      /* We transitioned to a new state, schedule padding */
+      circpad_machine_schedule_padding(mi);
+      return CIRCPAD_STATE_CHANGED;
+    }
+
+    /* We transitioned back to the same state. Schedule padding,
+     * and inform if that causes a state transition. */
+    return circpad_machine_schedule_padding(mi);
+  }
+
+  return CIRCPAD_STATE_UNCHANGED;
+}
+
+/**
+ * Estimate the circuit RTT from the current middle hop out to the
+ * end of the circuit.
+ *
+ * We estimate RTT by calculating the time between "receive" and
+ * "send" at a middle hop. This is because we "receive" a cell
+ * from the origin, and then relay it towards the exit before a
+ * response comes back. It is that response time from the exit side
+ * that we want to measure, so that we can make use of it for synthetic
+ * response delays.
+ */
+static void
+circpad_estimate_circ_rtt_on_received(circuit_t *circ,
+                                      circpad_machine_state_t *mi)
+{
+  /* Origin circuits don't estimate RTT. They could do it easily enough,
+   * but they have no reason to use it in any delay calculations. */
+  if (CIRCUIT_IS_ORIGIN(circ) || mi->stop_rtt_update)
+    return;
+
+  /* If we already have a last receieved packet time, that means we
+   * did not get a response before this packet. The RTT estimate
+   * only makes sense if we do not have multiple packets on the
+   * wire, so stop estimating if this is the second packet
+   * back to back. However, for the first set of back-to-back
+   * packets, we can wait until the very first response comes back
+   * to us, to measure that RTT (for the response to optimistic
+   * data, for example). Hence stop_rtt_update is only checked
+   * in this received side function, and not in send side below.
+   */
+  if (mi->last_received_time_usec) {
+    /* We also allow multiple back-to-back packets if the circuit is not
+     * opened, to handle var cells.
+     * XXX: Will this work with out var cell plans? Maybe not,
+     * since we're opened at the middle hop as soon as we process
+     * one var extend2 :/ */
+    if (circ->state == CIRCUIT_STATE_OPEN) {
+      log_fn(LOG_INFO, LD_CIRC,
+           "Stopping padding RTT estimation on circuit (%"PRIu64
+           ", %d) after two back to back packets. Current RTT: %d",
+           circ->n_chan ?  circ->n_chan->global_identifier : 0,
+           circ->n_circ_id, mi->rtt_estimate_usec);
+       mi->stop_rtt_update = 1;
+    }
+  } else {
+    mi->last_received_time_usec = monotime_absolute_usec();
+  }
+}
+
+/**
+ * Handles the "send" side of RTT calculation at middle nodes.
+ *
+ * This function calculates the RTT from the middle to the end
+ * of the circuit by subtracting the last received cell timestamp
+ * from the current time. It allows back-to-back cells until
+ * the circuit is opened, to allow for var cell handshakes.
+ * XXX: Check our var cell plans to make sure this will work.
+ */
+static void
+circpad_estimate_circ_rtt_on_send(circuit_t *circ,
+                                  circpad_machine_state_t *mi)
+{
+  /* Origin circuits don't estimate RTT. They could do it easily enough,
+   * but they have no reason to use it in any delay calculations. */
+  if (CIRCUIT_IS_ORIGIN(circ))
+    return;
+
+  /* If last_received_time_usec is non-zero, we are waiting for a response
+   * from the exit side. Calculate the time delta and use it as RTT. */
+  if (mi->last_received_time_usec) {
+    circpad_time_t rtt_time = monotime_absolute_usec() -
+        mi->last_received_time_usec;
+
+    /* Reset the last RTT packet time, so we can tell if two cells
+     * arrive back to back */
+    mi->last_received_time_usec = 0;
+
+    /* Use INT32_MAX to ensure the addition doesn't overflow */
+    if (rtt_time >= INT32_MAX) {
+      log_fn(LOG_WARN,LD_CIRC,
+             "Circuit padding RTT estimate overflowed: %"PRIu64
+             " vs %"PRIu64, monotime_absolute_usec(),
+               mi->last_received_time_usec);
+      return;
+    }
+
+    /* If the old RTT estimate is lower than this one, use this one, because
+     * the circuit is getting longer. If this estimate is somehow
+     * faster than the previous, then maybe that was network jitter.
+     * In that case, average them. */
+    if (mi->rtt_estimate_usec < (circpad_delay_t)rtt_time) {
+      mi->rtt_estimate_usec = (circpad_delay_t)rtt_time;
+    } else {
+      mi->rtt_estimate_usec += (circpad_delay_t)rtt_time;
+      mi->rtt_estimate_usec /= 2;
+    }
+  } else if (circ->state == CIRCUIT_STATE_OPEN) {
+    /* If last_received_time_usec is zero, then we have gotten two cells back
+     * to back. Stop estimating RTT in this case. Note that we only
+     * stop RTT update if the circuit is opened, to allow for RTT estimates
+     * of var cells during circ setup. */
+    mi->stop_rtt_update = 1;
+
+    if (!mi->rtt_estimate_usec) {
+      log_fn(LOG_NOTICE, LD_CIRC,
+             "Got two cells back to back on a circuit before estimating RTT.");
+    }
+  }
+}
+
+/**
+ * A "non-padding" cell has been sent from this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we sent a cell into the network.
+ * For middle relay circuits, this means we sent a cell towards the
+ * origin.
+ */
+void
+circpad_cell_event_nonpadding_sent(circuit_t *on_circ)
+{
+  /* Update global cell count */
+  circpad_global_nonpadding_sent++;
+
+  /* If there are no machines then this loop should not iterate */
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    /* First, update any RTT estimate */
+    circpad_estimate_circ_rtt_on_send(on_circ, on_circ->padding_info[i]);
+
+    /* Remove a token: this is the idea of adaptive padding, since we have an
+     * ideal distribution that we want our distribution to look like. */
+    if (!circpad_machine_remove_token(on_circ->padding_info[i])) {
+      /* If removing a token did not cause a transition, check if
+       * non-padding sent event should */
+      circpad_machine_spec_transition(on_circ->padding_info[i],
+                                 CIRCPAD_EVENT_NONPADDING_SENT);
+    }
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * A "non-padding" cell has been received by this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we read a cell from the network.
+ * For middle relay circuits, this means we received a cell from the
+ * origin.
+ */
+void
+circpad_cell_event_nonpadding_received(circuit_t *on_circ)
+{
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    /* First, update any RTT estimate */
+    circpad_estimate_circ_rtt_on_received(on_circ, on_circ->padding_info[i]);
+
+    circpad_machine_spec_transition(on_circ->padding_info[i],
+                               CIRCPAD_EVENT_NONPADDING_RECV);
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * A padding cell has been sent from this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we sent a cell into the network.
+ * For middle relay circuits, this means we sent a cell towards the
+ * origin.
+ */
+void
+circpad_cell_event_padding_sent(circuit_t *on_circ)
+{
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    circpad_machine_spec_transition(on_circ->padding_info[i],
+                             CIRCPAD_EVENT_PADDING_SENT);
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * A padding cell has been received by this endpoint. React
+ * according to any padding state machines on the circuit.
+ *
+ * For origin circuits, this means we read a cell from the network.
+ * For middle relay circuits, this means we received a cell from the
+ * origin.
+ */
+void
+circpad_cell_event_padding_received(circuit_t *on_circ)
+{
+  /* identical to padding sent */
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, on_circ) {
+    circpad_machine_spec_transition(on_circ->padding_info[i],
+                              CIRCPAD_EVENT_PADDING_RECV);
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * An "infinite" delay has ben chosen from one of our histograms.
+ *
+ * "Infinite" delays mean don't send padding -- but they can also
+ * mean transition to another state depending on the state machine
+ * definitions. Check the rules and react accordingly.
+ *
+ * Return 1 if we decide to transition, 0 otherwise.
+ */
+circpad_decision_t
+circpad_internal_event_infinity(circpad_machine_state_t *mi)
+{
+  return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_INFINITY);
+}
+
+/**
+ * All of the bins of our current state's histogram's are empty.
+ *
+ * Check to see if this means transition to another state, and if
+ * not, refill the tokens.
+ *
+ * Return 1 if we decide to transition, 0 otherwise.
+ */
+circpad_decision_t
+circpad_internal_event_bins_empty(circpad_machine_state_t *mi)
+{
+  if (circpad_machine_spec_transition(mi, CIRCPAD_EVENT_BINS_EMPTY)
+      == CIRCPAD_STATE_CHANGED) {
+    return CIRCPAD_STATE_CHANGED;
+  } else {
+    /* If we dont transition, then we refill the tokens */
+    circpad_machine_setup_tokens(mi);
+    return CIRCPAD_STATE_UNCHANGED;
+  }
+}
+
+/**
+ * This state has used up its cell count. Emit the event and
+ * see if we transition.
+ *
+ * Return 1 if we decide to transition, 0 otherwise.
+ */
+circpad_decision_t
+circpad_internal_event_state_length_up(circpad_machine_state_t *mi)
+{
+  return circpad_machine_spec_transition(mi, CIRCPAD_EVENT_LENGTH_COUNT);
+}
+
+/**
+ * Returns true if the circuit matches the conditions.
+ */
+static inline bool
+circpad_machine_conditions_met(origin_circuit_t *circ,
+                               const circpad_machine_spec_t *machine)
+{
+  if (!(circpad_circ_purpose_to_mask(TO_CIRCUIT(circ)->purpose)
+      & machine->conditions.purpose_mask))
+    return 0;
+
+  if (machine->conditions.requires_vanguards) {
+    const or_options_t *options = get_options();
+
+    /* Pinned middles are effectively vanguards */
+    if (!(options->HSLayer2Nodes || options->HSLayer3Nodes))
+      return 0;
+  }
+
+  /* We check for any bits set in the circuit state mask so that machines
+   * can say any of the following through their state bitmask:
+   * "I want to apply to circuits with either streams or no streams"; OR
+   * "I only want to apply to circuits with streams"; OR
+   * "I only want to apply to circuits without streams". */
+  if (!(circpad_circuit_state(circ) & machine->conditions.state_mask))
+    return 0;
+
+  if (circuit_get_cpath_opened_len(circ) < machine->conditions.min_hops)
+    return 0;
+
+  return 1;
+}
+
+/**
+ * Returns a minimized representation of the circuit state.
+ *
+ * The padding code only cares if the circuit is building,
+ * opened, used for streams, and/or still has relay early cells.
+ * This returns a bitmask of all state properities that apply to
+ * this circuit.
+ */
+static inline
+circpad_circuit_state_t
+circpad_circuit_state(origin_circuit_t *circ)
+{
+  circpad_circuit_state_t retmask = 0;
+
+  if (circ->p_streams)
+    retmask |= CIRCPAD_CIRC_STREAMS;
+  else
+    retmask |= CIRCPAD_CIRC_NO_STREAMS;
+
+  /* We use has_opened to prevent cannibialized circs from flapping. */
+  if (circ->has_opened)
+    retmask |= CIRCPAD_CIRC_OPENED;
+  else
+    retmask |= CIRCPAD_CIRC_BUILDING;
+
+  if (circ->remaining_relay_early_cells > 0)
+    retmask |= CIRCPAD_CIRC_HAS_RELAY_EARLY;
+  else
+    retmask |= CIRCPAD_CIRC_HAS_NO_RELAY_EARLY;
+
+  return retmask;
+}
+
+/**
+ * Convert a normal circuit purpose into a bitmask that we can
+ * use for determining matching circuits.
+ */
+static inline
+circpad_purpose_mask_t
+circpad_circ_purpose_to_mask(uint8_t circ_purpose)
+{
+  /* Treat OR circ purposes as ignored. They should not be passed here*/
+  if (BUG(circ_purpose <= CIRCUIT_PURPOSE_OR_MAX_)) {
+    return 0;
+  }
+
+  /* Treat new client circuit purposes as "OMG ITS EVERYTHING".
+   * This also should not happen */
+  if (BUG(circ_purpose - CIRCUIT_PURPOSE_OR_MAX_ - 1 > 32)) {
+    return CIRCPAD_PURPOSE_ALL;
+  }
+
+  /* Convert the purpose to a bit position */
+  return 1 << (circ_purpose - CIRCUIT_PURPOSE_OR_MAX_ - 1);
+}
+
+/**
+ * Shut down any machines whose conditions no longer match
+ * the current circuit.
+ */
+static void
+circpad_shutdown_old_machines(origin_circuit_t *on_circ)
+{
+  circuit_t *circ = TO_CIRCUIT(on_circ);
+
+  FOR_EACH_ACTIVE_CIRCUIT_MACHINE_BEGIN(i, circ) {
+    if (!circpad_machine_conditions_met(on_circ,
+                                        circ->padding_machine[i])) {
+      // Clear machineinfo (frees timers)
+      circpad_circuit_machineinfo_free_idx(circ, i);
+      // Send padding negotiate stop
+      circpad_negotiate_padding(on_circ,
+                                circ->padding_machine[i]->machine_num,
+                                circ->padding_machine[i]->target_hopnum,
+                                CIRCPAD_COMMAND_STOP);
+    }
+  } FOR_EACH_ACTIVE_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * Negotiate new machines that would apply to this circuit.
+ *
+ * This function checks to see if we have any free machine indexes,
+ * and for each free machine index, it initializes the most recently
+ * added origin-side padding machine that matches the target machine
+ * index and circuit conditions, and negotiates it with the appropriate
+ * middle relay.
+ */
+static void
+circpad_add_matching_machines(origin_circuit_t *on_circ)
+{
+  circuit_t *circ = TO_CIRCUIT(on_circ);
+
+#ifdef TOR_UNIT_TESTS
+  /* Tests don't have to init our padding machines */
+  if (!origin_padding_machines)
+    return;
+#endif
+
+  /* If padding negotiation failed before, do not try again */
+  if (on_circ->padding_negotiation_failed)
+    return;
+
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) {
+    /* If there is a padding machine info, this index is occupied.
+     * No need to check conditions for this index. */
+    if (circ->padding_info[i])
+      continue;
+
+    /* We have a free machine index. Check the origin padding
+     * machines in reverse order, so that more recently added
+     * machines take priority over older ones. */
+    SMARTLIST_FOREACH_REVERSE_BEGIN(origin_padding_machines,
+                                    circpad_machine_spec_t *,
+                                    machine) {
+      /* Machine definitions have a specific target machine index.
+       * This is so event ordering is deterministic with respect
+       * to which machine gets events first when there are two
+       * machines installed on a circuit. Make sure we only
+       * add this machine if its target machine index is free. */
+      if (machine->machine_index == i &&
+          circpad_machine_conditions_met(on_circ, machine)) {
+
+        // We can only replace this machine if the target hopnum
+        // is the same, otherwise we'll get invalid data
+        if (circ->padding_machine[i]) {
+          if (circ->padding_machine[i]->target_hopnum !=
+              machine->target_hopnum)
+            continue;
+          /* Replace it. (Don't free - is global). */
+          circ->padding_machine[i] = NULL;
+        }
+
+        /* Set up the machine immediately so that the slot is occupied.
+         * We will tear it down on error return, or if there is an error
+         * response from the relay. */
+        circpad_setup_machine_on_circ(circ, machine);
+        if (circpad_negotiate_padding(on_circ, machine->machine_num,
+                                  machine->target_hopnum,
+                                  CIRCPAD_COMMAND_START) < 0) {
+          circpad_circuit_machineinfo_free_idx(circ, i);
+          circ->padding_machine[i] = NULL;
+          on_circ->padding_negotiation_failed = 1;
+        } else {
+          /* Success. Don't try any more machines */
+          return;
+        }
+      }
+    } SMARTLIST_FOREACH_END(machine);
+  } FOR_EACH_CIRCUIT_MACHINE_END;
+}
+
+/**
+ * Event that tells us we added a hop to an origin circuit.
+ *
+ * This event is used to decide if we should create a padding machine
+ * on a circuit.
+ */
+void
+circpad_machine_event_circ_added_hop(origin_circuit_t *on_circ)
+{
+  /* Since our padding conditions do not specify a max_hops,
+   * all we can do is add machines here */
+  circpad_add_matching_machines(on_circ);
+}
+
+/**
+ * Event that tells us that an origin circuit is now built.
+ *
+ * Shut down any machines that only applied to un-built circuits.
+ * Activate any new ones.
+ */
+void
+circpad_machine_event_circ_built(origin_circuit_t *circ)
+{
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Circpad purpose changed event.
+ *
+ * Shut down any machines that don't apply to our circ purpose.
+ * Activate any new ones that do.
+ */
+void
+circpad_machine_event_circ_purpose_changed(origin_circuit_t *circ)
+{
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Event that tells us that an origin circuit is out of RELAY_EARLY
+ * cells.
+ *
+ * Shut down any machines that only applied to RELAY_EARLY circuits.
+ * Activate any new ones.
+ */
+void
+circpad_machine_event_circ_has_no_relay_early(origin_circuit_t *circ)
+{
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Streams attached event.
+ *
+ * Called from link_apconn_to_circ() and handle_hs_exit_conn()
+ *
+ * Shut down any machines that only applied to machines without
+ * streams. Activate any new ones.
+ */
+void
+circpad_machine_event_circ_has_streams(origin_circuit_t *circ)
+{
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Streams detached event.
+ *
+ * Called from circuit_detach_stream()
+ *
+ * Shut down any machines that only applied to machines without
+ * streams. Activate any new ones.
+ */
+void
+circpad_machine_event_circ_has_no_streams(origin_circuit_t *circ)
+{
+  circpad_shutdown_old_machines(circ);
+  circpad_add_matching_machines(circ);
+}
+
+/**
+ * Verify that padding is coming from the expected hop.
+ *
+ * Returns true if from_hop matches the target hop from
+ * one of our padding machines.
+ *
+ * Returns false if we're not an origin circuit, or if from_hop
+ * does not match one of the padding machines.
+ */
+bool
+circpad_padding_is_from_expected_hop(circuit_t *circ,
+                                     crypt_path_t *from_hop)
+{
+  crypt_path_t *target_hop = NULL;
+  if (!CIRCUIT_IS_ORIGIN(circ))
+    return 0;
+
+  FOR_EACH_CIRCUIT_MACHINE_BEGIN(i) {
+    /* We have to check padding_machine and not padding_info/active
+     * machines here because padding may arrive after we shut down a
+     * machine. The info is gone, but the padding_machine waits
+     * for the padding_negotiated response to come back. */
+    if (!circ->padding_machine[i])
+      continue;
+
+    target_hop = circuit_get_cpath_hop(TO_ORIGIN_CIRCUIT(circ),
+                    circ->padding_machine[i]->target_hopnum);
+
+    if (target_hop == from_hop)
+      return 1;
+  } FOR_EACH_CIRCUIT_MACHINE_END;
+
+  return 0;
+}
+
+/**
+ * Deliver circpad events for an "unrecognized cell".
+ *
+ * Unrecognized cells are sent to relays and are forwarded
+ * onto the next hop of their circuits. Unrecognized cells
+ * are by definition not padding. We need to tell relay-side
+ * state machines that a non-padding cell was sent or received,
+ * depending on the direction, so they can update their histograms
+ * and decide to pad or not.
+ */
+void
+circpad_deliver_unrecognized_cell_events(circuit_t *circ,
+                                         cell_direction_t dir)
+{
+  // We should never see unrecognized cells at origin.
+  // Our caller emits a warn when this happens.
+  if (CIRCUIT_IS_ORIGIN(circ)) {
+    return;
+  }
+
+  if (dir == CELL_DIRECTION_OUT) {
+    /* When direction is out (away from origin), then we received non-padding
+       cell coming from the origin to us. */
+    circpad_cell_event_nonpadding_received(circ);
+  } else if (dir == CELL_DIRECTION_IN) {
+    /* It's in and not origin, so the cell is going away from us.
+     * So we are relaying a non-padding cell towards the origin. */
+    circpad_cell_event_nonpadding_sent(circ);
+  }
+}
+
+/**
+ * Deliver circpad events for "recognized" relay cells.
+ *
+ * Recognized cells are destined for this hop, either client or middle.
+ * Check if this is a padding cell or not, and send the appropiate
+ * received event.
+ */
+void
+circpad_deliver_recognized_relay_cell_events(circuit_t *circ,
+                                             uint8_t relay_command,
+                                             crypt_path_t *layer_hint)
+{
+  /* Padding negotiate cells are ignored by the state machines
+   * for simplicity. */
+  if (relay_command == RELAY_COMMAND_PADDING_NEGOTIATE ||
+      relay_command == RELAY_COMMAND_PADDING_NEGOTIATED) {
+    return;
+  }
+
+  if (relay_command == RELAY_COMMAND_DROP) {
+    rep_hist_padding_count_read(PADDING_TYPE_DROP);
+
+    if (CIRCUIT_IS_ORIGIN(circ)) {
+      if (circpad_padding_is_from_expected_hop(circ, layer_hint)) {
+        circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), 0);
+      } else {
+        /* This is unexpected padding. Ignore it for now. */
+        return;
+      }
+    }
+
+    /* The cell should be recognized by now, which means that we are on the
+       destination, which means that we received a padding cell. We might be
+       the client or the Middle node, still, because leaky-pipe. */
+    circpad_cell_event_padding_received(circ);
+    log_fn(LOG_INFO, LD_CIRC, "Got padding cell on %s circuit %u.",
+           CIRCUIT_IS_ORIGIN(circ) ? "origin" : "non-origin",
+           CIRCUIT_IS_ORIGIN(circ) ?
+             TO_ORIGIN_CIRCUIT(circ)->global_identifier : 0);
+  } else {
+    /* We received a non-padding cell on the edge */
+    circpad_cell_event_nonpadding_received(circ);
+  }
+}
+
+/**
+ * Deliver circpad events for relay cells sent from us.
+ *
+ * If this is a padding cell, update our padding stats
+ * and deliver the event. Otherwise just deliver the event.
+ */
+void
+circpad_deliver_sent_relay_cell_events(circuit_t *circ,
+                                       uint8_t relay_command)
+{
+  /* Padding negotiate cells are ignored by the state machines
+   * for simplicity. */
+  if (relay_command == RELAY_COMMAND_PADDING_NEGOTIATE ||
+      relay_command == RELAY_COMMAND_PADDING_NEGOTIATED) {
+    return;
+  }
+
+  /* RELAY_COMMAND_DROP is the multi-hop (aka circuit-level) padding cell in
+   * tor. (CELL_PADDING is a channel-level padding cell, which is not relayed
+   * or processed here) */
+  if (relay_command == RELAY_COMMAND_DROP) {
+    /* Optimization: The event for RELAY_COMMAND_DROP is sent directly
+     * from circpad_send_padding_cell_for_callback(). This is to avoid
+     * putting a cell_t and a relay_header_t on the stack repeatedly
+     * if we decide to send a long train of padidng cells back-to-back
+     * with 0 delay. So we do nothing here. */
+    return;
+  } else {
+    /* This is a non-padding cell sent from the client or from
+     * this node. */
+    circpad_cell_event_nonpadding_sent(circ);
+  }
+}
+
+/**
+ * Initialize the states array for a circpad machine.
+ */
+void
+circpad_machine_states_init(circpad_machine_spec_t *machine,
+                            circpad_statenum_t num_states)
+{
+  if (BUG(num_states > CIRCPAD_MAX_MACHINE_STATES)) {
+    num_states = CIRCPAD_MAX_MACHINE_STATES;
+  }
+
+  machine->num_states = num_states;
+  machine->states = tor_malloc_zero(sizeof(circpad_state_t)*num_states);
+
+  /* Initialize the default next state for all events to
+   * "ignore" -- if events aren't specified, they are ignored. */
+  for (circpad_statenum_t s = 0; s < num_states; s++) {
+    for (int e = 0; e < CIRCPAD_NUM_EVENTS; e++) {
+      machine->states[s].next_state[e] = CIRCPAD_STATE_IGNORE;
+    }
+  }
+}
+
+static void
+circpad_setup_machine_on_circ(circuit_t *on_circ,
+                              const circpad_machine_spec_t *machine)
+{
+  if (CIRCUIT_IS_ORIGIN(on_circ) && !machine->is_origin_side) {
+    log_fn(LOG_WARN, LD_BUG,
+           "Can't set up non-origin machine on origin circuit!");
+    return;
+  }
+
+  if (!CIRCUIT_IS_ORIGIN(on_circ) && machine->is_origin_side) {
+    log_fn(LOG_WARN, LD_BUG,
+           "Can't set up origin machine on non-origin circuit!");
+    return;
+  }
+
+  tor_assert_nonfatal(on_circ->padding_machine[machine->machine_index]
+                      == NULL);
+  tor_assert_nonfatal(on_circ->padding_info[machine->machine_index] == NULL);
+
+  on_circ->padding_info[machine->machine_index] =
+      circpad_circuit_machineinfo_new(on_circ, machine->machine_index);
+  on_circ->padding_machine[machine->machine_index] = machine;
+}
+
+/* These padding machines are only used for tests pending #28634. */
+#ifdef TOR_UNIT_TESTS
+static void
+circpad_circ_client_machine_init(void)
+{
+  circpad_machine_spec_t *circ_client_machine
+      = tor_malloc_zero(sizeof(circpad_machine_spec_t));
+
+  // XXX: Better conditions for merge.. Or disable this machine in
+  // merge?
+  circ_client_machine->conditions.min_hops = 2;
+  circ_client_machine->conditions.state_mask =
+      CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED|CIRCPAD_CIRC_HAS_RELAY_EARLY;
+  circ_client_machine->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
+
+  circ_client_machine->target_hopnum = 2;
+  circ_client_machine->is_origin_side = 1;
+
+  /* Start, gap, burst */
+  circpad_machine_states_init(circ_client_machine, 3);
+
+  circ_client_machine->states[CIRCPAD_STATE_START].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  circ_client_machine->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+  circ_client_machine->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  /* If we are in burst state, and we send a non-padding cell, then we cancel
+     the timer for the next padding cell:
+     We dont want to send fake extends when actual extends are going on */
+  circ_client_machine->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL;
+
+  circ_client_machine->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_BINS_EMPTY] = CIRCPAD_STATE_END;
+
+  circ_client_machine->states[CIRCPAD_STATE_BURST].token_removal =
+      CIRCPAD_TOKEN_REMOVAL_CLOSEST;
+
+  // FIXME: Tune this histogram
+  circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2;
+  circ_client_machine->states[CIRCPAD_STATE_BURST].start_usec = 500;
+  circ_client_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  /* We have 5 tokens in the histogram, which means that all circuits will look
+   * like they have 7 hops (since we start this machine after the second hop,
+   * and tokens are decremented for any valid hops, and fake extends are
+   * used after that -- 2+5==7). */
+  circ_client_machine->states[CIRCPAD_STATE_BURST].histogram[0] = 5;
+  circ_client_machine->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 5;
+
+  circ_client_machine->machine_num = smartlist_len(origin_padding_machines);
+  smartlist_add(origin_padding_machines, circ_client_machine);
+}
+
+static void
+circpad_circ_responder_machine_init(void)
+{
+  circpad_machine_spec_t *circ_responder_machine
+      = tor_malloc_zero(sizeof(circpad_machine_spec_t));
+
+  /* Shut down the machine after we've sent enough packets */
+  circ_responder_machine->should_negotiate_end = 1;
+
+  /* The relay-side doesn't care what hopnum it is, but for consistency,
+   * let's match the client */
+  circ_responder_machine->target_hopnum = 2;
+  circ_responder_machine->is_origin_side = 0;
+
+  /* Start, gap, burst */
+  circpad_machine_states_init(circ_responder_machine, 3);
+
+  /* This is the settings of the state machine. In the future we are gonna
+     serialize this into the consensus or the torrc */
+
+  /* We transition to the burst state on padding receive and on non-padding
+   * recieve */
+  circ_responder_machine->states[CIRCPAD_STATE_START].
+      next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+  circ_responder_machine->states[CIRCPAD_STATE_START].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  /* Inside the burst state we _stay_ in the burst state when a non-padding
+   * is sent */
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_BURST;
+
+  /* Inside the burst state we transition to the gap state when we receive a
+   * padding cell */
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP;
+
+  /* These describe the padding charasteristics when in burst state */
+
+  /* use_rtt_estimate tries to estimate how long padding cells take to go from
+     C->M, and uses that as what as the base of the histogram */
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1;
+  /* The histogram is 2 bins: an empty one, and infinity */
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram_len = 2;
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].start_usec = 5000;
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  /* During burst state we wait forever for padding to arrive.
+
+     We are waiting for a padding cell from the client to come in, so that we
+     respond, and we immitate how extend looks like */
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram[0] = 0;
+  // Only infinity bin:
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].histogram[1] = 1;
+  circ_responder_machine->states[CIRCPAD_STATE_BURST].
+      histogram_total_tokens = 1;
+
+  /* From the gap state, we _stay_ in the gap state, when we receive padding
+   * or non padding */
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].
+      next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_GAP;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_GAP;
+
+  /* And from the gap state, we go to the end, when the bins are empty or a
+   * non-padding cell is sent */
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].
+      next_state[CIRCPAD_EVENT_BINS_EMPTY] = CIRCPAD_STATE_END;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].
+      next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_END;
+
+  // FIXME: Tune this histogram
+
+  /* The gap state is the delay you wait after you receive a padding cell
+     before you send a padding response */
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].use_rtt_estimate = 1;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_len = 6;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].start_usec = 5000;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].range_usec = 1000000;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[0] = 0;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[1] = 1;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[2] = 2;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[3] = 2;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram[4] = 1;
+  /* Total number of tokens */
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].histogram_total_tokens = 6;
+  circ_responder_machine->states[CIRCPAD_STATE_GAP].token_removal =
+      CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC;
+
+  circ_responder_machine->machine_num = smartlist_len(relay_padding_machines);
+  smartlist_add(relay_padding_machines, circ_responder_machine);
+}
+#endif
+
+/**
+ * Initialize all of our padding machines.
+ *
+ * This is called at startup. It sets up some global machines, and then
+ * loads some from torrc, and from the tor consensus.
+ */
+void
+circpad_machines_init(void)
+{
+  tor_assert_nonfatal(origin_padding_machines == NULL);
+  tor_assert_nonfatal(relay_padding_machines == NULL);
+
+  origin_padding_machines = smartlist_new();
+  relay_padding_machines = smartlist_new();
+
+  // TODO: Parse machines from consensus and torrc
+#ifdef TOR_UNIT_TESTS
+  circpad_circ_client_machine_init();
+  circpad_circ_responder_machine_init();
+#endif
+}
+
+/**
+ * Free our padding machines
+ */
+void
+circpad_machines_free(void)
+{
+  if (origin_padding_machines) {
+    SMARTLIST_FOREACH(origin_padding_machines,
+                      circpad_machine_spec_t *,
+                      m, tor_free(m->states); tor_free(m));
+    smartlist_free(origin_padding_machines);
+  }
+
+  if (relay_padding_machines) {
+    SMARTLIST_FOREACH(relay_padding_machines,
+                      circpad_machine_spec_t *,
+                      m, tor_free(m->states); tor_free(m));
+    smartlist_free(relay_padding_machines);
+  }
+}
+
+/**
+ * Check the Protover info to see if a node supports padding.
+ */
+static bool
+circpad_node_supports_padding(const node_t *node)
+{
+  if (node->rs) {
+    log_fn(LOG_INFO, LD_CIRC, "Checking padding: %s",
+           node->rs->pv.supports_padding ? "supported" : "unsupported");
+    return node->rs->pv.supports_padding;
+  }
+
+  log_fn(LOG_INFO, LD_CIRC, "Empty routerstatus in padding check");
+  return 0;
+}
+
+/**
+ * Get a node_t for the nth hop in our circuit, starting from 1.
+ *
+ * Returns node_t from the consensus for that hop, if it is opened.
+ * Otherwise returns NULL.
+ */
+static const node_t *
+circuit_get_nth_node(origin_circuit_t *circ, int hop)
+{
+  crypt_path_t *iter = circuit_get_cpath_hop(circ, hop);
+
+  if (!iter || iter->state != CPATH_STATE_OPEN)
+    return NULL;
+
+  return node_get_by_id(iter->extend_info->identity_digest);
+}
+
+/**
+ * Return true if a particular circuit supports padding
+ * at the desired hop.
+ */
+static bool
+circpad_circuit_supports_padding(origin_circuit_t *circ,
+                                 int target_hopnum)
+{
+  const node_t *hop;
+
+  if (!(hop = circuit_get_nth_node(circ, target_hopnum))) {
+    return 0;
+  }
+
+  return circpad_node_supports_padding(hop);
+}
+
+/**
+ * Try to negotiate padding.
+ *
+ * Returns -1 on error, 0 on success.
+ */
+signed_error_t
+circpad_negotiate_padding(origin_circuit_t *circ,
+                          circpad_machine_num_t machine,
+                          uint8_t target_hopnum,
+                          uint8_t command)
+{
+  circpad_negotiate_t type;
+  cell_t cell;
+  ssize_t len;
+
+  /* Check that the target hop lists support for padding in
+   * its ProtoVer fields */
+  if (!circpad_circuit_supports_padding(circ, target_hopnum)) {
+    return -1;
+  }
+
+  memset(&cell, 0, sizeof(cell_t));
+  memset(&type, 0, sizeof(circpad_negotiate_t));
+  // This gets reset to RELAY_EARLY appropriately by
+  // relay_send_command_from_edge_. At least, it looks that way.
+  // QQQ-MP-AP: Verify that.
+  cell.command = CELL_RELAY;
+
+  circpad_negotiate_set_command(&type, command);
+  circpad_negotiate_set_version(&type, 0);
+  circpad_negotiate_set_machine_type(&type, machine);
+
+  if ((len = circpad_negotiate_encode(cell.payload, CELL_PAYLOAD_SIZE,
+        &type)) < 0)
+    return -1;
+
+  log_fn(LOG_INFO,LD_CIRC, "Negotiating padding on circuit %u",
+         circ->global_identifier);
+
+  return circpad_send_command_to_hop(circ, target_hopnum,
+                                     RELAY_COMMAND_PADDING_NEGOTIATE,
+                                     cell.payload, len);
+}
+
+/**
+ * Try to negotiate padding.
+ *
+ * Returns 1 if successful (or already set up), 0 otherwise.
+ */
+bool
+circpad_padding_negotiated(circuit_t *circ,
+                           circpad_machine_num_t machine,
+                           uint8_t command,
+                           uint8_t response)
+{
+  circpad_negotiated_t type;
+  cell_t cell;
+  ssize_t len;
+
+  memset(&cell, 0, sizeof(cell_t));
+  memset(&type, 0, sizeof(circpad_negotiated_t));
+  // This gets reset to RELAY_EARLY appropriately by
+  // relay_send_command_from_edge_. At least, it looks that way.
+  // QQQ-MP-AP: Verify that.
+  cell.command = CELL_RELAY;
+
+  circpad_negotiated_set_command(&type, command);
+  circpad_negotiated_set_response(&type, response);
+  circpad_negotiated_set_version(&type, 0);
+  circpad_negotiated_set_machine_type(&type, machine);
+
+  if ((len = circpad_negotiated_encode(cell.payload, CELL_PAYLOAD_SIZE,
+        &type)) < 0)
+    return 0;
+
+  /* Use relay_send because we're from the middle to the origin. We don't
+   * need to specify a target hop or layer_hint. */
+  return relay_send_command_from_edge(0, circ,
+                                      RELAY_COMMAND_PADDING_NEGOTIATED,
+                                      (void*)cell.payload,
+                                      (size_t)len, NULL) == 0;
+}
+
+/**
+ * Parse and react to a padding_negotiate cell.
+ *
+ * This is called at the middle node upon receipt of the client's choice of
+ * state machine, so that it can use the requested state machine index, if
+ * it is available.
+ *
+ * Returns -1 on error, 0 on success.
+ */
+signed_error_t
+circpad_handle_padding_negotiate(circuit_t *circ, cell_t *cell)
+{
+  int retval = 0;
+  circpad_negotiate_t *negotiate;
+
+  if (CIRCUIT_IS_ORIGIN(circ)) {
+    log_fn(LOG_WARN, LD_PROTOCOL,
+           "Padding negotiate cell unsupported at origin.");
+    return -1;
+  }
+
+  if (circpad_negotiate_parse(&negotiate, cell->payload+RELAY_HEADER_SIZE,
+                               CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) {
+    log_fn(LOG_WARN, LD_CIRC,
+          "Received malformed PADDING_NEGOTIATE cell; dropping.");
+    return -1;
+  }
+
+  if (negotiate->command == CIRCPAD_COMMAND_STOP) {
+    /* Free the machine corresponding to this machine type */
+    free_circ_machineinfos_with_machine_num(circ, negotiate->machine_type);
+    log_fn(LOG_WARN, LD_CIRC,
+          "Received circuit padding stop command for unknown machine.");
+    goto err;
+  } else if (negotiate->command == CIRCPAD_COMMAND_START) {
+    SMARTLIST_FOREACH_BEGIN(relay_padding_machines,
+                            const circpad_machine_spec_t *, m) {
+      if (m->machine_num == negotiate->machine_type) {
+        circpad_setup_machine_on_circ(circ, m);
+        goto done;
+      }
+    } SMARTLIST_FOREACH_END(m);
+  }
+
+  err:
+    retval = -1;
+
+  done:
+    circpad_padding_negotiated(circ, negotiate->machine_type,
+                   negotiate->command,
+                   (retval == 0) ? CIRCPAD_RESPONSE_OK : CIRCPAD_RESPONSE_ERR);
+    circpad_negotiate_free(negotiate);
+
+    return retval;
+}
+
+/**
+ * Parse and react to a padding_negotiated cell.
+ *
+ * This is called at the origin upon receipt of the middle's response
+ * to our choice of state machine.
+ *
+ * Returns -1 on error, 0 on success.
+ */
+signed_error_t
+circpad_handle_padding_negotiated(circuit_t *circ, cell_t *cell,
+                                  crypt_path_t *layer_hint)
+{
+  circpad_negotiated_t *negotiated;
+
+  if (!CIRCUIT_IS_ORIGIN(circ)) {
+    log_fn(LOG_WARN, LD_PROTOCOL,
+           "Padding negotiated cell unsupported at non-origin.");
+    return -1;
+  }
+
+  /* Verify this came from the expected hop */
+  if (!circpad_padding_is_from_expected_hop(circ, layer_hint)) {
+    log_fn(LOG_WARN, LD_PROTOCOL,
+           "Padding negotiated cell from wrong hop!");
+    return -1;
+  }
+
+  if (circpad_negotiated_parse(&negotiated, cell->payload+RELAY_HEADER_SIZE,
+                               CELL_PAYLOAD_SIZE-RELAY_HEADER_SIZE) < 0) {
+    log_fn(LOG_WARN, LD_CIRC,
+          "Received malformed PADDING_NEGOTIATED cell; "
+          "dropping.");
+    return -1;
+  }
+
+  if (negotiated->command == CIRCPAD_COMMAND_STOP) {
+    /* There may not be a padding_info here if we shut down the
+     * machine in circpad_shutdown_old_machines(). Or, if
+     * circpad_add_matching_matchines() added a new machine,
+     * there may be a padding_machine for a different machine num
+     * than this response. */
+    free_circ_machineinfos_with_machine_num(circ, negotiated->machine_type);
+  } else if (negotiated->command == CIRCPAD_COMMAND_START &&
+             negotiated->response == CIRCPAD_RESPONSE_ERR) {
+    // This can happen due to consensus drift.. free the machines
+    // and be sad
+    free_circ_machineinfos_with_machine_num(circ, negotiated->machine_type);
+    TO_ORIGIN_CIRCUIT(circ)->padding_negotiation_failed = 1;
+    log_fn(LOG_INFO, LD_CIRC,
+           "Middle node did not accept our padding request.");
+  }
+
+  circpad_negotiated_free(negotiated);
+  return 0;
+}
+
+/* Serialization */
+// TODO: Should we use keyword=value here? Are there helpers for that?
+#if 0
+static void
+circpad_state_serialize(const circpad_state_t *state,
+                        smartlist_t *chunks)
+{
+  smartlist_add_asprintf(chunks, " %u", state->histogram[0]);
+  for (int i = 1; i < state->histogram_len; i++) {
+    smartlist_add_asprintf(chunks, ",%u",
+                           state->histogram[i]);
+  }
+
+  smartlist_add_asprintf(chunks, " 0x%x",
+                         state->transition_cancel_events);
+
+  for (int i = 0; i < CIRCPAD_NUM_STATES; i++) {
+    smartlist_add_asprintf(chunks, ",0x%x",
+                           state->transition_events[i]);
+  }
+
+  smartlist_add_asprintf(chunks, " %u %u",
+                         state->use_rtt_estimate,
+                         state->token_removal);
+}
+
+char *
+circpad_machine_spec_to_string(const circpad_machine_spec_t *machine)
+{
+  smartlist_t *chunks = smartlist_new();
+  char *out;
+  (void)machine;
+
+  circpad_state_serialize(&machine->start, chunks);
+  circpad_state_serialize(&machine->gap, chunks);
+  circpad_state_serialize(&machine->burst, chunks);
+
+  out = smartlist_join_strings(chunks, "", 0, NULL);
+
+  SMARTLIST_FOREACH(chunks, char *, cp, tor_free(cp));
+  smartlist_free(chunks);
+  return out;
+}
+
+// XXX: Writeme
+const circpad_machine_spec_t *
+circpad_string_to_machine(const char *str)
+{
+  (void)str;
+  return NULL;
+}
+
+#endif

+ 696 - 0
src/core/or/circuitpadding.h

@@ -0,0 +1,696 @@
+/*
+ * Copyright (c) 2017, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file circuitpadding.h
+ * \brief Header file for circuitpadding.c.
+ **/
+
+#ifndef TOR_CIRCUITPADDING_H
+#define TOR_CIRCUITPADDING_H
+
+#include "src/trunnel/circpad_negotiation.h"
+#include "lib/evloop/timers.h"
+
+struct circuit_t;
+struct origin_circuit_t;
+struct cell_t;
+
+/**
+ * Signed error return with the specific property that negative
+ * values mean error codes of various semantics, 0 means success,
+ * and positive values are unused.
+ *
+ * XXX: Tor uses this concept a lot but just calls it int. Should we move
+ * this somewhere centralized? Where?
+ */
+typedef int signed_error_t;
+
+/**
+ * These constants specify the types of events that can cause
+ * transitions between state machine states.
+ *
+ * Note that SENT and RECV are relative to this endpoint. For
+ * relays, SENT means packets destined towards the client and
+ * RECV means packets destined towards the relay. On the client,
+ * SENT means packets destined towards the relay, where as RECV
+ * means packets destined towards the client.
+ */
+typedef enum {
+  /* A non-padding cell was received. */
+  CIRCPAD_EVENT_NONPADDING_RECV = 0,
+  /* A non-padding cell was sent. */
+  CIRCPAD_EVENT_NONPADDING_SENT = 1,
+  /* A padding cell (RELAY_COMMAND_DROP) was sent. */
+  CIRCPAD_EVENT_PADDING_SENT = 2,
+  /* A padding cell was received. */
+  CIRCPAD_EVENT_PADDING_RECV = 3,
+  /* We tried to schedule padding but we ended up picking the infinity bin
+   * which means that padding was delayed infinitely */
+  CIRCPAD_EVENT_INFINITY = 4,
+  /* All histogram bins are empty (we are out of tokens) */
+  CIRCPAD_EVENT_BINS_EMPTY = 5,
+  /* just a counter of the events above */
+  CIRCPAD_EVENT_LENGTH_COUNT = 6
+} circpad_event_t;
+#define CIRCPAD_NUM_EVENTS ((int)CIRCPAD_EVENT_LENGTH_COUNT+1)
+
+/** Boolean type that says if we decided to transition states or not */
+typedef enum {
+  CIRCPAD_STATE_UNCHANGED = 0,
+  CIRCPAD_STATE_CHANGED = 1
+} circpad_decision_t;
+
+/** The type for the things in histogram bins (aka tokens) */
+typedef uint32_t circpad_hist_token_t;
+
+/** The type for histogram indexes (needs to be negative for errors) */
+typedef int8_t circpad_hist_index_t;
+
+/** The type for absolute time, from monotime_absolute_usec() */
+typedef uint64_t circpad_time_t;
+
+/** The type for timer delays, in microseconds */
+typedef uint32_t circpad_delay_t;
+
+/**
+ * An infinite padding cell delay means don't schedule any padding --
+ * simply wait until a different event triggers a transition.
+ *
+ * This means that the maximum delay we can scedule is UINT32_MAX-1
+ * microseconds, or about 4300 seconds (1.25 hours).
+ * XXX: Is this enough if we want to simulate light, intermittent
+ * activity on an onion service?
+ */
+#define CIRCPAD_DELAY_INFINITE  (UINT32_MAX)
+
+/**
+ * Macro to clarify when we're checking the infinity bin.
+ *
+ * Works with either circpad_state_t or circpad_machine_state_t
+ */
+#define CIRCPAD_INFINITY_BIN(mi)  ((mi)->histogram_len-1)
+
+/**
+ * These constants form a bitfield that specifies when a state machine
+ * should be applied to a circuit.
+ *
+ * If any of these elements is set, then the circuit will be tested against
+ * that specific condition. If an element is unset, then we don't test it.
+ * (E.g. If neither NO_STREAMS or STREAMS are set, then we will not care
+ * whether a circuit has streams attached when we apply a state machine)
+ *
+ * The helper function circpad_circuit_state() converts circuit state
+ * flags into this more compact representation.
+ */
+typedef enum {
+  /* Only apply machine if the circuit is still building */
+  CIRCPAD_CIRC_BUILDING = 1<<0,
+  /* Only apply machine if the circuit is open */
+  CIRCPAD_CIRC_OPENED = 1<<1,
+  /* Only apply machine if the circuit has no attached streams */
+  CIRCPAD_CIRC_NO_STREAMS = 1<<2,
+  /* Only apply machine if the circuit has attached streams */
+  CIRCPAD_CIRC_STREAMS = 1<<3,
+  /* Only apply machine if the circuit still allows RELAY_EARLY cells */
+  CIRCPAD_CIRC_HAS_RELAY_EARLY = 1<<4,
+  /* Only apply machine if the circuit has depleted its RELAY_EARLY cells
+   * allowance. */
+  CIRCPAD_CIRC_HAS_NO_RELAY_EARLY = 1<<5
+} circpad_circuit_state_t;
+
+/** Bitmask that says "apply this machine to all states" */
+#define CIRCPAD_STATE_ALL   \
+    (CIRCPAD_CIRC_BUILDING|CIRCPAD_CIRC_OPENED| \
+     CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_NO_STREAMS| \
+     CIRCPAD_CIRC_HAS_RELAY_EARLY|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY)
+
+/**
+ * A compact circuit purpose bitfield mask that allows us to compactly
+ * specify which circuit purposes a machine should apply to.
+ *
+ * The helper function circpad_circ_purpose_to_mask() converts circuit
+ * purposes into bit positions in this bitmask.
+ */
+typedef uint32_t circpad_purpose_mask_t;
+
+/** Bitmask that says "apply this machine to all purposes". */
+#define CIRCPAD_PURPOSE_ALL (0xFFFFFFFF)
+
+/**
+ * This type specifies all of the conditions that must be met before
+ * a client decides to initiate padding on a circuit.
+ *
+ * A circuit must satisfy every sub-field in this type in order
+ * to be considered to match the conditions.
+ */
+typedef struct circpad_machine_conditions_t {
+  /** Only apply the machine *if* the circuit has at least this many hops */
+  unsigned min_hops : 3;
+
+  /** Only apply the machine *if* vanguards are enabled */
+  unsigned requires_vanguards : 1;
+
+  /** Only apply the machine *if* the circuit's state matches any of
+   *  the bits set in this bitmask. */
+  circpad_circuit_state_t state_mask;
+
+  /** Only apply a machine *if* the circuit's purpose matches one
+   *  of the bits set in this bitmask */
+  circpad_purpose_mask_t purpose_mask;
+
+} circpad_machine_conditions_t;
+
+/**
+ * Token removal strategy options.
+ *
+ * The WTF-PAD histograms are meant to specify a target distribution to shape
+ * traffic towards. This is accomplished by removing tokens from the histogram
+ * when either padding or non-padding cells are sent.
+ *
+ * When we see a non-padding cell at a particular time since the last cell, you
+ * remove a token from the corresponding delay bin. These flags specify
+ * which bin to choose if that bin is already empty.
+ */
+typedef enum {
+  /** Don't remove any tokens */
+  CIRCPAD_TOKEN_REMOVAL_NONE = 0,
+  /**
+   * Remove from the first non-zero higher bin index when current is zero.
+   * This is the recommended strategy from the Adaptive Padding paper. */
+  CIRCPAD_TOKEN_REMOVAL_HIGHER = 1,
+  /** Remove from the first non-zero lower bin index when current is empty. */
+  CIRCPAD_TOKEN_REMOVAL_LOWER = 2,
+  /** Remove from the closest non-zero bin index when current is empty. */
+  CIRCPAD_TOKEN_REMOVAL_CLOSEST = 3,
+  /** Remove from the closest bin by time value (since bins are
+   *  exponentially spaced). */
+  CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC = 4,
+  /** Only remove from the exact bin corresponding to this delay. If
+   *  the bin is 0, simply do nothing. Don't pick another bin. */
+  CIRCPAD_TOKEN_REMOVAL_EXACT = 5
+} circpad_removal_t;
+
+/**
+ * Distribution types supported by circpad_distribution_sample().
+ *
+ * These can be used instead of histograms for the inter-packet
+ * timing distribution, or to specify a distribution on the number
+ * of cells that can be sent while in a specific state of the state
+ * machine. */
+typedef enum {
+  CIRCPAD_DIST_NONE = 0,
+  CIRCPAD_DIST_UNIFORM = 1,
+  CIRCPAD_DIST_LOGISTIC = 2,
+  CIRCPAD_DIST_LOG_LOGISTIC = 3,
+  CIRCPAD_DIST_GEOMETRIC = 4,
+  CIRCPAD_DIST_WEIBULL = 5,
+  CIRCPAD_DIST_PARETO = 6
+} circpad_distribution_type_t;
+
+/**
+ * Distribution information.
+ *
+ * This type specifies a specific distribution above, as well as
+ * up to two parameters for that distribution. The specific
+ * per-distribution meaning of these parameters is specified
+ * in circpad_distribution_sample().
+ */
+typedef struct circpad_distribution_t {
+  circpad_distribution_type_t type;
+  double param1;
+  double param2;
+} circpad_distribution_t;
+
+/** State number type. Represents current state of state machine. */
+typedef uint16_t circpad_statenum_t;
+#define  CIRCPAD_STATENUM_MAX   (UINT16_MAX)
+
+/** A histogram is used to sample padding delays given a machine state.  This
+ *  constant defines the maximum histogram width (i.e. the max number of bins)
+ *
+ *  Each histogram bin is twice as large as the previous. Two exceptions: The
+ *  first bin has zero width (which means that minimum delay is applied to the
+ *  next padding cell), and the last bin (infinity bin) has infinite width
+ *  (which means that the next padding cell will be delayed infinitely). */
+#define CIRCPAD_MAX_HISTOGRAM_LEN (sizeof(circpad_delay_t)*8 + 1)
+
+/**
+ * A state of a padding state machine. The information here are immutable and
+ * represent the initial form of the state; it does not get updated as things
+ * happen. The mutable information that gets updated in runtime are carried in
+ * a circpad_machine_state_t.
+ *
+ * This struct describes the histograms and parameters of a single
+ * state in the adaptive padding machine. Instances of this struct
+ * exist in global circpad machine definitions that come from torrc
+ * or the consensus.
+ */
+typedef struct circpad_state_t {
+  /** If a histogram is used for this state, this specifies the number of bins
+   *  of this histogram. Histograms must have at least 2 bins.
+   *
+   *  If a delay probability distribution is used for this state, this is set
+   *  to 0. */
+  circpad_hist_index_t histogram_len;
+  /** The histogram itself: an array of uint16s of tokens, whose
+   *  widths are exponentially spaced, in microseconds */
+  circpad_hist_token_t histogram[CIRCPAD_MAX_HISTOGRAM_LEN];
+  /** Total number of tokens in this histogram. This is a constant and is *not*
+   *  decremented every time we spend a token. It's used for initializing and
+   *  refilling the histogram. */
+  uint32_t histogram_total_tokens;
+
+  /** Minimum padding delay of this state in microseconds.
+   *
+   *  If histograms are used, this is the left (and right) bound of the first
+   *  bin (since it has zero width).
+   *
+   *  If a delay probability distribution is used, this represents the minimum
+   *  delay we can sample from the distribution.
+   */
+  circpad_delay_t start_usec;
+
+  /** If histograms are used, this is the width of the whole histogram in
+   *  microseconds, and it's used to calculate individual bin width.
+   *
+   *  If a delay probability distribution is used, this is used as the max
+   *  delay we can sample from the distribution.
+   */
+  circpad_delay_t range_usec;
+
+  /**
+   * Represents a delay probability distribution (aka IAT distribution). It's a
+   * parametrized way of encoding inter-packet delay information in
+   * microseconds. It can be used instead of histograms.
+   *
+   * If it is used, token_removal below must be set to
+   * CIRCPAD_TOKEN_REMOVAL_NONE.
+   *
+   * Start_usec, range_sec, and rtt_estimates are still applied to the
+   * results of sampling from this distribution (range_sec is used as a max).
+   */
+  circpad_distribution_t iat_dist;
+
+  /**
+   * The length dist is a parameterized way of encoding how long this
+   * state machine runs in terms of sent padding cells or all
+   * sent cells. Values are sampled from this distribution, clamped
+   * to max_len, and then start_len is added to that value.
+   *
+   * It may be specified instead of or in addition to
+   * the infinity bins and bins empty conditions. */
+  circpad_distribution_t length_dist;
+  /** A minimum length value, added to the output of length_dist */
+  uint16_t start_length;
+  /** A cap on the length value that can be sampled from the length_dist */
+  uint64_t max_length;
+
+  /** Should we decrement length when we see a nonpadding packet?
+   * XXX: Are there any machines that actually want to set this to 0? There may
+   * not be. OTOH, it's only a bit.. */
+  unsigned length_includes_nonpadding : 1;
+
+  /**
+   * This is an array that specifies the next state to transition to upon
+   * receipt an event matching the indicated array index.
+   *
+   * This aborts our scheduled packet and switches to the state
+   * corresponding to the index of the array. Tokens are filled upon
+   * this transition.
+   *
+   * States are allowed to transition to themselves, which means re-schedule
+   * a new padding timer. They are also allowed to temporarily "transition"
+   * to the "IGNORE" and "CANCEL" pseudo-states. See #defines below
+   * for details on state behavior and meaning.
+   */
+  circpad_statenum_t next_state[CIRCPAD_NUM_EVENTS];
+
+  /**
+   * If true, estimate the RTT from this relay to the exit/website and add that
+   * to start_usec for use as the histogram bin 0 start delay.
+   *
+   * Right now this is only supported for relay-side state machines.
+   */
+  unsigned use_rtt_estimate : 1;
+
+  /** This specifies the token removal strategy to use upon padding and
+   *  non-padding activity. */
+  circpad_removal_t token_removal;
+} circpad_state_t;
+
+/**
+ * The start state for this machine.
+ *
+ * In the original WTF-PAD, this is only used for transition to/from
+ * the burst state. All other fields are not used. But to simplify the
+ * code we've made it a first-class state. This has no performance
+ * consequences, but may make naive serialization of the state machine
+ * large, if we're not careful about how we represent empty fields.
+ */
+#define  CIRCPAD_STATE_START       0
+
+/**
+ * The burst state for this machine.
+ *
+ * In the original Adaptive Padding algorithm and in WTF-PAD
+ * (https://www.freehaven.net/anonbib/cache/ShWa-Timing06.pdf and
+ * https://www.cs.kau.se/pulls/hot/thebasketcase-wtfpad/), the burst
+ * state serves to detect bursts in traffic. This is done by using longer
+ * delays in its histogram, which represent the expected delays between
+ * bursts of packets in the target stream. If this delay expires without a
+ * real packet being sent, the burst state sends a padding packet and then
+ * immediately transitions to the gap state, which is used to generate
+ * a synthetic padding packet train. In this implementation, this transition
+ * needs to be explicitly specified in the burst state's transition events.
+ *
+ * Because of this flexibility, other padding mechanisms can transition
+ * between these two states arbitrarily, to encode other dynamics of
+ * target traffic.
+ */
+#define  CIRCPAD_STATE_BURST       1
+
+/**
+ * The gap state for this machine.
+ *
+ * In the original Adaptive Padding algorithm and in WTF-PAD, the gap
+ * state serves to simulate an artificial packet train composed of padding
+ * packets. It does this by specifying much lower inter-packet delays than
+ * the burst state, and transitioning back to itself after padding is sent
+ * if these timers expire before real traffic is sent. If real traffic is
+ * sent, it transitions back to the burst state.
+ *
+ * Again, in this implementation, these transitions must be specified
+ * explicitly, and other transitions are also permitted.
+ */
+#define  CIRCPAD_STATE_GAP         2
+
+/**
+ * End is a pseudo-state that causes the machine to go completely
+ * idle, and optionally get torn down (depending on the
+ * value of circpad_machine_spec_t.should_negotiate_end)
+ *
+ * End MUST NOT occupy a slot in the machine state array.
+ */
+#define  CIRCPAD_STATE_END         CIRCPAD_STATENUM_MAX
+
+/**
+ * "Ignore" is a pseudo-state that means "do not react to this
+ * event".
+ *
+ * "Ignore" MUST NOT occupy a slot in the machine state array.
+ */
+#define  CIRCPAD_STATE_IGNORE         (CIRCPAD_STATENUM_MAX-1)
+
+/**
+ * "Cancel" is a pseudo-state that means "cancel pending timers,
+ * but remain in your current state".
+ *
+ * Cancel MUST NOT occupy a slot in the machine state array.
+ */
+#define  CIRCPAD_STATE_CANCEL         (CIRCPAD_STATENUM_MAX-2)
+
+/**
+ * Since we have 3 pseudo-states, the max state array length is
+ * up to one less than cancel's statenum.
+ */
+#define CIRCPAD_MAX_MACHINE_STATES  (CIRCPAD_STATE_CANCEL-1)
+
+/**
+ * Mutable padding machine info.
+ *
+ * This structure contains mutable information about a padding
+ * machine. The mutable information must be kept separate because
+ * it exists per-circuit, where as the machines themselves are global.
+ * This separation is done to conserve space in the circuit structure.
+ *
+ * This is the per-circuit state that changes regarding the global state
+ * machine. Some parts of it are optional (ie NULL).
+ *
+ * XXX: Play with layout to minimize space on x64 Linux (most common relay).
+ */
+typedef struct circpad_machine_state_t {
+  /** The callback pointer for the padding callbacks.
+   *
+   *  These timers stick around the machineinfo until the machineinfo's circuit
+   *  is closed, at which point the timer is cancelled. For this reason it's
+   *  safe to assume that the machineinfo exists if this timer gets
+   *  triggered. */
+  tor_timer_t *padding_timer;
+
+  /** The circuit for this machine */
+  struct circuit_t *on_circ;
+
+  /** A mutable copy of the histogram for the current state.
+   *  NULL if remove_tokens is false for that state */
+  circpad_hist_token_t *histogram;
+  /** Length of the above histogram.
+   * XXX: This field *could* be removed at the expense of added
+   * complexity+overhead for reaching back into the immutable machine
+   * state every time we need to inspect the histogram. It's only a byte,
+   * though, so it seemed worth it.
+   */
+  circpad_hist_index_t histogram_len;
+  /** Remove token from this index upon sending padding */
+  circpad_hist_index_t chosen_bin;
+
+  /** Stop padding/transition if this many cells sent */
+  uint64_t state_length;
+#define CIRCPAD_STATE_LENGTH_INFINITE UINT64_MAX
+
+  /** A scaled count of padding packets sent, used to limit padding overhead.
+   * When this reaches UINT16_MAX, we cut it and nonpadding_sent in half. */
+  uint16_t padding_sent;
+  /** A scaled count of non-padding packets sent, used to limit padding
+   *  overhead. When this reaches UINT16_MAX, we cut it and padding_sent in
+   *  half. */
+  uint16_t nonpadding_sent;
+
+  /**
+   * EWMA estimate of the RTT of the circuit from this hop
+   * to the exit end, in microseconds. */
+  circpad_delay_t rtt_estimate_usec;
+
+  /**
+   * The last time we got an event relevant to estimating
+   * the RTT. Monotonic time in microseconds since system
+   * start.
+   */
+  circpad_time_t last_received_time_usec;
+
+  /**
+   * The time at which we scheduled a non-padding packet,
+   * or selected an infinite delay.
+   *
+   * Monotonic time in microseconds since system start.
+   * This is 0 if we haven't chosen a padding delay.
+   */
+  circpad_time_t padding_scheduled_at_usec;
+
+  /** What state is this machine in? */
+  circpad_statenum_t current_state;
+
+  /**
+   * True if we have scheduled a timer for padding.
+   *
+   * This is 1 if a timer is pending. It is 0 if
+   * no timer is scheduled. (It can be 0 even when
+   * padding_was_scheduled_at_usec is non-zero).
+   */
+  unsigned is_padding_timer_scheduled : 1;
+
+  /**
+   * If this is true, we have seen full duplex behavior.
+   * Stop updating the RTT.
+   */
+  unsigned stop_rtt_update : 1;
+
+/** Max number of padding machines on each circuit. If changed,
+ * also ensure the machine_index bitwith supports the new size. */
+#define CIRCPAD_MAX_MACHINES    (2)
+  /** Which padding machine index was this for.
+   * (make sure changes to the bitwidth can support the
+   * CIRCPAD_MAX_MACHINES define). */
+  unsigned machine_index : 1;
+
+} circpad_machine_state_t;
+
+/** Helper macro to get an actual state machine from a machineinfo */
+#define CIRCPAD_GET_MACHINE(machineinfo) \
+    ((machineinfo)->on_circ->padding_machine[(machineinfo)->machine_index])
+
+/**
+ * This specifies a particular padding machine to use after negotiation.
+ *
+ * The constants for machine_num_t are in trunnel.
+ * We want to be able to define extra numbers in the consensus/torrc, though.
+ */
+typedef uint8_t circpad_machine_num_t;
+
+/** Global state machine structure from the consensus */
+typedef struct circpad_machine_spec_t {
+  /** Global machine number */
+  circpad_machine_num_t machine_num;
+
+  /** Which machine index slot should this machine go into in
+   *  the array on the circuit_t */
+  unsigned machine_index : 1;
+
+  /** Send a padding negotiate to shut down machine at end state? */
+  unsigned should_negotiate_end : 1;
+
+  // These next three fields are origin machine-only...
+  /** Origin side or relay side */
+  unsigned is_origin_side : 1;
+
+  /** Which hop in the circuit should we send padding to/from?
+   *  1-indexed (ie: hop #1 is guard, #2 middle, #3 exit). */
+  unsigned target_hopnum : 3;
+
+  /** This machine only kills fascists if the following conditions are met. */
+  circpad_machine_conditions_t conditions;
+
+  /** How many padding cells can be sent before we apply overhead limits?
+   * XXX: Note that we can only allow up to 64k of padding cells on an
+   * otherwise quiet circuit. Is this enough? It's 33MB. */
+  uint16_t allowed_padding_count;
+
+  /** Padding percent cap: Stop padding if we exceed this percent overhead.
+   * 0 means no limit. Overhead is defined as percent of total traffic, so
+   * that we can use 0..100 here. This is the same definition as used in
+   * Prop#265. */
+  uint8_t max_padding_percent;
+
+  /** State array: indexed by circpad_statenum_t */
+  circpad_state_t *states;
+
+  /**
+   * Number of states this machine has (ie: length of the states array).
+   * XXX: This field is not needed other than for safety. */
+  circpad_statenum_t num_states;
+} circpad_machine_spec_t;
+
+void circpad_new_consensus_params(const networkstatus_t *ns);
+
+/**
+ * The following are event call-in points that are of interest to
+ * the state machines. They are called during cell processing. */
+void circpad_deliver_unrecognized_cell_events(struct circuit_t *circ,
+                                              cell_direction_t dir);
+void circpad_deliver_sent_relay_cell_events(struct circuit_t *circ,
+                                            uint8_t relay_command);
+void circpad_deliver_recognized_relay_cell_events(struct circuit_t *circ,
+                                                  uint8_t relay_command,
+                                                  crypt_path_t *layer_hint);
+
+/** Cell events are delivered by the above delivery functions */
+void circpad_cell_event_nonpadding_sent(struct circuit_t *on_circ);
+void circpad_cell_event_nonpadding_received(struct circuit_t *on_circ);
+void circpad_cell_event_padding_sent(struct circuit_t *on_circ);
+void circpad_cell_event_padding_received(struct circuit_t *on_circ);
+
+/** Internal events are events the machines send to themselves */
+circpad_decision_t
+circpad_internal_event_infinity(circpad_machine_state_t *mi);
+circpad_decision_t
+circpad_internal_event_bins_empty(circpad_machine_state_t *);
+circpad_decision_t circpad_internal_event_state_length_up(
+                                  circpad_machine_state_t *);
+
+/** Machine creation events are events that cause us to set up or
+ *  tear down padding state machines. */
+void circpad_machine_event_circ_added_hop(struct origin_circuit_t *on_circ);
+void circpad_machine_event_circ_built(struct origin_circuit_t *circ);
+void circpad_machine_event_circ_purpose_changed(struct origin_circuit_t *circ);
+void circpad_machine_event_circ_has_streams(struct origin_circuit_t *circ);
+void circpad_machine_event_circ_has_no_streams(struct origin_circuit_t *circ);
+void
+circpad_machine_event_circ_has_no_relay_early(struct origin_circuit_t *circ);
+
+void circpad_machines_init(void);
+void circpad_machines_free(void);
+
+void circpad_machine_states_init(circpad_machine_spec_t *machine,
+                                 circpad_statenum_t num_states);
+
+void circpad_circuit_free_all_machineinfos(struct circuit_t *circ);
+
+bool circpad_padding_is_from_expected_hop(struct circuit_t *circ,
+                                         crypt_path_t *from_hop);
+
+/** Serializaton functions for writing to/from torrc and consensus */
+char *circpad_machine_spec_to_string(const circpad_machine_spec_t *machine);
+const circpad_machine_spec_t *circpad_string_to_machine(const char *str);
+
+/* Padding negotiation between client and middle */
+signed_error_t circpad_handle_padding_negotiate(struct circuit_t *circ,
+                                      struct cell_t *cell);
+signed_error_t circpad_handle_padding_negotiated(struct circuit_t *circ,
+                                      struct cell_t *cell,
+                                      crypt_path_t *layer_hint);
+signed_error_t circpad_negotiate_padding(struct origin_circuit_t *circ,
+                          circpad_machine_num_t machine,
+                          uint8_t target_hopnum,
+                          uint8_t command);
+bool circpad_padding_negotiated(struct circuit_t *circ,
+                           circpad_machine_num_t machine,
+                           uint8_t command,
+                           uint8_t response);
+
+MOCK_DECL(circpad_decision_t,
+circpad_machine_schedule_padding,(circpad_machine_state_t *));
+
+MOCK_DECL(circpad_decision_t,
+circpad_machine_spec_transition, (circpad_machine_state_t *mi,
+                             circpad_event_t event));
+
+circpad_decision_t circpad_send_padding_cell_for_callback(
+                                 circpad_machine_state_t *mi);
+
+#ifdef CIRCUITPADDING_PRIVATE
+STATIC circpad_delay_t
+circpad_machine_sample_delay(circpad_machine_state_t *mi);
+
+STATIC bool
+circpad_machine_reached_padding_limit(circpad_machine_state_t *mi);
+
+STATIC
+circpad_decision_t circpad_machine_remove_token(circpad_machine_state_t *mi);
+
+STATIC circpad_delay_t
+circpad_histogram_bin_to_usec(const circpad_machine_state_t *mi,
+                              circpad_hist_index_t bin);
+
+STATIC const circpad_state_t *
+circpad_machine_current_state(const circpad_machine_state_t *mi);
+
+STATIC circpad_hist_index_t circpad_histogram_usec_to_bin(
+                                       const circpad_machine_state_t *mi,
+                                       circpad_delay_t us);
+
+STATIC circpad_machine_state_t *circpad_circuit_machineinfo_new(
+                                               struct circuit_t *on_circ,
+                                               int machine_index);
+STATIC void circpad_machine_remove_higher_token(circpad_machine_state_t *mi,
+                                         circpad_delay_t target_bin_us);
+STATIC void circpad_machine_remove_lower_token(circpad_machine_state_t *mi,
+                                         circpad_delay_t target_bin_us);
+STATIC void circpad_machine_remove_closest_token(circpad_machine_state_t *mi,
+                                         circpad_delay_t target_bin_us,
+                                         bool use_usec);
+STATIC void circpad_machine_setup_tokens(circpad_machine_state_t *mi);
+
+MOCK_DECL(STATIC signed_error_t,
+circpad_send_command_to_hop,(struct origin_circuit_t *circ, uint8_t hopnum,
+                             uint8_t relay_command, const uint8_t *payload,
+                             ssize_t payload_len));
+
+#ifdef TOR_UNIT_TESTS
+extern smartlist_t *origin_padding_machines;
+extern smartlist_t *relay_padding_machines;
+#endif
+
+#endif
+
+#endif

+ 14 - 0
src/core/or/circuituse.c

@@ -35,6 +35,7 @@
 #include "core/or/circuitlist.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuituse.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_edge.h"
 #include "core/or/policies.h"
 #include "core/or/policies.h"
 #include "feature/client/addressmap.h"
 #include "feature/client/addressmap.h"
@@ -1419,6 +1420,11 @@ circuit_detach_stream(circuit_t *circ, edge_connection_t *conn)
       if (circ->purpose == CIRCUIT_PURPOSE_S_REND_JOINED) {
       if (circ->purpose == CIRCUIT_PURPOSE_S_REND_JOINED) {
         hs_dec_rdv_stream_counter(origin_circ);
         hs_dec_rdv_stream_counter(origin_circ);
       }
       }
+
+      /* If there are no more streams on this circ, tell circpad */
+      if (!origin_circ->p_streams)
+        circpad_machine_event_circ_has_no_streams(origin_circ);
+
       return;
       return;
     }
     }
   } else {
   } else {
@@ -2586,6 +2592,12 @@ link_apconn_to_circ(entry_connection_t *apconn, origin_circuit_t *circ,
   /* add it into the linked list of streams on this circuit */
   /* add it into the linked list of streams on this circuit */
   log_debug(LD_APP|LD_CIRC, "attaching new conn to circ. n_circ_id %u.",
   log_debug(LD_APP|LD_CIRC, "attaching new conn to circ. n_circ_id %u.",
             (unsigned)circ->base_.n_circ_id);
             (unsigned)circ->base_.n_circ_id);
+
+  /* If this is the first stream on this circuit, tell circpad
+   * that streams are attached */
+  if (!circ->p_streams)
+    circpad_machine_event_circ_has_streams(circ);
+
   /* reset it, so we can measure circ timeouts */
   /* reset it, so we can measure circ timeouts */
   ENTRY_TO_CONN(apconn)->timestamp_last_read_allowed = time(NULL);
   ENTRY_TO_CONN(apconn)->timestamp_last_read_allowed = time(NULL);
   ENTRY_TO_EDGE_CONN(apconn)->next_stream = circ->p_streams;
   ENTRY_TO_EDGE_CONN(apconn)->next_stream = circ->p_streams;
@@ -3064,6 +3076,8 @@ circuit_change_purpose(circuit_t *circ, uint8_t new_purpose)
   if (CIRCUIT_IS_ORIGIN(circ)) {
   if (CIRCUIT_IS_ORIGIN(circ)) {
     control_event_circuit_purpose_changed(TO_ORIGIN_CIRCUIT(circ),
     control_event_circuit_purpose_changed(TO_ORIGIN_CIRCUIT(circ),
                                           old_purpose);
                                           old_purpose);
+
+    circpad_machine_event_circ_purpose_changed(TO_ORIGIN_CIRCUIT(circ));
   }
   }
 }
 }
 
 

+ 5 - 0
src/core/or/connection_edge.c

@@ -67,6 +67,7 @@
 #include "core/or/circuitbuild.h"
 #include "core/or/circuitbuild.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuituse.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_edge.h"
 #include "core/or/connection_or.h"
 #include "core/or/connection_or.h"
 #include "core/or/policies.h"
 #include "core/or/policies.h"
@@ -3712,6 +3713,10 @@ handle_hs_exit_conn(circuit_t *circ, edge_connection_t *conn)
   /* Link the circuit and the connection crypt path. */
   /* Link the circuit and the connection crypt path. */
   conn->cpath_layer = origin_circ->cpath->prev;
   conn->cpath_layer = origin_circ->cpath->prev;
 
 
+  /* If this is the first stream on this circuit, tell circpad */
+  if (!origin_circ->p_streams)
+    circpad_machine_event_circ_has_streams(origin_circ);
+
   /* Add it into the linked list of p_streams on this circuit */
   /* Add it into the linked list of p_streams on this circuit */
   conn->next_stream = origin_circ->p_streams;
   conn->next_stream = origin_circ->p_streams;
   origin_circ->p_streams = conn;
   origin_circ->p_streams = conn;

+ 7 - 0
src/core/or/or.h

@@ -207,6 +207,9 @@ struct curve25519_public_key_t;
 #define RELAY_COMMAND_RENDEZVOUS_ESTABLISHED 39
 #define RELAY_COMMAND_RENDEZVOUS_ESTABLISHED 39
 #define RELAY_COMMAND_INTRODUCE_ACK 40
 #define RELAY_COMMAND_INTRODUCE_ACK 40
 
 
+#define RELAY_COMMAND_PADDING_NEGOTIATE 41
+#define RELAY_COMMAND_PADDING_NEGOTIATED 42
+
 /* Reasons why an OR connection is closed. */
 /* Reasons why an OR connection is closed. */
 #define END_OR_CONN_REASON_DONE           1
 #define END_OR_CONN_REASON_DONE           1
 #define END_OR_CONN_REASON_REFUSED        2 /* connection refused */
 #define END_OR_CONN_REASON_REFUSED        2 /* connection refused */
@@ -836,6 +839,10 @@ typedef struct protover_summary_flags_t {
    * service rendezvous point supporting version 3 as seen in proposal 224.
    * service rendezvous point supporting version 3 as seen in proposal 224.
    * This requires HSRend=2. */
    * This requires HSRend=2. */
   unsigned int supports_v3_rendezvous_point: 1;
   unsigned int supports_v3_rendezvous_point: 1;
+
+  /** True iff this router has a protocol list that allows clients to
+   * negotiate link-level padding. Requires Padding>=1. */
+  unsigned int supports_padding : 1;
 } protover_summary_flags_t;
 } protover_summary_flags_t;
 
 
 typedef struct routerinfo_t routerinfo_t;
 typedef struct routerinfo_t routerinfo_t;

+ 4 - 0
src/core/or/origin_circuit_st.h

@@ -161,6 +161,10 @@ struct origin_circuit_t {
    * connections to this circuit. */
    * connections to this circuit. */
   unsigned int unusable_for_new_conns : 1;
   unsigned int unusable_for_new_conns : 1;
 
 
+  /* If this flag is set (due to padding negotiation failure), we should
+   * not try to negotiate further circuit padding. */
+  unsigned padding_negotiation_failed : 1;
+
   /**
   /**
    * Tristate variable to guard against pathbias miscounting
    * Tristate variable to guard against pathbias miscounting
    * due to circuit purpose transitions changing the decision
    * due to circuit purpose transitions changing the decision

+ 6 - 1
src/core/or/protover.c

@@ -39,6 +39,9 @@ static int protocol_list_contains(const smartlist_t *protos,
 static const struct {
 static const struct {
   protocol_type_t protover_type;
   protocol_type_t protover_type;
   const char *name;
   const char *name;
+/* If you add a new protocol here, you probably also want to add
+ * parsing for it in routerstatus_parse_entry_from_string() so that
+ * it is set in routerstatus_t */
 } PROTOCOL_NAMES[] = {
 } PROTOCOL_NAMES[] = {
   { PRT_LINK, "Link" },
   { PRT_LINK, "Link" },
   { PRT_LINKAUTH, "LinkAuth" },
   { PRT_LINKAUTH, "LinkAuth" },
@@ -49,6 +52,7 @@ static const struct {
   { PRT_HSREND, "HSRend" },
   { PRT_HSREND, "HSRend" },
   { PRT_DESC, "Desc" },
   { PRT_DESC, "Desc" },
   { PRT_MICRODESC, "Microdesc"},
   { PRT_MICRODESC, "Microdesc"},
+  { PRT_PADDING, "Padding"},
   { PRT_CONS, "Cons" }
   { PRT_CONS, "Cons" }
 };
 };
 
 
@@ -396,7 +400,8 @@ protover_get_supported_protocols(void)
     "LinkAuth=3 "
     "LinkAuth=3 "
 #endif
 #endif
     "Microdesc=1-2 "
     "Microdesc=1-2 "
-    "Relay=1-2";
+    "Relay=1-2 "
+    "Padding=1";
 }
 }
 
 
 /** The protocols from protover_get_supported_protocols(), as parsed into a
 /** The protocols from protover_get_supported_protocols(), as parsed into a

+ 1 - 0
src/core/or/protover.h

@@ -43,6 +43,7 @@ typedef enum protocol_type_t {
   PRT_DESC,
   PRT_DESC,
   PRT_MICRODESC,
   PRT_MICRODESC,
   PRT_CONS,
   PRT_CONS,
+  PRT_PADDING,
 } protocol_type_t;
 } protocol_type_t;
 
 
 bool protover_contains_long_protocol_names(const char *s);
 bool protover_contains_long_protocol_names(const char *s);

+ 25 - 11
src/core/or/relay.c

@@ -55,6 +55,7 @@
 #include "core/or/circuitbuild.h"
 #include "core/or/circuitbuild.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuitlist.h"
 #include "core/or/circuituse.h"
 #include "core/or/circuituse.h"
+#include "core/or/circuitpadding.h"
 #include "lib/compress/compress.h"
 #include "lib/compress/compress.h"
 #include "app/config/config.h"
 #include "app/config/config.h"
 #include "core/mainloop/connection.h"
 #include "core/mainloop/connection.h"
@@ -80,7 +81,6 @@
 #include "feature/nodelist/describe.h"
 #include "feature/nodelist/describe.h"
 #include "feature/nodelist/routerlist.h"
 #include "feature/nodelist/routerlist.h"
 #include "core/or/scheduler.h"
 #include "core/or/scheduler.h"
-#include "feature/stats/rephist.h"
 
 
 #include "core/or/cell_st.h"
 #include "core/or/cell_st.h"
 #include "core/or/cell_queue_st.h"
 #include "core/or/cell_queue_st.h"
@@ -293,7 +293,9 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ,
     return 0;
     return 0;
   }
   }
 
 
-  /* not recognized. pass it on. */
+  /* not recognized. inform circpad and pass it on. */
+  circpad_deliver_unrecognized_cell_events(circ, cell_direction);
+
   if (cell_direction == CELL_DIRECTION_OUT) {
   if (cell_direction == CELL_DIRECTION_OUT) {
     cell->circ_id = circ->n_circ_id; /* switch it */
     cell->circ_id = circ->n_circ_id; /* switch it */
     chan = circ->n_chan;
     chan = circ->n_chan;
@@ -353,11 +355,11 @@ circuit_receive_relay_cell(cell_t *cell, circuit_t *circ,
  *  - Encrypt it to the right layer
  *  - Encrypt it to the right layer
  *  - Append it to the appropriate cell_queue on <b>circ</b>.
  *  - Append it to the appropriate cell_queue on <b>circ</b>.
  */
  */
-static int
-circuit_package_relay_cell(cell_t *cell, circuit_t *circ,
+MOCK_IMPL(int,
+circuit_package_relay_cell, (cell_t *cell, circuit_t *circ,
                            cell_direction_t cell_direction,
                            cell_direction_t cell_direction,
                            crypt_path_t *layer_hint, streamid_t on_stream,
                            crypt_path_t *layer_hint, streamid_t on_stream,
-                           const char *filename, int lineno)
+                           const char *filename, int lineno))
 {
 {
   channel_t *chan; /* where to send the cell */
   channel_t *chan; /* where to send the cell */
 
 
@@ -524,6 +526,8 @@ relay_command_to_string(uint8_t command)
     case RELAY_COMMAND_INTRODUCE_ACK: return "INTRODUCE_ACK";
     case RELAY_COMMAND_INTRODUCE_ACK: return "INTRODUCE_ACK";
     case RELAY_COMMAND_EXTEND2: return "EXTEND2";
     case RELAY_COMMAND_EXTEND2: return "EXTEND2";
     case RELAY_COMMAND_EXTENDED2: return "EXTENDED2";
     case RELAY_COMMAND_EXTENDED2: return "EXTENDED2";
+    case RELAY_COMMAND_PADDING_NEGOTIATE: return "PADDING_NEGOTIATE";
+    case RELAY_COMMAND_PADDING_NEGOTIATED: return "PADDING_NEGOTIATED";
     default:
     default:
       tor_snprintf(buf, sizeof(buf), "Unrecognized relay command %u",
       tor_snprintf(buf, sizeof(buf), "Unrecognized relay command %u",
                    (unsigned)command);
                    (unsigned)command);
@@ -577,8 +581,8 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
   log_debug(LD_OR,"delivering %d cell %s.", relay_command,
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
             cell_direction == CELL_DIRECTION_OUT ? "forward" : "backward");
 
 
-  if (relay_command == RELAY_COMMAND_DROP)
-    rep_hist_padding_count_write(PADDING_TYPE_DROP);
+  /* Tell circpad we're sending a relay cell */
+  circpad_deliver_sent_relay_cell_events(circ, relay_command);
 
 
   /* If we are sending an END cell and this circuit is used for a tunneled
   /* If we are sending an END cell and this circuit is used for a tunneled
    * directory request, advance its state. */
    * directory request, advance its state. */
@@ -602,7 +606,9 @@ relay_send_command_from_edge_,(streamid_t stream_id, circuit_t *circ,
        * one of them.  Don't worry about the conn protocol version:
        * one of them.  Don't worry about the conn protocol version:
        * append_cell_to_circuit_queue will fix it up. */
        * append_cell_to_circuit_queue will fix it up. */
       cell.command = CELL_RELAY_EARLY;
       cell.command = CELL_RELAY_EARLY;
-      --origin_circ->remaining_relay_early_cells;
+      /* If we're out of relay early cells, tell circpad */
+      if (--origin_circ->remaining_relay_early_cells == 0)
+        circpad_machine_event_circ_has_no_relay_early(origin_circ);
       log_debug(LD_OR, "Sending a RELAY_EARLY cell; %d remaining.",
       log_debug(LD_OR, "Sending a RELAY_EARLY cell; %d remaining.",
                 (int)origin_circ->remaining_relay_early_cells);
                 (int)origin_circ->remaining_relay_early_cells);
       /* Memorize the command that is sent as RELAY_EARLY cell; helps debug
       /* Memorize the command that is sent as RELAY_EARLY cell; helps debug
@@ -1481,9 +1487,11 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
     }
     }
   }
   }
 
 
+  /* Tell circpad that we've recieved a recognized cell */
+  circpad_deliver_recognized_relay_cell_events(circ, rh.command, layer_hint);
+
   /* either conn is NULL, in which case we've got a control cell, or else
   /* either conn is NULL, in which case we've got a control cell, or else
    * conn points to the recognized stream. */
    * conn points to the recognized stream. */
-
   if (conn && !connection_state_is_open(TO_CONN(conn))) {
   if (conn && !connection_state_is_open(TO_CONN(conn))) {
     if (conn->base_.type == CONN_TYPE_EXIT &&
     if (conn->base_.type == CONN_TYPE_EXIT &&
         (conn->base_.state == EXIT_CONN_STATE_CONNECTING ||
         (conn->base_.state == EXIT_CONN_STATE_CONNECTING ||
@@ -1504,8 +1512,14 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
 
 
   switch (rh.command) {
   switch (rh.command) {
     case RELAY_COMMAND_DROP:
     case RELAY_COMMAND_DROP:
-      rep_hist_padding_count_read(PADDING_TYPE_DROP);
-//      log_info(domain,"Got a relay-level padding cell. Dropping.");
+      /* Already examined in circpad_deliver_recognized_relay_cell_events */
+      return 0;
+    case RELAY_COMMAND_PADDING_NEGOTIATE:
+      circpad_handle_padding_negotiate(circ, cell);
+      return 0;
+    case RELAY_COMMAND_PADDING_NEGOTIATED:
+      if (circpad_handle_padding_negotiated(circ, cell, layer_hint) == 0)
+        circuit_read_valid_data(TO_ORIGIN_CIRCUIT(circ), rh.length);
       return 0;
       return 0;
     case RELAY_COMMAND_BEGIN:
     case RELAY_COMMAND_BEGIN:
     case RELAY_COMMAND_BEGIN_DIR:
     case RELAY_COMMAND_BEGIN_DIR:

+ 5 - 0
src/core/or/relay.h

@@ -78,6 +78,11 @@ void destroy_cell_queue_append(destroy_cell_queue_t *queue,
 void channel_unlink_all_circuits(channel_t *chan, smartlist_t *detached_out);
 void channel_unlink_all_circuits(channel_t *chan, smartlist_t *detached_out);
 MOCK_DECL(int, channel_flush_from_first_active_circuit,
 MOCK_DECL(int, channel_flush_from_first_active_circuit,
           (channel_t *chan, int max));
           (channel_t *chan, int max));
+MOCK_DECL(int, circuit_package_relay_cell, (cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction,
+                           crypt_path_t *layer_hint, streamid_t on_stream,
+                           const char *filename, int lineno));
+
 void update_circuit_on_cmux_(circuit_t *circ, cell_direction_t direction,
 void update_circuit_on_cmux_(circuit_t *circ, cell_direction_t direction,
                              const char *file, int lineno);
                              const char *file, int lineno);
 #define update_circuit_on_cmux(circ, direction) \
 #define update_circuit_on_cmux(circ, direction) \

+ 2 - 0
src/core/or/versions.c

@@ -448,6 +448,8 @@ memoize_protover_summary(protover_summary_flags_t *out,
   out->supports_v3_rendezvous_point =
   out->supports_v3_rendezvous_point =
     protocol_list_supports_protocol(protocols, PRT_HSREND,
     protocol_list_supports_protocol(protocols, PRT_HSREND,
                                     PROTOVER_HS_RENDEZVOUS_POINT_V3);
                                     PROTOVER_HS_RENDEZVOUS_POINT_V3);
+    out->supports_padding =
+      protocol_list_supports_protocol(protocols, PRT_PADDING, 1);
 
 
   protover_summary_flags_t *new_cached = tor_memdup(out, sizeof(*out));
   protover_summary_flags_t *new_cached = tor_memdup(out, sizeof(*out));
   cached = strmap_set(protover_summary_map, protocols, new_cached);
   cached = strmap_set(protover_summary_map, protocols, new_cached);

+ 1 - 2
src/feature/hibernate/hibernate.c

@@ -37,6 +37,7 @@ hibernating, phase 2:
 #include "core/or/connection_or.h"
 #include "core/or/connection_or.h"
 #include "feature/control/control.h"
 #include "feature/control/control.h"
 #include "lib/crypt_ops/crypto_rand.h"
 #include "lib/crypt_ops/crypto_rand.h"
+#include "lib/defs/time.h"
 #include "feature/hibernate/hibernate.h"
 #include "feature/hibernate/hibernate.h"
 #include "core/mainloop/mainloop.h"
 #include "core/mainloop/mainloop.h"
 #include "feature/relay/router.h"
 #include "feature/relay/router.h"
@@ -832,8 +833,6 @@ hibernate_soft_limit_reached(void)
   return get_accounting_bytes() >= soft_limit;
   return get_accounting_bytes() >= soft_limit;
 }
 }
 
 
-#define TOR_USEC_PER_SEC (1000000)
-
 /** Called when we get a SIGINT, or when bandwidth soft limit is
 /** Called when we get a SIGINT, or when bandwidth soft limit is
  * reached. Puts us into "loose hibernation": we don't accept new
  * reached. Puts us into "loose hibernation": we don't accept new
  * connections, but we continue handling old ones. */
  * connections, but we continue handling old ones. */

+ 2 - 0
src/feature/nodelist/networkstatus.c

@@ -44,6 +44,7 @@
 #include "core/mainloop/netstatus.h"
 #include "core/mainloop/netstatus.h"
 #include "core/or/channel.h"
 #include "core/or/channel.h"
 #include "core/or/channelpadding.h"
 #include "core/or/channelpadding.h"
+#include "core/or/circuitpadding.h"
 #include "core/or/circuitmux.h"
 #include "core/or/circuitmux.h"
 #include "core/or/circuitmux_ewma.h"
 #include "core/or/circuitmux_ewma.h"
 #include "core/or/circuitstats.h"
 #include "core/or/circuitstats.h"
@@ -2116,6 +2117,7 @@ networkstatus_set_current_consensus(const char *consensus,
     circuit_build_times_new_consensus_params(
     circuit_build_times_new_consensus_params(
                                get_circuit_build_times_mutable(), c);
                                get_circuit_build_times_mutable(), c);
     channelpadding_new_consensus_params(c);
     channelpadding_new_consensus_params(c);
+    circpad_new_consensus_params(c);
   }
   }
 
 
   /* Reset the failure count only if this consensus is actually valid. */
   /* Reset the failure count only if this consensus is actually valid. */

+ 2 - 2
src/feature/nodelist/nodelist.c

@@ -1106,7 +1106,7 @@ node_ed25519_id_matches(const node_t *node, const ed25519_public_key_t *id)
 /** Dummy object that should be unreturnable.  Used to ensure that
 /** Dummy object that should be unreturnable.  Used to ensure that
  * node_get_protover_summary_flags() always returns non-NULL. */
  * node_get_protover_summary_flags() always returns non-NULL. */
 static const protover_summary_flags_t zero_protover_flags = {
 static const protover_summary_flags_t zero_protover_flags = {
-  0,0,0,0,0,0,0
+  0,0,0,0,0,0,0,0
 };
 };
 
 
 /** Return the protover_summary_flags for a given node. */
 /** Return the protover_summary_flags for a given node. */
@@ -2350,7 +2350,7 @@ compute_frac_paths_available(const networkstatus_t *consensus,
   const int authdir = authdir_mode_v3(options);
   const int authdir = authdir_mode_v3(options);
 
 
   count_usable_descriptors(num_present_out, num_usable_out,
   count_usable_descriptors(num_present_out, num_usable_out,
-                           mid, consensus, now, NULL,
+                           mid, consensus, now, options->MiddleNodes,
                            USABLE_DESCRIPTOR_ALL);
                            USABLE_DESCRIPTOR_ALL);
   log_debug(LD_NET,
   log_debug(LD_NET,
             "%s: %d present, %d usable",
             "%s: %d present, %d usable",

+ 2 - 0
src/feature/nodelist/routerlist.c

@@ -3221,6 +3221,8 @@ refresh_all_country_info(void)
     routerset_refresh_countries(options->EntryNodes);
     routerset_refresh_countries(options->EntryNodes);
   if (options->ExitNodes)
   if (options->ExitNodes)
     routerset_refresh_countries(options->ExitNodes);
     routerset_refresh_countries(options->ExitNodes);
+  if (options->MiddleNodes)
+    routerset_refresh_countries(options->MiddleNodes);
   if (options->ExcludeNodes)
   if (options->ExcludeNodes)
     routerset_refresh_countries(options->ExcludeNodes);
     routerset_refresh_countries(options->ExcludeNodes);
   if (options->ExcludeExitNodes)
   if (options->ExcludeExitNodes)

+ 11 - 0
src/lib/crypt_ops/crypto_rand.c

@@ -528,6 +528,17 @@ crypto_rand_unmocked(char *to, size_t n)
 #endif
 #endif
 }
 }
 
 
+/**
+ * Draw an unsigned 32-bit integer uniformly at random.
+ */
+uint32_t
+crypto_rand_u32(void)
+{
+  uint32_t rand;
+  crypto_rand((void*)&rand, sizeof(rand));
+  return rand;
+}
+
 /**
 /**
  * Return a pseudorandom integer, chosen uniformly from the values
  * Return a pseudorandom integer, chosen uniformly from the values
  * between 0 and <b>max</b>-1 inclusive.  <b>max</b> must be between 1 and
  * between 0 and <b>max</b>-1 inclusive.  <b>max</b> must be between 1 and

+ 1 - 0
src/lib/crypt_ops/crypto_rand.h

@@ -27,6 +27,7 @@ int crypto_rand_int(unsigned int max);
 int crypto_rand_int_range(unsigned int min, unsigned int max);
 int crypto_rand_int_range(unsigned int min, unsigned int max);
 uint64_t crypto_rand_uint64_range(uint64_t min, uint64_t max);
 uint64_t crypto_rand_uint64_range(uint64_t min, uint64_t max);
 time_t crypto_rand_time_range(time_t min, time_t max);
 time_t crypto_rand_time_range(time_t min, time_t max);
+uint32_t crypto_rand_u32(void);
 uint64_t crypto_rand_uint64(uint64_t max);
 uint64_t crypto_rand_uint64(uint64_t max);
 double crypto_rand_double(void);
 double crypto_rand_double(void);
 struct tor_weak_rng_t;
 struct tor_weak_rng_t;

+ 1 - 0
src/lib/defs/include.am

@@ -2,4 +2,5 @@
 noinst_HEADERS += 			\
 noinst_HEADERS += 			\
 	src/lib/defs/dh_sizes.h 	\
 	src/lib/defs/dh_sizes.h 	\
 	src/lib/defs/digest_sizes.h	\
 	src/lib/defs/digest_sizes.h	\
+	src/lib/defs/time.h      	\
 	src/lib/defs/x25519_sizes.h
 	src/lib/defs/x25519_sizes.h

+ 23 - 0
src/lib/defs/time.h

@@ -0,0 +1,23 @@
+/* Copyright (c) 2001, Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_TIME_DEFS_H
+#define TOR_TIME_DEFS_H
+
+/**
+ * \file time.h
+ *
+ * \brief Definitions for timing-related constants.
+ **/
+
+/** How many microseconds per second */
+#define TOR_USEC_PER_SEC (1000000)
+/** How many nanoseconds per microsecond */
+#define TOR_NSEC_PER_USEC (1000)
+/* How many nanoseconds per millisecond */
+#define TOR_NSEC_PER_MSEC (1000*1000)
+
+#endif

+ 2 - 0
src/lib/math/.may_include

@@ -3,3 +3,5 @@ orconfig.h
 lib/cc/*.h
 lib/cc/*.h
 lib/log/*.h
 lib/log/*.h
 lib/math/*.h
 lib/math/*.h
+lib/testsupport/*.h
+lib/crypt_ops/*.h

+ 25 - 0
src/lib/math/fp.c

@@ -117,3 +117,28 @@ ENABLE_GCC_WARNING(double-promotion)
 ENABLE_GCC_WARNING(float-conversion)
 ENABLE_GCC_WARNING(float-conversion)
 #endif
 #endif
 }
 }
+
+/* isinf() wrapper for tor */
+int
+tor_isinf(double x)
+{
+  /* Same as above, work around the "double promotion" warnings */
+#if defined(MINGW_ANY) && GCC_VERSION >= 409
+#define PROBLEMATIC_FLOAT_CONVERSION_WARNING
+DISABLE_GCC_WARNING(float-conversion)
+#endif /* defined(MINGW_ANY) && GCC_VERSION >= 409 */
+#if defined(__clang__)
+#if __has_warning("-Wdouble-promotion")
+#define PROBLEMATIC_DOUBLE_PROMOTION_WARNING
+DISABLE_GCC_WARNING(double-promotion)
+#endif
+#endif /* defined(__clang__) */
+  return isinf(x);
+#ifdef PROBLEMATIC_DOUBLE_PROMOTION_WARNING
+ENABLE_GCC_WARNING(double-promotion)
+#endif
+#ifdef PROBLEMATIC_FLOAT_CONVERSION_WARNING
+ENABLE_GCC_WARNING(float-conversion)
+#endif
+}
+

+ 1 - 0
src/lib/math/fp.h

@@ -19,5 +19,6 @@ double tor_mathlog(double d) ATTR_CONST;
 long tor_lround(double d) ATTR_CONST;
 long tor_lround(double d) ATTR_CONST;
 int64_t tor_llround(double d) ATTR_CONST;
 int64_t tor_llround(double d) ATTR_CONST;
 int64_t clamp_double_to_int64(double number);
 int64_t clamp_double_to_int64(double number);
+int tor_isinf(double x);
 
 
 #endif
 #endif

+ 4 - 2
src/lib/math/include.am

@@ -7,7 +7,8 @@ endif
 
 
 src_lib_libtor_math_a_SOURCES =	\
 src_lib_libtor_math_a_SOURCES =	\
 		src/lib/math/fp.c		\
 		src/lib/math/fp.c		\
-		src/lib/math/laplace.c
+		src/lib/math/laplace.c 	\
+		src/lib/math/prob_distr.c
 
 
 
 
 src_lib_libtor_math_testing_a_SOURCES = \
 src_lib_libtor_math_testing_a_SOURCES = \
@@ -17,4 +18,5 @@ src_lib_libtor_math_testing_a_CFLAGS = $(AM_CFLAGS) $(TEST_CFLAGS)
 
 
 noinst_HEADERS +=				\
 noinst_HEADERS +=				\
 		src/lib/math/fp.h		\
 		src/lib/math/fp.h		\
-		src/lib/math/laplace.h
+		src/lib/math/laplace.h  \
+		src/lib/math/prob_distr.h

+ 1717 - 0
src/lib/math/prob_distr.c

@@ -0,0 +1,1717 @@
+/* Copyright (c) 2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file prob_distr.c
+ *
+ * \brief
+ *  Implements various probability distributions.
+ *  Almost all code is courtesy of Riastradh.
+ *
+ * \details
+ * Here are some details that might help you understand this file:
+ *
+ * - Throughout this file, `eps' means the largest relative error of a
+ *   correctly rounded floating-point operation, which in binary64
+ *   floating-point arithmetic is 2^-53.  Here the relative error of a
+ *   true value x from a computed value y is |x - y|/|x|.  This
+ *   definition of epsilon is conventional for numerical analysts when
+ *   writing error analyses.  (If your libm doesn't provide correctly
+ *   rounded exp and log, their relative error is usually below 2*2^-53
+ *   and probably closer to 1.1*2^-53 instead.)
+ *
+ *   The C constant DBL_EPSILON is actually twice this, and should
+ *   perhaps rather be named ulp(1) -- that is, it is the distance from
+ *   1 to the next greater floating-point number, which is usually of
+ *   more interest to programmers and hardware engineers.
+ *
+ *   Since this file is concerned mainly with error bounds rather than
+ *   with low-level bit-hacking of floating-point numbers, we adopt the
+ *   numerical analysts' definition in the comments, though we do use
+ *   DBL_EPSILON in a handful of places where it is convenient to use
+ *   some function of eps = DBL_EPSILON/2 in a case analysis.
+ *
+ * - In various functions (e.g. sample_log_logistic()) we jump through hoops so
+ *   that we can use reals closer to 0 than closer to 1, since we achieve much
+ *   greater accuracy for floating point numbers near 0. In particular, we can
+ *   represent differences as small as 10^-300 for numbers near 0, but of no
+ *   less than 10^-16 for numbers near 1.
+ **/
+
+#define PROB_DISTR_PRIVATE
+
+#include "orconfig.h"
+
+#include "lib/math/prob_distr.h"
+
+#include "lib/crypt_ops/crypto_rand.h"
+#include "lib/cc/ctassert.h"
+
+#include <float.h>
+#include <math.h>
+#include <stddef.h>
+
+/** Validators for downcasting macros below */
+#define validate_container_of(PTR, TYPE, FIELD)                         \
+  (0 * sizeof((PTR) - &((TYPE *)(((char *)(PTR)) -                      \
+      offsetof(TYPE, FIELD)))->FIELD))
+#define validate_const_container_of(PTR, TYPE, FIELD)                   \
+  (0 * sizeof((PTR) - &((const TYPE *)(((const char *)(PTR)) -          \
+      offsetof(TYPE, FIELD)))->FIELD))
+/** Downcasting macro */
+#define container_of(PTR, TYPE, FIELD)                                  \
+  ((TYPE *)(((char *)(PTR)) - offsetof(TYPE, FIELD))                    \
+    + validate_container_of(PTR, TYPE, FIELD))
+/** Constified downcasting macro */
+#define const_container_of(PTR, TYPE, FIELD)                            \
+  ((const TYPE *)(((const char *)(PTR)) - offsetof(TYPE, FIELD))        \
+    + validate_const_container_of(PTR, TYPE, FIELD))
+
+/**
+ * Count number of one bits in 32-bit word.
+ */
+static unsigned
+bitcount32(uint32_t x)
+{
+
+  /* Count two-bit groups.  */
+  x -= (x >> 1) & UINT32_C(0x55555555);
+
+  /* Count four-bit groups.  */
+  x = ((x >> 2) & UINT32_C(0x33333333)) + (x & UINT32_C(0x33333333));
+
+  /* Count eight-bit groups.  */
+  x = (x + (x >> 4)) & UINT32_C(0x0f0f0f0f);
+
+  /* Sum all eight-bit groups, and extract the sum.  */
+  return (x * UINT32_C(0x01010101)) >> 24;
+}
+
+/**
+ * Count leading zeros in 32-bit word.
+ */
+static unsigned
+clz32(uint32_t x)
+{
+
+  /* Round up to a power of two.  */
+  x |= x >> 1;
+  x |= x >> 2;
+  x |= x >> 4;
+  x |= x >> 8;
+  x |= x >> 16;
+
+  /* Subtract count of one bits from 32.  */
+  return (32 - bitcount32(x));
+}
+
+/*
+ * Some lemmas that will be used throughout this file to prove various error
+ * bounds:
+ *
+ * Lemma 1.  If |d| <= 1/2, then 1/(1 + d) <= 2.
+ *
+ * Proof.  If 0 <= d <= 1/2, then 1 + d >= 1, so that 1/(1 + d) <= 1.
+ * If -1/2 <= d <= 0, then 1 + d >= 1/2, so that 1/(1 + d) <= 2.  QED.
+ *
+ * Lemma 2. If b = a*(1 + d)/(1 + d') for |d'| < 1/2 and nonzero a, b,
+ * then b = a*(1 + e) for |e| <= 2|d' - d|.
+ *
+ * Proof.  |a - b|/|a|
+ *             = |a - a*(1 + d)/(1 + d')|/|a|
+ *             = |1 - (1 + d)/(1 + d')|
+ *             = |(1 + d' - 1 - d)/(1 + d')|
+ *             = |(d' - d)/(1 + d')|
+ *            <= 2|d' - d|, by Lemma 1,
+ *
+ * QED.
+ *
+ * Lemma 3.  For |d|, |d'| < 1/4,
+ *
+ *     |log((1 + d)/(1 + d'))| <= 4|d - d'|.
+ *
+ * Proof.  Write
+ *
+ *     log((1 + d)/(1 + d'))
+ *      = log(1 + (1 + d)/(1 + d') - 1)
+ *      = log(1 + (1 + d - 1 - d')/(1 + d')
+ *      = log(1 + (d - d')/(1 + d')).
+ *
+ * By Lemma 1, |(d - d')/(1 + d')| < 2|d' - d| < 1, so the Taylor
+ * series of log(1 + x) converges absolutely for (d - d')/(1 + d'),
+ * and thus we have
+ *
+ *     |log(1 + (d - d')/(1 + d'))|
+ *      = |\sum_{n=1}^\infty ((d - d')/(1 + d'))^n/n|
+ *     <= \sum_{n=1}^\infty |(d - d')/(1 + d')|^n/n
+ *     <= \sum_{n=1}^\infty |2(d' - d)|^n/n
+ *     <= \sum_{n=1}^\infty |2(d' - d)|^n
+ *      = 1/(1 - |2(d' - d)|)
+ *     <= 4|d' - d|,
+ *
+ * QED.
+ *
+ * Lemma 4.  If 1/e <= 1 + x <= e, then
+ *
+ *     log(1 + (1 + d) x) = (1 + d') log(1 + x)
+ *
+ * for |d'| < 8|d|.
+ *
+ * Proof.  Write
+ *
+ *     log(1 + (1 + d) x)
+ *     = log(1 + x + x*d)
+ *     = log((1 + x) (1 + x + x*d)/(1 + x))
+ *     = log(1 + x) + log((1 + x + x*d)/(1 + x))
+ *     = log(1 + x) (1 + log((1 + x + x*d)/(1 + x))/log(1 + x)).
+ *
+ * The relative error is bounded by
+ *
+ *     |log((1 + x + x*d)/(1 + x))/log(1 + x)|
+ *     <= 4|x + x*d - x|/|log(1 + x)|, by Lemma 3,
+ *      = 4|x*d|/|log(1 + x)|
+ *      < 8|d|,
+ *
+ * since in this range 0 < 1 - 1/e < x/log(1 + x) <= e - 1 < 2.  QED.
+ */
+
+/**
+ * Compute the logistic function: f(x) = 1/(1 + e^{-x}) = e^x/(1 + e^x).
+ * Maps a log-odds-space probability in [-\infty, +\infty] into a direct-space
+ * probability in [0,1].  Inverse of logit.
+ *
+ * Ill-conditioned for large x; the identity logistic(-x) = 1 -
+ * logistic(x) and the function logistichalf(x) = logistic(x) - 1/2 may
+ * help to rearrange a computation.
+ *
+ * This implementation gives relative error bounded by 7 eps.
+ */
+STATIC double
+logistic(double x)
+{
+  if (x <= log(DBL_EPSILON/2)) {
+    /*
+     * If x <= log(DBL_EPSILON/2) = log(eps), then e^x <= eps. In this case
+     * we will approximate the logistic() function with e^x because the
+     * relative error is less than eps. Here is a calculation of the
+     * relative error between the logistic() function and e^x and a proof
+     * that it's less than eps:
+     *
+     *     |e^x - e^x/(1 + e^x)|/|e^x/(1 + e^x)|
+     *     <= |1 - 1/(1 + e^x)|*|1 + e^x|
+     *      = |e^x/(1 + e^x)|*|1 + e^x|
+     *      = |e^x|
+     *     <= eps.
+     */
+    return exp(x); /* return e^x */
+  } else if (x <= -log(DBL_EPSILON/2)) {
+    /*
+     * e^{-x} > 0, so 1 + e^{-x} > 1, and 0 < 1/(1 +
+     * e^{-x}) < 1; further, since e^{-x} < 1 + e^{-x}, we
+     * also have 0 < 1/(1 + e^{-x}) < 1.  Thus, if exp has
+     * relative error d0, + has relative error d1, and /
+     * has relative error d2, then we get
+     *
+     *     (1 + d2)/[(1 + (1 + d0) e^{-x})(1 + d1)]
+     *     = (1 + d0)/[1 + e^{-x} + d0 e^{-x}
+     *                     + d1 + d1 e^{-x} + d0 d1 e^{-x}]
+     *     = (1 + d0)/[(1 + e^{-x})
+     *                 * (1 + d0 e^{-x}/(1 + e^{-x})
+     *                      + d1/(1 + e^{-x})
+     *                      + d0 d1 e^{-x}/(1 + e^{-x}))].
+     *     = (1 + d0)/[(1 + e^{-x})(1 + d')]
+     *     = [1/(1 + e^{-x})] (1 + d0)/(1 + d')
+     *
+     * where
+     *
+     *     d' = d0 e^{-x}/(1 + e^{-x})
+     *          + d1/(1 + e^{-x})
+     *          + d0 d1 e^{-x}/(1 + e^{-x}).
+     *
+     * By Lemma 2 this relative error is bounded by
+     *
+     *     2|d0 - d'|
+     *      = 2|d0 - d0 e^{-x}/(1 + e^{-x})
+     *             - d1/(1 + e^{-x})
+     *             - d0 d1 e^{-x}/(1 + e^{-x})|
+     *     <= 2|d0| + 2|d0 e^{-x}/(1 + e^{-x})|
+     *             + 2|d1/(1 + e^{-x})|
+     *             + 2|d0 d1 e^{-x}/(1 + e^{-x})|
+     *     <= 2|d0| + 2|d0| + 2|d1| + 2|d0 d1|
+     *     <= 4|d0| + 2|d1| + 2|d0 d1|
+     *     <= 6 eps + 2 eps^2.
+     */
+    return 1/(1 + exp(-x));
+  } else {
+    /*
+     * e^{-x} <= eps, so the relative error of 1 from 1/(1
+     * + e^{-x}) is
+     *
+     *     |1/(1 + e^{-x}) - 1|/|1/(1 + e^{-x})|
+     *      = |e^{-x}/(1 + e^{-x})|/|1/(1 + e^{-x})|
+     *      = |e^{-x}|
+     *     <= eps.
+     *
+     * This computation avoids an intermediate overflow
+     * exception, although the effect on the result is
+     * harmless.
+     *
+     * XXX Should maybe raise inexact here.
+     */
+    return 1;
+  }
+}
+
+/**
+ * Compute the logit function: log p/(1 - p).  Defined on [0,1].  Maps
+ * a direct-space probability in [0,1] to a log-odds-space probability
+ * in [-\infty, +\infty].  Inverse of logistic.
+ *
+ * Ill-conditioned near 1/2 and 1; the identity logit(1 - p) =
+ * -logit(p) and the function logithalf(p0) = logit(1/2 + p0) may help
+ * to rearrange a computation for p in [1/(1 + e), 1 - 1/(1 + e)].
+ *
+ * This implementation gives relative error bounded by 10 eps.
+ */
+STATIC double
+logit(double p)
+{
+
+  /* logistic(-1) <= p <= logistic(+1) */
+  if (1/(1 + exp(1)) <= p && p <= 1/(1 + exp(-1))) {
+    /*
+     * For inputs near 1/2, we want to compute log1p(near
+     * 0) rather than log(near 1), so write this as:
+     *
+     * log(p/(1 - p)) = -log((1 - p)/p)
+     * = -log(1 + (1 - p)/p - 1)
+     * = -log(1 + (1 - p - p)/p)
+     * = -log(1 + (1 - 2p)/p).
+     *
+     * Since p = 2p/2 <= 1 <= 2*2p = 4p, the floating-point
+     * evaluation of 1 - 2p is exact; the only error arises
+     * from division and log1p.  First, note that if
+     * logistic(-1) <= p <= logistic(+1), (1 - 2p)/p lies
+     * in the bounds of Lemma 4.
+     *
+     * If division has relative error d0 and log1p has
+     * relative error d1, the outcome is
+     *
+     *     -(1 + d1) log(1 + (1 - 2p) (1 + d0)/p)
+     *     = -(1 + d1) (1 + d') log(1 + (1 - 2p)/p)
+     *     = -(1 + d1 + d' + d1 d') log(1 + (1 - 2p)/p).
+     *
+     * where |d'| < 8|d0| by Lemma 4.  The relative error
+     * is then bounded by
+     *
+     *     |d1 + d' + d1 d'|
+     *     <= |d1| + 8|d0| + 8|d1 d0|
+     *     <= 9 eps + 8 eps^2.
+     */
+    return -log1p((1 - 2*p)/p);
+  } else {
+    /*
+     * For inputs near 0, although 1 - p may be rounded to
+     * 1, it doesn't matter much because the magnitude of
+     * the result is so much larger.  For inputs near 1, we
+     * can compute 1 - p exactly, although the precision on
+     * the input is limited so we won't ever get more than
+     * about 700 for the output.
+     *
+     * If - has relative error d0, / has relative error d1,
+     * and log has relative error d2, then
+     *
+     *     (1 + d2) log((1 + d0) p/[(1 - p)(1 + d1)])
+     *     = (1 + d2) [log(p/(1 - p)) + log((1 + d0)/(1 + d1))]
+     *     = log(p/(1 - p)) + d2 log(p/(1 - p))
+     *       + (1 + d2) log((1 + d0)/(1 + d1))
+     *     = log(p/(1 - p))*[1 + d2 +
+     *         + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))]
+     *
+     * Since 0 <= p < logistic(-1) or logistic(+1) < p <=
+     * 1, we have |log(p/(1 - p))| > 1.  Hence this error
+     * is bounded by
+     *
+     *     |d2 + (1 + d2) log((1 + d0)/(1 + d1))/log(p/(1 - p))|
+     *     <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))
+     *                      / log(p/(1 - p))|
+     *     <= |d2| + |(1 + d2) log((1 + d0)/(1 + d1))|
+     *     <= |d2| + 4|(1 + d2) (d0 - d1)|, by Lemma 3,
+     *     <= |d2| + 4|d0 - d1 + d2 d0 - d1 d0|
+     *     <= |d2| + 4|d0| + 4|d1| + 4|d2 d0| + 4|d1 d0|
+     *     <= 9 eps + 8 eps^2.
+     */
+    return log(p/(1 - p));
+  }
+}
+
+/**
+ * Compute the logit function, translated in input by 1/2: logithalf(p)
+ * = logit(1/2 + p).  Defined on [-1/2, 1/2].  Inverse of logistichalf.
+ *
+ * Ill-conditioned near +/-1/2.  If |p0| > 1/2 - 1/(1 + e), it may be
+ * better to compute 1/2 + p0 or -1/2 - p0 and to use logit instead.
+ * This implementation gives relative error bounded by 34 eps.
+ */
+STATIC double
+logithalf(double p0)
+{
+
+  if (fabs(p0) <= 0.5 - 1/(1 + exp(1))) {
+    /*
+     * logit(1/2 + p0)
+     * = log((1/2 + p0)/(1 - (1/2 + p0)))
+     * = log((1/2 + p0)/(1/2 - p0))
+     * = log(1 + (1/2 + p0)/(1/2 - p0) - 1)
+     * = log(1 + (1/2 + p0 - (1/2 - p0))/(1/2 - p0))
+     * = log(1 + (1/2 + p0 - 1/2 + p0)/(1/2 - p0))
+     * = log(1 + 2 p0/(1/2 - p0))
+     *
+     * If the error of subtraction is d0, the error of
+     * division is d1, and the error of log1p is d2, then
+     * what we compute is
+     *
+     *  (1 + d2) log(1 + (1 + d1) 2 p0/[(1 + d0) (1/2 - p0)])
+     *  = (1 + d2) log(1 + (1 + d') 2 p0/(1/2 - p0))
+     *  = (1 + d2) (1 + d'') log(1 + 2 p0/(1/2 - p0))
+     *  = (1 + d2 + d'' + d2 d'') log(1 + 2 p0/(1/2 - p0)),
+     *
+     * where |d'| < 2|d0 - d1| <= 4 eps by Lemma 2, and
+     * |d''| < 8|d'| < 32 eps by Lemma 4 since
+     *
+     *  1/e <= 1 + 2*p0/(1/2 - p0) <= e
+     *
+     * when |p0| <= 1/2 - 1/(1 + e).  Hence the relative
+     * error is bounded by
+     *
+     *  |d2 + d'' + d2 d''|
+     *  <= |d2| + |d''| + |d2 d''|
+     *  <= |d1| + 32 |d0| + 32 |d1 d0|
+     *  <= 33 eps + 32 eps^2.
+     */
+    return log1p(2*p0/(0.5 - p0));
+  } else {
+    /*
+     * We have a choice of computing logit(1/2 + p0) or
+     * -logit(1 - (1/2 + p0)) = -logit(1/2 - p0).  It
+     * doesn't matter which way we do this: either way,
+     * since 1/2 p0 <= 1/2 <= 2 p0, the sum and difference
+     * are computed exactly.  So let's do the one that
+     * skips the final negation.
+     *
+     * The result is
+     *
+     *  (1 + d1) log((1 + d0) (1/2 + p0)/[(1 + d2) (1/2 - p0)])
+     *  = (1 + d1) (1 + log((1 + d0)/(1 + d2))
+     *                  / log((1/2 + p0)/(1/2 - p0)))
+     *    * log((1/2 + p0)/(1/2 - p0))
+     *  = (1 + d') log((1/2 + p0)/(1/2 - p0))
+     *  = (1 + d') logit(1/2 + p0)
+     *
+     * where
+     *
+     *  d' = d1 + log((1 + d0)/(1 + d2))/logit(1/2 + p0)
+     *       + d1 log((1 + d0)/(1 + d2))/logit(1/2 + p0).
+     *
+     * For |p| > 1/2 - 1/(1 + e), logit(1/2 + p0) > 1.
+     * Provided |d0|, |d2| < 1/4, by Lemma 3 we have
+     *
+     *  |log((1 + d0)/(1 + d2))| <= 4|d0 - d2|.
+     *
+     * Hence the relative error is bounded by
+     *
+     *  |d'| <= |d1| + 4|d0 - d2| + 4|d1| |d0 - d2|
+     *       <= |d1| + 4|d0| + 4|d2| + 4|d1 d0| + 4|d1 d2|
+     *       <= 9 eps + 8 eps^2.
+     */
+    return log((0.5 + p0)/(0.5 - p0));
+  }
+}
+
+/*
+ * The following random_uniform_01 is tailored for IEEE 754 binary64
+ * floating-point or smaller.  It can be adapted to larger
+ * floating-point formats like i387 80-bit or IEEE 754 binary128, but
+ * it may require sampling more bits.
+ */
+CTASSERT(FLT_RADIX == 2);
+CTASSERT(-DBL_MIN_EXP <= 1021);
+CTASSERT(DBL_MANT_DIG <= 53);
+
+/**
+ * Draw a floating-point number in [0, 1] with uniform distribution.
+ *
+ * Note that the probability of returning 0 is less than 2^-1074, so
+ * callers need not check for it.  However, callers that cannot handle
+ * rounding to 1 must deal with that, because it occurs with
+ * probability 2^-54, which is small but nonnegligible.
+ */
+STATIC double
+random_uniform_01(void)
+{
+  uint32_t z, x, hi, lo;
+  double s;
+
+  /*
+   * Draw an exponent, geometrically distributed, but give up if
+   * we get a run of more than 1088 zeros, which really means the
+   * system is broken.
+   */
+  z = 0;
+  while ((x = crypto_rand_u32()) == 0) {
+    if (z >= 1088)
+      /* Your bit sampler is broken.  Go home.  */
+      return 0;
+    z += 32;
+  }
+  z += clz32(x);
+
+  /*
+   * Pick 32-bit halves of an odd normalized significand.
+   * Picking it odd breaks ties in the subsequent rounding, which
+   * occur only with measure zero in the uniform distribution on
+   * [0, 1].
+   */
+  hi = crypto_rand_u32() | UINT32_C(0x80000000);
+  lo = crypto_rand_u32() | UINT32_C(0x00000001);
+
+  /* Round to nearest scaled significand in [2^63, 2^64].  */
+  s = hi*(double)4294967296 + lo;
+
+  /* Rescale into [1/2, 1] and apply exponent in one swell foop.  */
+  return s * ldexp(1, -(64 + z));
+}
+
+/*******************************************************************/
+
+/* Functions for specific probability distributions start here: */
+
+/*
+ * Logistic(mu, sigma) distribution, supported on (-\infty,+\infty)
+ *
+ * This is the uniform distribution on [0,1] mapped into log-odds
+ * space, scaled by sigma and translated by mu.
+ *
+ * pdf(x) = e^{-(x - mu)/sigma} sigma (1 + e^{-(x - mu)/sigma})^2
+ * cdf(x) = 1/(1 + e^{-(x - mu)/sigma}) = logistic((x - mu)/sigma)
+ * sf(x) = 1 - cdf(x) = 1 - logistic((x - mu)/sigma = logistic(-(x - mu)/sigma)
+ * icdf(p) = mu + sigma log p/(1 - p) = mu + sigma logit(p)
+ * isf(p) = mu + sigma log (1 - p)/p = mu - sigma logit(p)
+ */
+
+/**
+ * Compute the CDF of the Logistic(mu, sigma) distribution: the
+ * logistic function.  Well-conditioned for negative inputs and small
+ * positive inputs; ill-conditioned for large positive inputs.
+ */
+STATIC double
+cdf_logistic(double x, double mu, double sigma)
+{
+  return logistic((x - mu)/sigma);
+}
+
+/**
+ * Compute the SF of the Logistic(mu, sigma) distribution: the logistic
+ * function reflected over the y axis.  Well-conditioned for positive
+ * inputs and small negative inputs; ill-conditioned for large negative
+ * inputs.
+ */
+STATIC double
+sf_logistic(double x, double mu, double sigma)
+{
+  return logistic(-(x - mu)/sigma);
+}
+
+/**
+ * Compute the inverse of the CDF of the Logistic(mu, sigma)
+ * distribution: the logit function.  Well-conditioned near 0;
+ * ill-conditioned near 1/2 and 1.
+ */
+STATIC double
+icdf_logistic(double p, double mu, double sigma)
+{
+  return mu + sigma*logit(p);
+}
+
+/**
+ * Compute the inverse of the SF of the Logistic(mu, sigma)
+ * distribution: the -logit function.  Well-conditioned near 0;
+ * ill-conditioned near 1/2 and 1.
+ */
+STATIC double
+isf_logistic(double p, double mu, double sigma)
+{
+  return mu - sigma*logit(p);
+}
+
+/*
+ * LogLogistic(alpha, beta) distribution, supported on (0, +\infty).
+ *
+ * This is the uniform distribution on [0,1] mapped into odds space,
+ * scaled by positive alpha and shaped by positive beta.
+ *
+ * Equivalent to computing exp of a Logistic(log alpha, 1/beta) sample.
+ * (Name arises because the pdf has LogLogistic(x; alpha, beta) =
+ * Logistic(log x; log alpha, 1/beta) and mathematicians got their
+ * covariance contravariant.)
+ *
+ * pdf(x) = (beta/alpha) (x/alpha)^{beta - 1}/(1 + (x/alpha)^beta)^2
+ *        = (1/e^mu sigma) (x/e^mu)^{1/sigma - 1} /
+ *              (1 + (x/e^mu)^{1/sigma})^2
+ * cdf(x) = 1/(1 + (x/alpha)^-beta) = 1/(1 + (x/e^mu)^{-1/sigma})
+ *        = 1/(1 + (e^{log x}/e^mu)^{-1/sigma})
+ *        = 1/(1 + (e^{log x - mu})^{-1/sigma})
+ *        = 1/(1 + e^{-(log x - mu)/sigma})
+ *        = logistic((log x - mu)/sigma)
+ *        = logistic((log x - log alpha)/(1/beta))
+ * sf(x) = 1 - 1/(1 + (x/alpha)^-beta)
+ *       = (x/alpha)^-beta/(1 + (x/alpha)^-beta)
+ *       = 1/((x/alpha)^beta + 1)
+ *       = 1/(1 + (x/alpha)^beta)
+ * icdf(p) = alpha (p/(1 - p))^{1/beta}
+ *         = alpha e^{logit(p)/beta}
+ *         = e^{mu + sigma logit(p)}
+ * isf(p) = alpha ((1 - p)/p)^{1/beta}
+ *        = alpha e^{-logit(p)/beta}
+ *        = e^{mu - sigma logit(p)}
+ */
+
+/**
+ * Compute the CDF of the LogLogistic(alpha, beta) distribution.
+ * Well-conditioned for all x and alpha, and the condition number
+ *
+ *      -beta/[1 + (x/alpha)^{-beta}]
+ *
+ * grows linearly with beta.
+ *
+ * Loosely, the relative error of this implementation is bounded by
+ *
+ *      4 eps + 2 eps^2 + O(beta eps),
+ *
+ * so don't bother trying this for beta anywhere near as large as
+ * 1/eps, around which point it levels off at 1.
+ */
+STATIC double
+cdf_log_logistic(double x, double alpha, double beta)
+{
+  /*
+   * Let d0 be the error of x/alpha; d1, of pow; d2, of +; and
+   * d3, of the final quotient.  The exponentiation gives
+   *
+   *    ((1 + d0) x/alpha)^{-beta}
+   *    = (x/alpha)^{-beta} (1 + d0)^{-beta}
+   *    = (x/alpha)^{-beta} (1 + (1 + d0)^{-beta} - 1)
+   *    = (x/alpha)^{-beta} (1 + d')
+   *
+   * where d' = (1 + d0)^{-beta} - 1.  If y = (x/alpha)^{-beta},
+   * the denominator is
+   *
+   *    (1 + d2) (1 + (1 + d1) (1 + d') y)
+   *    = (1 + d2) (1 + y + (d1 + d' + d1 d') y)
+   *    = 1 + y + (1 + d2) (d1 + d' + d1 d') y
+   *    = (1 + y) (1 + (1 + d2) (d1 + d' + d1 d') y/(1 + y))
+   *    = (1 + y) (1 + d''),
+   *
+   * where d'' = (1 + d2) (d1 + d' + d1 d') y/(1 + y).  The
+   * final result is
+   *
+   *    (1 + d3) / [(1 + d2) (1 + d'') (1 + y)]
+   *    = (1 + d''') / (1 + y)
+   *
+   * for |d'''| <= 2|d3 - d''| by Lemma 2 as long as |d''| < 1/2
+   * (which may not be the case for very large beta).  This
+   * relative error is therefore bounded by
+   *
+   *    |d'''|
+   *    <= 2|d3 - d''|
+   *    <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d') y/(1 + y)|
+   *    <= 2|d3| + 2|(1 + d2) (d1 + d' + d1 d')|
+   *     = 2|d3| + 2|d1 + d' + d1 d' + d2 d1 + d2 d' + d2 d1 d'|
+   *      <= 2|d3| + 2|d1| + 2|d'| + 2|d1 d'| + 2|d2 d1| + 2|d2 d'|
+   *         + 2|d2 d1 d'|
+   *      <= 4 eps + 2 eps^2 + (2 + 2 eps + 2 eps^2) |d'|.
+   *
+   * Roughly, |d'| = |(1 + d0)^{-beta} - 1| grows like beta eps,
+   * until it levels off at 1.
+   */
+  return 1/(1 + pow(x/alpha, -beta));
+}
+
+/**
+ * Compute the SF of the LogLogistic(alpha, beta) distribution.
+ * Well-conditioned for all x and alpha, and the condition number
+ *
+ *      beta/[1 + (x/alpha)^beta]
+ *
+ * grows linearly with beta.
+ *
+ * Loosely, the relative error of this implementation is bounded by
+ *
+ *      4 eps + 2 eps^2 + O(beta eps)
+ *
+ * so don't bother trying this for beta anywhere near as large as
+ * 1/eps, beyond which point it grows unbounded.
+ */
+STATIC double
+sf_log_logistic(double x, double alpha, double beta)
+{
+  /*
+   * The error analysis here is essentially the same as in
+   * cdf_log_logistic, except that rather than levelling off at
+   * 1, |(1 + d0)^beta - 1| grows unbounded.
+   */
+  return 1/(1 + pow(x/alpha, beta));
+}
+
+/**
+ * Compute the inverse of the CDF of the LogLogistic(alpha, beta)
+ * distribution.  Ill-conditioned for p near 1 and beta near 0 with
+ * condition number 1/[beta (1 - p)].
+ */
+STATIC double
+icdf_log_logistic(double p, double alpha, double beta)
+{
+  return alpha*pow(p/(1 - p), 1/beta);
+}
+
+/**
+ * Compute the inverse of the SF of the LogLogistic(alpha, beta)
+ * distribution.  Ill-conditioned for p near 1 and for large beta, with
+ * condition number -1/[beta (1 - p)].
+ */
+STATIC double
+isf_log_logistic(double p, double alpha, double beta)
+{
+  return alpha*pow((1 - p)/p, 1/beta);
+}
+
+/*
+ * Weibull(lambda, k) distribution, supported on (0, +\infty).
+ *
+ * pdf(x) = (k/lambda) (x/lambda)^{k - 1} e^{-(x/lambda)^k}
+ * cdf(x) = 1 - e^{-(x/lambda)^k}
+ * icdf(p) = lambda * (-log (1 - p))^{1/k}
+ * sf(x) = e^{-(x/lambda)^k}
+ * isf(p) = lambda * (-log p)^{1/k}
+ */
+
+/**
+ * Compute the CDF of the Weibull(lambda, k) distribution.
+ * Well-conditioned for small x and k, and for large lambda --
+ * condition number
+ *
+ *      -k (x/lambda)^k exp(-(x/lambda)^k)/[exp(-(x/lambda)^k) - 1]
+ *
+ * grows linearly with k, x^k, and lambda^{-k}.
+ */
+STATIC double
+cdf_weibull(double x, double lambda, double k)
+{
+  return -expm1(-pow(x/lambda, k));
+}
+
+/**
+ * Compute the SF of the Weibull(lambda, k) distribution.
+ * Well-conditioned for small x and k, and for large lambda --
+ * condition number
+ *
+ *      -k (x/lambda)^k
+ *
+ * grows linearly with k, x^k, and lambda^{-k}.
+ */
+STATIC double
+sf_weibull(double x, double lambda, double k)
+{
+  return exp(-pow(x/lambda, k));
+}
+
+/**
+ * Compute the inverse of the CDF of the Weibull(lambda, k)
+ * distribution.  Ill-conditioned for p near 1, and for k near 0;
+ * condition number is
+ *
+ *      (p/(1 - p))/(k log(1 - p)).
+ */
+STATIC double
+icdf_weibull(double p, double lambda, double k)
+{
+  return lambda*pow(-log1p(-p), 1/k);
+}
+
+/**
+ * Compute the inverse of the SF of the Weibull(lambda, k)
+ * distribution.  Ill-conditioned for p near 0, and for k near 0;
+ * condition number is
+ *
+ *      1/(k log(p)).
+ */
+STATIC double
+isf_weibull(double p, double lambda, double k)
+{
+  return lambda*pow(-log(p), 1/k);
+}
+
+/*
+ * GeneralizedPareto(mu, sigma, xi), supported on (mu, +\infty) for
+ * nonnegative xi, or (mu, mu - sigma/xi) for negative xi.
+ *
+ * Samples:
+ * = mu - sigma log U, if xi = 0;
+ * = mu + sigma (U^{-xi} - 1)/xi = mu + sigma*expm1(-xi log U)/xi, if xi =/= 0,
+ * where U is uniform on (0,1].
+ * = mu + sigma (e^{xi X} - 1)/xi,
+ * where X has standard exponential distribution.
+ *
+ * pdf(x) = sigma^{-1} (1 + xi (x - mu)/sigma)^{-(1 + 1/xi)}
+ * cdf(x) = 1 - (1 + xi (x - mu)/sigma)^{-1/xi}
+ *        = 1 - e^{-log(1 + xi (x - mu)/sigma)/xi}
+ *        --> 1 - e^{-(x - mu)/sigma}  as  xi --> 0
+ * sf(x) = (1 + xi (x - mu)/sigma)^{-1/xi}
+ *       --> e^{-(x - mu)/sigma}  as  xi --> 0
+ * icdf(p) = mu + sigma*(p^{-xi} - 1)/xi
+ *         = mu + sigma*expm1(-xi log p)/xi
+ *         --> mu + sigma*log p  as  xi --> 0
+ * isf(p) = mu + sigma*((1 - p)^{xi} - 1)/xi
+ *        = mu + sigma*expm1(-xi log1p(-p))/xi
+ *        --> mu + sigma*log1p(-p)  as  xi --> 0
+ */
+
+/**
+ * Compute the CDF of the GeneralizedPareto(mu, sigma, xi)
+ * distribution.  Well-conditioned everywhere.  For standard
+ * distribution (mu=0, sigma=1), condition number
+ *
+ *      (x/(1 + x xi)) / ((1 + x xi)^{1/xi} - 1)
+ *
+ * is bounded by 1, attained only at x = 0.
+ */
+STATIC double
+cdf_genpareto(double x, double mu, double sigma, double xi)
+{
+  double x_0 = (x - mu)/sigma;
+
+  /*
+   * log(1 + xi x_0)/xi
+   * = (-1/xi) \sum_{n=1}^\infty (-xi x_0)^n/n
+   * = (-1/xi) (-xi x_0 + \sum_{n=2}^\infty (-xi x_0)^n/n)
+   * = x_0 - (1/xi) \sum_{n=2}^\infty (-xi x_0)^n/n
+   * = x_0 - x_0 \sum_{n=2}^\infty (-xi x_0)^{n-1}/n
+   * = x_0 (1 - d),
+   *
+   * where d = \sum_{n=2}^\infty (-xi x_0)^{n-1}/n.  If |xi| <
+   * eps/4|x_0|, then
+   *
+   * |d| <= \sum_{n=2}^\infty (eps/4)^{n-1}/n
+   *     <= \sum_{n=2}^\infty (eps/4)^{n-1}
+   *      = \sum_{n=1}^\infty (eps/4)^n
+   *      = (eps/4) \sum_{n=0}^\infty (eps/4)^n
+   *      = (eps/4)/(1 - eps/4)
+   *      < eps/2
+   *
+   * for any 0 < eps < 2.  Thus, the relative error of x_0 from
+   * log(1 + xi x_0)/xi is bounded by eps.
+   */
+  if (fabs(xi) < 1e-17/x_0)
+    return -expm1(-x_0);
+  else
+    return -expm1(-log1p(xi*x_0)/xi);
+}
+
+/**
+ * Compute the SF of the GeneralizedPareto(mu, sigma, xi) distribution.
+ * For standard distribution (mu=0, sigma=1), ill-conditioned for xi
+ * near 0; condition number
+ *
+ *      -x (1 + x xi)^{(-1 - xi)/xi}/(1 + x xi)^{-1/xi}
+ *      = -x (1 + x xi)^{-1/xi - 1}/(1 + x xi)^{-1/xi}
+ *      = -(x/(1 + x xi)) (1 + x xi)^{-1/xi}/(1 + x xi)^{-1/xi}
+ *      = -x/(1 + x xi)
+ *
+ * is bounded by 1/xi.
+ */
+STATIC double
+sf_genpareto(double x, double mu, double sigma, double xi)
+{
+  double x_0 = (x - mu)/sigma;
+
+  if (fabs(xi) < 1e-17/x_0)
+    return exp(-x_0);
+  else
+    return exp(-log1p(xi*x_0)/xi);
+}
+
+/**
+ * Compute the inverse of the CDF of the GeneralizedPareto(mu, sigma,
+ * xi) distribution.  Ill-conditioned for p near 1; condition number is
+ *
+ *      xi (p/(1 - p))/(1 - (1 - p)^xi)
+ */
+STATIC double
+icdf_genpareto(double p, double mu, double sigma, double xi)
+{
+  /*
+   * To compute f(xi) = (U^{-xi} - 1)/xi = (e^{-xi log U} - 1)/xi
+   * for xi near zero (note f(xi) --> -log U as xi --> 0), write
+   * the absolutely convergent Taylor expansion
+   *
+   * f(xi) = (1/xi)*(-xi log U + \sum_{n=2}^\infty (-xi log U)^n/n!
+   *       = -log U + (1/xi)*\sum_{n=2}^\infty (-xi log U)^n/n!
+   *       = -log U + \sum_{n=2}^\infty xi^{n-1} (-log U)^n/n!
+   *       = -log U - log U \sum_{n=2}^\infty (-xi log U)^{n-1}/n!
+   *       = -log U (1 + \sum_{n=2}^\infty (-xi log U)^{n-1}/n!).
+   *
+   * Let d = \sum_{n=2}^\infty (-xi log U)^{n-1}/n!.  What do we
+   * lose if we discard it and use -log U as an approximation to
+   * f(xi)?  If |xi| < eps/-4log U, then
+   *
+   * |d| <= \sum_{n=2}^\infty |xi log U|^{n-1}/n!
+   *     <= \sum_{n=2}^\infty (eps/4)^{n-1}/n!
+   *     <= \sum_{n=1}^\infty (eps/4)^n
+   *      = (eps/4) \sum_{n=0}^\infty (eps/4)^n
+   *      = (eps/4)/(1 - eps/4)
+   *      < eps/2,
+   *
+   * for any 0 < eps < 2.  Hence, as long as |xi| < eps/-2log U,
+   * f(xi) = -log U (1 + d) for |d| <= eps/2.  |d| is the
+   * relative error of f(xi) from -log U; from this bound, the
+   * relative error of -log U from f(xi) is at most (eps/2)/(1 -
+   * eps/2) = eps/2 + (eps/2)^2 + (eps/2)^3 + ... < eps for 0 <
+   * eps < 1.  Since -log U < 1000 for all U in (0, 1] in
+   * binary64 floating-point, we can safely cut xi off at 1e-20 <
+   * eps/4000 and attain <1ulp error from series truncation.
+   */
+  if (fabs(xi) <= 1e-20)
+    return mu - sigma*log1p(-p);
+  else
+    return mu + sigma*expm1(-xi*log1p(-p))/xi;
+}
+
+/**
+ * Compute the inverse of the SF of the GeneralizedPareto(mu, sigma,
+ * xi) distribution.  Ill-conditioned for p near 1; conditon number is
+ *
+ *      -xi/(1 - p^{-xi})
+ */
+STATIC double
+isf_genpareto(double p, double mu, double sigma, double xi)
+{
+  if (fabs(xi) <= 1e-20)
+    return mu - sigma*log(p);
+  else
+    return mu + sigma*expm1(-xi*log(p))/xi;
+}
+
+/*******************************************************************/
+
+/**
+ * Deterministic samplers, parametrized by uniform integer and (0,1]
+ * samples.  No guarantees are made about _which_ mapping from the
+ * integer and (0,1] samples these use; all that is guaranteed is the
+ * distribution of the outputs conditioned on a uniform distribution on
+ * the inputs.  The automatic tests in test_prob_distr.c double-check
+ * the particular mappings we use.
+ *
+ * Beware: Unlike random_uniform_01(), these are not guaranteed to be
+ * supported on all possible outputs.  See Ilya Mironov, `On the
+ * Significance of the Least Significant Bits for Differential
+ * Privacy', for an example of what can go wrong if you try to use
+ * these to conceal information from an adversary but you expose the
+ * specific full-precision floating-point values.
+ *
+ * Note: None of these samplers use rejection sampling; they are all
+ * essentially inverse-CDF transforms with tweaks.  If you were to add,
+ * say, a Gamma sampler with the Marsaglia-Tsang method, you would have
+ * to parametrize it by a potentially infinite stream of uniform (and
+ * perhaps normal) samples rather than a fixed number, which doesn't
+ * make for quite as nice automatic testing as for these.
+ */
+
+/**
+ * Deterministically sample from the interval [a, b], indexed by a
+ * uniform random floating-point number p0 in (0, 1].
+ *
+ * Note that even if p0 is nonzero, the result may be equal to a, if
+ * ulp(a)/2 is nonnegligible, e.g. if a = 1.  For maximum resolution,
+ * arrange |a| <= |b|.
+ */
+STATIC double
+sample_uniform_interval(double p0, double a, double b)
+{
+  /*
+   * XXX Prove that the distribution is, in fact, uniform on
+   * [a,b], particularly around p0 = 1, or at least has very
+   * small deviation from uniform, quantified appropriately
+   * (e.g., like in Monahan 1984, or by KL divergence).  It
+   * almost certainly does but it would be nice to quantify the
+   * error.
+   */
+  if ((a <= 0 && 0 <= b) || (b <= 0 && 0 <= a)) {
+    /*
+     * When ab < 0, (1 - t) a + t b is monotonic, since for
+     * a <= b it is a sum of nondecreasing functions of t,
+     * and for b <= a, of nonincreasing functions of t.
+     * Further, clearly at 0 and 1 it attains a and b,
+     * respectively.  Hence it is bounded within [a, b].
+     */
+    return (1 - p0)*a + p0*b;
+  } else {
+    /*
+     * a + (b - a) t is monotonic -- it is obviously a
+     * nondecreasing function of t for a <= b.  Further, it
+     * attains a at 0, and while it may overshoot b at 1,
+     * we have a
+     *
+     * Theorem.  If 0 <= t < 1, then the floating-point
+     * evaluation of a + (b - a) t is bounded in [a, b].
+     *
+     * Lemma 1.  If 0 <= t < 1 is a floating-point number,
+     * then for any normal floating-point number x except
+     * the smallest in magnitude, |round(x*t)| < |x|.
+     *
+     * Proof.  WLOG, assume x >= 0.  Since the rounding
+     * function and t |---> x*t are nondecreasing, their
+     * composition t |---> round(x*t) is also
+     * nondecreasing, so it suffices to consider the
+     * largest floating-point number below 1, in particular
+     * t = 1 - ulp(1)/2.
+     *
+     * Case I: If x is a power of two, then the next
+     * floating-point number below x is x - ulp(x)/2 = x -
+     * x*ulp(1)/2 = x*(1 - ulp(1)/2) = x*t, so, since x*t
+     * is a floating-point number, multiplication is exact,
+     * and thus round(x*t) = x*t < x.
+     *
+     * Case II: If x is not a power of two, then the
+     * greatest lower bound of real numbers rounded to x is
+     * x - ulp(x)/2 = x - ulp(T(x))/2 = x - T(x)*ulp(1)/2,
+     * where T(X) is the largest power of two below x.
+     * Anything below this bound is rounded to a
+     * floating-point number smaller than x, and x*t = x*(1
+     * - ulp(1)/2) = x - x*ulp(1)/2 < x - T(x)*ulp(1)/2
+     * since T(x) < x, so round(x*t) < x*t < x.  QED.
+     *
+     * Lemma 2.  If x and y are subnormal, then round(x +
+     * y) = x + y.
+     *
+     * Proof.  It is a matter of adding the significands,
+     * since if we treat subnormals as having an implicit
+     * zero bit before the `binary' point, their exponents
+     * are all the same.  There is at most one carry/borrow
+     * bit, which can always be acommodated either in a
+     * subnormal, or, at largest, in the implicit one bit
+     * of a normal.
+     *
+     * Lemma 3.  Let x and y be floating-point numbers.  If
+     * round(x - y) is subnormal or zero, then it is equal
+     * to x - y.
+     *
+     * Proof.  Case I (equal): round(x - y) = 0 iff x = y;
+     * hence if round(x - y) = 0, then round(x - y) = 0 = x
+     * - y.
+     *
+     * Case II (subnormal/subnormal): If x and y are both
+     * subnormal, this follows directly from Lemma 2.
+     *
+     * Case IIIa (normal/subnormal): If x is normal and y
+     * is subnormal, then x and y must share sign, or else
+     * x - y would be larger than x and thus rounded to
+     * normal.  If s is the smallest normal positive
+     * floating-point number, |x| < 2s since by
+     * construction 2s - |y| is normal for all subnormal y.
+     * This means that x and y must have the same exponent,
+     * so the difference is the difference of significands,
+     * which is exact.
+     *
+     * Case IIIb (subnormal/normal): Same as case IIIa for
+     * -(y - x).
+     *
+     * Case IV (normal/normal): If x and y are both normal,
+     * then they must share sign, or else x - y would be
+     * larger than x and thus rounded to normal.  Note that
+     * |y| < 2|x|, for if |y| >= 2|x|, then |x| - |y| <=
+     * -|x| but -|x| is normal like x.  Also, |x|/2 < |y|:
+     * if |x|/2 is subnormal, it must hold because y is
+     * normal; if |x|/2 is normal, then |x|/2 >= s, so
+     * since |x| - |y| < s,
+     *
+     *  |x|/2 = |x| - |x|/2 <= |x| - s <= |y|;
+     *
+     * that is, |x|/2 < |y| < 2|x|, so by the Sterbenz
+     * lemma, round(x - y) = x - y.  QED.
+     *
+     * Proof of theorem.  WLOG, assume 0 <= a <= b.  Since
+     * round(a + round(round(b - a)*t) is nondecreasing in
+     * t and attains a at 0, the lower end of the bound is
+     * trivial; we must show the upper end of the bound
+     * strictly.  It suffices to show this for the largest
+     * floating-point number below 1, namely 1 - ulp(1)/2.
+     *
+     * Case I: round(b - a) is normal.  Then it is at most
+     * the smallest floating-point number above b - a.  By
+     * Lemma 1, round(round(b - a)*t) < round(b - a).
+     * Since the inequality is strict, and since
+     * round(round(b - a)*t) is a floating-point number
+     * below round(b - a), and since there are no
+     * floating-point numbers between b - a and round(b -
+     * a), we must have round(round(b - a)*t) < b - a.
+     * Then since y |---> round(a + y) is nondecreasing, we
+     * must have
+     *
+     *  round(a + round(round(b - a)*t))
+     *  <= round(a + (b - a))
+     *   = round(b) = b.
+     *
+     * Case II: round(b - a) is subnormal.  In this case,
+     * Lemma 1 falls apart -- we are not guaranteed the
+     * strict inequality.  However, by Lemma 3, the
+     * difference is exact: round(b - a) = b - a.  Thus,
+     *
+     *  round(a + round(round(b - a)*t))
+     *  <= round(a + round((b - a)*t))
+     *  <= round(a + (b - a))
+     *   = round(b)
+     *   = b,
+     *
+     * QED.
+     */
+
+    /* p0 is restricted to [0,1], but we use >= to silence -Wfloat-equal.  */
+    if (p0 >= 1)
+      return b;
+    return a + (b - a)*p0;
+  }
+}
+
+/**
+ * Deterministically sample from the standard logistic distribution,
+ * indexed by a uniform random 32-bit integer s and uniform random
+ * floating-point numbers t and p0 in (0, 1].
+ */
+STATIC double
+sample_logistic(uint32_t s, double t, double p0)
+{
+  double sign = (s & 1) ? -1 : +1;
+  double r;
+
+  /*
+   * We carve up the interval (0, 1) into subregions to compute
+   * the inverse CDF precisely:
+   *
+   * A = (0, 1/(1 + e)] ---> (-\infty, -1]
+   * B = [1/(1 + e), 1/2] ---> [-1, 0]
+   * C = [1/2, 1 - 1/(1 + e)] ---> [0, 1]
+   * D = [1 - 1/(1 + e), 1) ---> [1, +\infty)
+   *
+   * Cases D and C are mirror images of cases A and B,
+   * respectively, so we choose between them by the sign chosen
+   * by a fair coin toss.  We choose between cases A and B by a
+   * coin toss weighted by
+   *
+   *    2/(1 + e) = 1 - [1/2 - 1/(1 + e)]/(1/2):
+   *
+   * if it comes up heads, scale p0 into a uniform (0, 1/(1 + e)]
+   * sample p; if it comes up tails, scale p0 into a uniform (0,
+   * 1/2 - 1/(1 + e)] sample and compute the inverse CDF of p =
+   * 1/2 - p0.
+   */
+  if (t <= 2/(1 + exp(1))) {
+    /* p uniform in (0, 1/(1 + e)], represented by p.  */
+    p0 /= 1 + exp(1);
+    r = logit(p0);
+  } else {
+    /*
+     * p uniform in [1/(1 + e), 1/2), actually represented
+     * by p0 = 1/2 - p uniform in (0, 1/2 - 1/(1 + e)], so
+     * that p = 1/2 - p.
+     */
+    p0 *= 0.5 - 1/(1 + exp(1));
+    r = logithalf(p0);
+  }
+
+  /*
+   * We have chosen from the negative half of the standard
+   * logistic distribution, which is symmetric with the positive
+   * half.  Now use the sign to choose uniformly between them.
+   */
+  return sign*r;
+}
+
+/**
+ * Deterministically sample from the logistic distribution scaled by
+ * sigma and translated by mu.
+ */
+static double
+sample_logistic_locscale(uint32_t s, double t, double p0, double mu,
+    double sigma)
+{
+
+  return mu + sigma*sample_logistic(s, t, p0);
+}
+
+/**
+ * Deterministically sample from the standard log-logistic
+ * distribution, indexed by a uniform random 32-bit integer s and a
+ * uniform random floating-point number p0 in (0, 1].
+ */
+STATIC double
+sample_log_logistic(uint32_t s, double p0)
+{
+
+  /*
+   * Carve up the interval (0, 1) into (0, 1/2] and [1/2, 1); the
+   * condition numbers of the icdf and the isf coincide at 1/2.
+   */
+  p0 *= 0.5;
+  if ((s & 1) == 0) {
+    /* p = p0 in (0, 1/2] */
+    return p0/(1 - p0);
+  } else {
+    /* p = 1 - p0 in [1/2, 1) */
+    return (1 - p0)/p0;
+  }
+}
+
+/**
+ * Deterministically sample from the log-logistic distribution with
+ * scale alpha and shape beta.
+ */
+static double
+sample_log_logistic_scaleshape(uint32_t s, double p0, double alpha,
+    double beta)
+{
+  double x = sample_log_logistic(s, p0);
+
+  return alpha*pow(x, 1/beta);
+}
+
+/**
+ * Deterministically sample from the standard exponential distribution,
+ * indexed by a uniform random 32-bit integer s and a uniform random
+ * floating-point number p0 in (0, 1].
+ */
+static double
+sample_exponential(uint32_t s, double p0)
+{
+  /*
+   * We would like to evaluate log(p) for p near 0, and log1p(-p)
+   * for p near 1.  Simply carve the interval into (0, 1/2] and
+   * [1/2, 1) by a fair coin toss.
+   */
+  p0 *= 0.5;
+  if ((s & 1) == 0)
+    /* p = p0 in (0, 1/2] */
+    return -log(p0);
+  else
+    /* p = 1 - p0 in [1/2, 1) */
+    return -log1p(-p0);
+}
+
+/**
+ * Deterministically sample from a Weibull distribution with scale
+ * lambda and shape k -- just an exponential with a shape parameter in
+ * addition to a scale parameter.  (Yes, lambda really is the scale,
+ * _not_ the rate.)
+ */
+STATIC double
+sample_weibull(uint32_t s, double p0, double lambda, double k)
+{
+
+  return lambda*pow(sample_exponential(s, p0), 1/k);
+}
+
+/**
+ * Deterministically sample from the generalized Pareto distribution
+ * with shape xi, indexed by a uniform random 32-bit integer s and a
+ * uniform random floating-point number p0 in (0, 1].
+ */
+STATIC double
+sample_genpareto(uint32_t s, double p0, double xi)
+{
+  double x = sample_exponential(s, p0);
+
+  /*
+   * Write f(xi) = (e^{xi x} - 1)/xi for xi near zero as the
+   * absolutely convergent Taylor series
+   *
+   * f(x) = (1/xi) (xi x + \sum_{n=2}^\infty (xi x)^n/n!)
+   *      = x + (1/xi) \sum_{n=2}^\inty (xi x)^n/n!
+   *      = x + \sum_{n=2}^\infty xi^{n-1} x^n/n!
+   *      = x + x \sum_{n=2}^\infty (xi x)^{n-1}/n!
+   *      = x (1 + \sum_{n=2}^\infty (xi x)^{n-1}/n!).
+   *
+   * d = \sum_{n=2}^\infty (xi x)^{n-1}/n! is the relative error
+   * of f(x) from x.  If |xi| < eps/4x, then
+   *
+   * |d| <= \sum_{n=2}^\infty |xi x|^{n-1}/n!
+   *     <= \sum_{n=2}^\infty (eps/4)^{n-1}/n!
+   *     <= \sum_{n=1}^\infty (eps/4)
+   *      = (eps/4) \sum_{n=0}^\infty (eps/4)^n
+   *      = (eps/4)/(1 - eps/4)
+   *      < eps/2,
+   *
+   * for any 0 < eps < 2.  Hence, as long as |xi| < eps/2x, f(xi)
+   * = x (1 + d) for |d| <= eps/2, so x = f(xi) (1 + d') for |d'|
+   * <= eps.  What bound should we use for x?
+   *
+   * - If x is exponentially distributed, x > 200 with
+   *   probability below e^{-200} << 2^{-256}, i.e. never.
+   *
+   * - If x is computed by -log(U) for U in (0, 1], x is
+   *   guaranteed to be below 1000 in IEEE 754 binary64
+   *   floating-point.
+   *
+   * We can safely cut xi off at 1e-20 < eps/4000 and attain an
+   * error bounded by 0.5 ulp for this expression.
+   */
+  return (fabs(xi) < 1e-20 ? x : expm1(xi*x)/xi);
+}
+
+/**
+ * Deterministically sample from a generalized Pareto distribution with
+ * shape xi, scaled by sigma and translated by mu.
+ */
+static double
+sample_genpareto_locscale(uint32_t s, double p0, double mu, double sigma,
+    double xi)
+{
+
+  return mu + sigma*sample_genpareto(s, p0, xi);
+}
+
+/**
+ * Deterministically sample from the geometric distribution with
+ * per-trial success probability p.
+ *
+ * XXX Quantify the error (KL divergence?) of this
+ * ceiling-of-exponential sampler from a true geometric distribution,
+ * which we could get by rejection sampling.  Relevant papers:
+ *
+ *      John F. Monahan, `Accuracy in Random Number Generation',
+ *      Mathematics of Computation 45(172), October 1984, pp. 559--568.
+*https://pdfs.semanticscholar.org/aca6/74b96da1df77b2224e8cfc5dd6d61a471632.pdf
+ *
+ *      Karl Bringmann and Tobias Friedrich, `Exact and Efficient
+ *      Generation of Geometric Random Variates and Random Graphs', in
+ *      Proceedings of the 40th International Colloaquium on Automata,
+ *      Languages, and Programming -- ICALP 2013, Springer LNCS 7965,
+ *      pp.267--278.
+ *      https://doi.org/10.1007/978-3-642-39206-1_23
+ *      https://people.mpi-inf.mpg.de/~kbringma/paper/2013ICALP-1.pdf
+ */
+static double
+sample_geometric(uint32_t s, double p0, double p)
+{
+  double x = sample_exponential(s, p0);
+
+  /* This is actually a check against 1, but we do >= so that the compiler
+     does not raise a -Wfloat-equal */
+  if (p >= 1)
+    return 1;
+
+  return ceil(-x/log1p(-p));
+}
+
+/*******************************************************************/
+
+/** Public API for probability distributions:
+ *
+ *  For each probability distribution we define each public functions
+ *  (sample/cdf/sf/icdf/isf) as part of its dist_ops structure.
+ */
+
+const char *
+dist_name(const struct dist *dist)
+{
+  return dist->ops->name;
+}
+
+double
+dist_sample(const struct dist *dist)
+{
+  return dist->ops->sample(dist);
+}
+
+double
+dist_cdf(const struct dist *dist, double x)
+{
+  return dist->ops->cdf(dist, x);
+}
+
+double
+dist_sf(const struct dist *dist, double x)
+{
+  return dist->ops->sf(dist, x);
+}
+
+double
+dist_icdf(const struct dist *dist, double p)
+{
+  return dist->ops->icdf(dist, p);
+}
+
+double
+dist_isf(const struct dist *dist, double p)
+{
+  return dist->ops->isf(dist, p);
+}
+
+/** Functions for uniform distribution */
+
+static double
+uniform_sample(const struct dist *dist)
+{
+  const struct uniform *U = const_container_of(dist, struct uniform,
+    base);
+  double p0 = random_uniform_01();
+
+  return sample_uniform_interval(p0, U->a, U->b);
+}
+
+static double
+uniform_cdf(const struct dist *dist, double x)
+{
+  const struct uniform *U = const_container_of(dist, struct uniform,
+    base);
+
+  if (x < U->a)
+    return 0;
+  else if (x < U->b)
+    return (x - U->a)/(U->b - U->a);
+  else
+    return 1;
+}
+
+static double
+uniform_sf(const struct dist *dist, double x)
+{
+  const struct uniform *U = const_container_of(dist, struct uniform,
+    base);
+
+  if (x > U->b)
+    return 0;
+  else if (x > U->a)
+    return (U->b - x)/(U->b - U->a);
+  else
+    return 1;
+}
+
+static double
+uniform_icdf(const struct dist *dist, double p)
+{
+  const struct uniform *U = const_container_of(dist, struct uniform,
+    base);
+  double w = U->b - U->a;
+
+  return (p < 0.5 ? (U->a + w*p) : (U->b - w*(1 - p)));
+}
+
+static double
+uniform_isf(const struct dist *dist, double p)
+{
+  const struct uniform *U = const_container_of(dist, struct uniform,
+    base);
+  double w = U->b - U->a;
+
+  return (p < 0.5 ? (U->b - w*p) : (U->a + w*(1 - p)));
+}
+
+const struct dist_ops uniform_ops = {
+  .name = "uniform",
+  .sample = uniform_sample,
+  .cdf = uniform_cdf,
+  .sf = uniform_sf,
+  .icdf = uniform_icdf,
+  .isf = uniform_isf,
+};
+
+/** Functions for logistic distribution: */
+
+static double
+logistic_sample(const struct dist *dist)
+{
+  const struct logistic *L = const_container_of(dist, struct logistic,
+    base);
+  uint32_t s = crypto_rand_u32();
+  double t = random_uniform_01();
+  double p0 = random_uniform_01();
+
+  return sample_logistic_locscale(s, t, p0, L->mu, L->sigma);
+}
+
+static double
+logistic_cdf(const struct dist *dist, double x)
+{
+  const struct logistic *L = const_container_of(dist, struct logistic,
+    base);
+
+  return cdf_logistic(x, L->mu, L->sigma);
+}
+
+static double
+logistic_sf(const struct dist *dist, double x)
+{
+  const struct logistic *L = const_container_of(dist, struct logistic,
+    base);
+
+  return sf_logistic(x, L->mu, L->sigma);
+}
+
+static double
+logistic_icdf(const struct dist *dist, double p)
+{
+  const struct logistic *L = const_container_of(dist, struct logistic,
+    base);
+
+  return icdf_logistic(p, L->mu, L->sigma);
+}
+
+static double
+logistic_isf(const struct dist *dist, double p)
+{
+  const struct logistic *L = const_container_of(dist, struct logistic,
+    base);
+
+  return isf_logistic(p, L->mu, L->sigma);
+}
+
+const struct dist_ops logistic_ops = {
+  .name = "logistic",
+  .sample = logistic_sample,
+  .cdf = logistic_cdf,
+  .sf = logistic_sf,
+  .icdf = logistic_icdf,
+  .isf = logistic_isf,
+};
+
+/** Functions for log-logistic distribution: */
+
+static double
+log_logistic_sample(const struct dist *dist)
+{
+  const struct log_logistic *LL = const_container_of(dist, struct
+    log_logistic, base);
+  uint32_t s = crypto_rand_u32();
+  double p0 = random_uniform_01();
+
+  return sample_log_logistic_scaleshape(s, p0, LL->alpha, LL->beta);
+}
+
+static double
+log_logistic_cdf(const struct dist *dist, double x)
+{
+  const struct log_logistic *LL = const_container_of(dist,
+    struct log_logistic, base);
+
+  return cdf_log_logistic(x, LL->alpha, LL->beta);
+}
+
+static double
+log_logistic_sf(const struct dist *dist, double x)
+{
+  const struct log_logistic *LL = const_container_of(dist,
+    struct log_logistic, base);
+
+  return sf_log_logistic(x, LL->alpha, LL->beta);
+}
+
+static double
+log_logistic_icdf(const struct dist *dist, double p)
+{
+  const struct log_logistic *LL = const_container_of(dist,
+    struct log_logistic, base);
+
+  return icdf_log_logistic(p, LL->alpha, LL->beta);
+}
+
+static double
+log_logistic_isf(const struct dist *dist, double p)
+{
+  const struct log_logistic *LL = const_container_of(dist,
+    struct log_logistic, base);
+
+  return isf_log_logistic(p, LL->alpha, LL->beta);
+}
+
+const struct dist_ops log_logistic_ops = {
+  .name = "log logistic",
+  .sample = log_logistic_sample,
+  .cdf = log_logistic_cdf,
+  .sf = log_logistic_sf,
+  .icdf = log_logistic_icdf,
+  .isf = log_logistic_isf,
+};
+
+/** Functions for Weibull distribution */
+
+static double
+weibull_sample(const struct dist *dist)
+{
+  const struct weibull *W = const_container_of(dist, struct weibull,
+    base);
+  uint32_t s = crypto_rand_u32();
+  double p0 = random_uniform_01();
+
+  return sample_weibull(s, p0, W->lambda, W->k);
+}
+
+static double
+weibull_cdf(const struct dist *dist, double x)
+{
+  const struct weibull *W = const_container_of(dist, struct weibull,
+    base);
+
+  return cdf_weibull(x, W->lambda, W->k);
+}
+
+static double
+weibull_sf(const struct dist *dist, double x)
+{
+  const struct weibull *W = const_container_of(dist, struct weibull,
+    base);
+
+  return sf_weibull(x, W->lambda, W->k);
+}
+
+static double
+weibull_icdf(const struct dist *dist, double p)
+{
+  const struct weibull *W = const_container_of(dist, struct weibull,
+    base);
+
+  return icdf_weibull(p, W->lambda, W->k);
+}
+
+static double
+weibull_isf(const struct dist *dist, double p)
+{
+  const struct weibull *W = const_container_of(dist, struct weibull,
+    base);
+
+  return isf_weibull(p, W->lambda, W->k);
+}
+
+const struct dist_ops weibull_ops = {
+  .name = "Weibull",
+  .sample = weibull_sample,
+  .cdf = weibull_cdf,
+  .sf = weibull_sf,
+  .icdf = weibull_icdf,
+  .isf = weibull_isf,
+};
+
+/** Functions for generalized Pareto distributions */
+
+static double
+genpareto_sample(const struct dist *dist)
+{
+  const struct genpareto *GP = const_container_of(dist, struct genpareto,
+    base);
+  uint32_t s = crypto_rand_u32();
+  double p0 = random_uniform_01();
+
+  return sample_genpareto_locscale(s, p0, GP->mu, GP->sigma, GP->xi);
+}
+
+static double
+genpareto_cdf(const struct dist *dist, double x)
+{
+  const struct genpareto *GP = const_container_of(dist, struct genpareto,
+    base);
+
+  return cdf_genpareto(x, GP->mu, GP->sigma, GP->xi);
+}
+
+static double
+genpareto_sf(const struct dist *dist, double x)
+{
+  const struct genpareto *GP = const_container_of(dist, struct genpareto,
+    base);
+
+  return sf_genpareto(x, GP->mu, GP->sigma, GP->xi);
+}
+
+static double
+genpareto_icdf(const struct dist *dist, double p)
+{
+  const struct genpareto *GP = const_container_of(dist, struct genpareto,
+    base);
+
+  return icdf_genpareto(p, GP->mu, GP->sigma, GP->xi);
+}
+
+static double
+genpareto_isf(const struct dist *dist, double p)
+{
+  const struct genpareto *GP = const_container_of(dist, struct genpareto,
+    base);
+
+  return isf_genpareto(p, GP->mu, GP->sigma, GP->xi);
+}
+
+const struct dist_ops genpareto_ops = {
+  .name = "generalized Pareto",
+  .sample = genpareto_sample,
+  .cdf = genpareto_cdf,
+  .sf = genpareto_sf,
+  .icdf = genpareto_icdf,
+  .isf = genpareto_isf,
+};
+
+/** Functions for geometric distribution on number of trials before success */
+
+static double
+geometric_sample(const struct dist *dist)
+{
+  const struct geometric *G = const_container_of(dist, struct geometric, base);
+  uint32_t s = crypto_rand_u32();
+  double p0 = random_uniform_01();
+
+  return sample_geometric(s, p0, G->p);
+}
+
+static double
+geometric_cdf(const struct dist *dist, double x)
+{
+  const struct geometric *G = const_container_of(dist, struct geometric, base);
+
+  if (x < 1)
+    return 0;
+  /* 1 - (1 - p)^floor(x) = 1 - e^{floor(x) log(1 - p)} */
+  return -expm1(floor(x)*log1p(-G->p));
+}
+
+static double
+geometric_sf(const struct dist *dist, double x)
+{
+  const struct geometric *G = const_container_of(dist, struct geometric, base);
+
+  if (x < 1)
+    return 0;
+  /* (1 - p)^floor(x) = e^{ceil(x) log(1 - p)} */
+  return exp(floor(x)*log1p(-G->p));
+}
+
+static double
+geometric_icdf(const struct dist *dist, double p)
+{
+  const struct geometric *G = const_container_of(dist, struct geometric, base);
+
+  return log1p(-p)/log1p(-G->p);
+}
+
+static double
+geometric_isf(const struct dist *dist, double p)
+{
+  const struct geometric *G = const_container_of(dist, struct geometric, base);
+
+  return log(p)/log1p(-G->p);
+}
+
+const struct dist_ops geometric_ops = {
+  .name = "geometric (1-based)",
+  .sample = geometric_sample,
+  .cdf = geometric_cdf,
+  .sf = geometric_sf,
+  .icdf = geometric_icdf,
+  .isf = geometric_isf,
+};

+ 158 - 0
src/lib/math/prob_distr.h

@@ -0,0 +1,158 @@
+
+/**
+ * \file prob_distr.h
+ *
+ * \brief Header for prob_distr.c
+ **/
+
+#ifndef TOR_PROB_DISTR_H
+#define TOR_PROB_DISTR_H
+
+#include "lib/cc/compat_compiler.h"
+#include "lib/cc/torint.h"
+#include "lib/testsupport/testsupport.h"
+
+/**
+ * Container for distribution parameters for sampling, CDF, &c.
+ */
+struct dist {
+  const struct dist_ops *ops;
+};
+
+#define DIST_BASE(OPS)  { .ops = (OPS) }
+#define DIST_BASE_TYPED(OPS, OBJ, TYPE)                         \
+  DIST_BASE((OPS) + 0*sizeof(&(OBJ) - (const TYPE *)&(OBJ)))
+
+const char *dist_name(const struct dist *);
+double dist_sample(const struct dist *);
+double dist_cdf(const struct dist *, double x);
+double dist_sf(const struct dist *, double x);
+double dist_icdf(const struct dist *, double p);
+double dist_isf(const struct dist *, double p);
+
+struct dist_ops {
+  const char *name;
+  double (*sample)(const struct dist *);
+  double (*cdf)(const struct dist *, double x);
+  double (*sf)(const struct dist *, double x);
+  double (*icdf)(const struct dist *, double p);
+  double (*isf)(const struct dist *, double p);
+};
+
+/* Geometric distribution on positive number of trials before first success */
+
+struct geometric {
+  struct dist base;
+  double p; /* success probability */
+};
+
+extern const struct dist_ops geometric_ops;
+
+#define GEOMETRIC(OBJ)                                      \
+  DIST_BASE_TYPED(&geometric_ops, OBJ, struct geometric)
+
+/* Pareto distribution */
+
+struct genpareto {
+  struct dist base;
+  double mu;
+  double sigma;
+  double xi;
+};
+
+extern const struct dist_ops genpareto_ops;
+
+#define GENPARETO(OBJ)                                      \
+  DIST_BASE_TYPED(&genpareto_ops, OBJ, struct genpareto)
+
+/* Weibull distribution */
+
+struct weibull {
+  struct dist base;
+  double lambda;
+  double k;
+};
+
+extern const struct dist_ops weibull_ops;
+
+#define WEIBULL(OBJ)                                    \
+  DIST_BASE_TYPED(&weibull_ops, OBJ, struct weibull)
+
+/* Log-logistic distribution */
+
+struct log_logistic {
+  struct dist base;
+  double alpha;
+  double beta;
+};
+
+extern const struct dist_ops log_logistic_ops;
+
+#define LOG_LOGISTIC(OBJ)                                       \
+  DIST_BASE_TYPED(&log_logistic_ops, OBJ, struct log_logistic)
+
+/* Logistic distribution */
+
+struct logistic {
+  struct dist base;
+  double mu;
+  double sigma;
+};
+
+extern const struct dist_ops logistic_ops;
+
+#define LOGISTIC(OBJ)                                   \
+  DIST_BASE_TYPED(&logistic_ops, OBJ, struct logistic)
+
+/* Uniform distribution */
+
+struct uniform {
+  struct dist base;
+  double a;
+  double b;
+};
+
+extern const struct dist_ops uniform_ops;
+
+#define UNIFORM(OBJ)                                    \
+  DIST_BASE_TYPED(&uniform_ops, OBJ, struct uniform)
+
+/** Only by unittests */
+
+#ifdef PROB_DISTR_PRIVATE
+
+STATIC double logithalf(double p0);
+STATIC double logit(double p);
+
+STATIC double random_uniform_01(void);
+
+STATIC double logistic(double x);
+STATIC double cdf_logistic(double x, double mu, double sigma);
+STATIC double sf_logistic(double x, double mu, double sigma);
+STATIC double icdf_logistic(double p, double mu, double sigma);
+STATIC double isf_logistic(double p, double mu, double sigma);
+STATIC double sample_logistic(uint32_t s, double t, double p0);
+
+STATIC double cdf_log_logistic(double x, double alpha, double beta);
+STATIC double sf_log_logistic(double x, double alpha, double beta);
+STATIC double icdf_log_logistic(double p, double alpha, double beta);
+STATIC double isf_log_logistic(double p, double alpha, double beta);
+STATIC double sample_log_logistic(uint32_t s, double p0);
+
+STATIC double cdf_weibull(double x, double lambda, double k);
+STATIC double sf_weibull(double x, double lambda, double k);
+STATIC double icdf_weibull(double p, double lambda, double k);
+STATIC double isf_weibull(double p, double lambda, double k);
+STATIC double sample_weibull(uint32_t s, double p0, double lambda, double k);
+
+STATIC double sample_uniform_interval(double p0, double a, double b);
+
+STATIC double cdf_genpareto(double x, double mu, double sigma, double xi);
+STATIC double sf_genpareto(double x, double mu, double sigma, double xi);
+STATIC double icdf_genpareto(double p, double mu, double sigma, double xi);
+STATIC double isf_genpareto(double p, double mu, double sigma, double xi);
+STATIC double sample_genpareto(uint32_t s, double p0, double xi);
+
+#endif
+
+#endif

+ 13 - 0
src/lib/smartlist_core/smartlist_foreach.h

@@ -83,6 +83,19 @@
          ++var ## _sl_idx) {                                    \
          ++var ## _sl_idx) {                                    \
       var = (sl)->list[var ## _sl_idx];
       var = (sl)->list[var ## _sl_idx];
 
 
+/** Iterates over the items in smartlist <b>sl</b> in reverse order, similar to
+ *  SMARTLIST_FOREACH_BEGIN
+ *
+ * NOTE: This macro is incompatible with SMARTLIST_DEL_CURRENT.
+ */
+#define SMARTLIST_FOREACH_REVERSE_BEGIN(sl, type, var)  \
+  STMT_BEGIN                                                       \
+    int var ## _sl_idx, var ## _sl_len=(sl)->num_used;             \
+    type var;                                                      \
+    for (var ## _sl_idx = var ## _sl_len-1; var ## _sl_idx >= 0;   \
+         --var ## _sl_idx) {                                       \
+      var = (sl)->list[var ## _sl_idx];
+
 #define SMARTLIST_FOREACH_END(var)              \
 #define SMARTLIST_FOREACH_END(var)              \
     var = NULL;                                 \
     var = NULL;                                 \
     (void) var ## _sl_idx;                      \
     (void) var ## _sl_idx;                      \

+ 1 - 0
src/lib/time/.may_include

@@ -7,6 +7,7 @@ lib/log/*.h
 lib/subsys/*.h
 lib/subsys/*.h
 lib/time/*.h
 lib/time/*.h
 lib/wallclock/*.h
 lib/wallclock/*.h
+lib/defs/time.h
 
 
 # For load_windows_system_lib.
 # For load_windows_system_lib.
 lib/fs/winlib.h
 lib/fs/winlib.h

+ 2 - 2
src/lib/time/compat_time.c

@@ -787,8 +787,8 @@ monotime_absolute_nsec(void)
   return monotime_diff_nsec(&initialized_at, &now);
   return monotime_diff_nsec(&initialized_at, &now);
 }
 }
 
 
-uint64_t
-monotime_absolute_usec(void)
+MOCK_IMPL(uint64_t,
+monotime_absolute_usec,(void))
 {
 {
   return monotime_absolute_nsec() / 1000;
   return monotime_absolute_nsec() / 1000;
 }
 }

+ 1 - 1
src/lib/time/compat_time.h

@@ -199,7 +199,7 @@ uint64_t monotime_absolute_nsec(void);
 /**
 /**
  * Return the number of microseconds since the timer system was initialized.
  * Return the number of microseconds since the timer system was initialized.
  */
  */
-uint64_t monotime_absolute_usec(void);
+MOCK_DECL(uint64_t, monotime_absolute_usec,(void));
 /**
 /**
  * Return the number of milliseconds since the timer system was initialized.
  * Return the number of milliseconds since the timer system was initialized.
  */
  */

+ 1 - 2
src/lib/time/tvdiff.c

@@ -11,6 +11,7 @@
 #include "lib/time/tvdiff.h"
 #include "lib/time/tvdiff.h"
 
 
 #include "lib/cc/compat_compiler.h"
 #include "lib/cc/compat_compiler.h"
+#include "lib/defs/time.h"
 #include "lib/log/log.h"
 #include "lib/log/log.h"
 
 
 #ifdef _WIN32
 #ifdef _WIN32
@@ -20,8 +21,6 @@
 #include <sys/time.h>
 #include <sys/time.h>
 #endif
 #endif
 
 
-#define TOR_USEC_PER_SEC 1000000
-
 /** Return the difference between start->tv_sec and end->tv_sec.
 /** Return the difference between start->tv_sec and end->tv_sec.
  * Returns INT64_MAX on overflow and underflow.
  * Returns INT64_MAX on overflow and underflow.
  */
  */

+ 6 - 2
src/rust/protover/protover.rs

@@ -46,6 +46,7 @@ pub enum Protocol {
     LinkAuth,
     LinkAuth,
     Microdesc,
     Microdesc,
     Relay,
     Relay,
+    Padding,
 }
 }
 
 
 impl fmt::Display for Protocol {
 impl fmt::Display for Protocol {
@@ -73,6 +74,7 @@ impl FromStr for Protocol {
             "LinkAuth" => Ok(Protocol::LinkAuth),
             "LinkAuth" => Ok(Protocol::LinkAuth),
             "Microdesc" => Ok(Protocol::Microdesc),
             "Microdesc" => Ok(Protocol::Microdesc),
             "Relay" => Ok(Protocol::Relay),
             "Relay" => Ok(Protocol::Relay),
+            "Padding" => Ok(Protocol::Padding),
             _ => Err(ProtoverError::UnknownProtocol),
             _ => Err(ProtoverError::UnknownProtocol),
         }
         }
     }
     }
@@ -163,7 +165,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr {
              Link=1-5 \
              Link=1-5 \
              LinkAuth=3 \
              LinkAuth=3 \
              Microdesc=1-2 \
              Microdesc=1-2 \
-             Relay=1-2"
+             Relay=1-2 \
+             Padding=1"
         )
         )
     } else {
     } else {
         cstr!(
         cstr!(
@@ -176,7 +179,8 @@ pub(crate) fn get_supported_protocols_cstr() -> &'static CStr {
              Link=1-5 \
              Link=1-5 \
              LinkAuth=1,3 \
              LinkAuth=1,3 \
              Microdesc=1-2 \
              Microdesc=1-2 \
-             Relay=1-2"
+             Relay=1-2 \
+             Padding=1"
         )
         )
     }
     }
 }
 }

+ 1 - 0
src/test/Makefile.nmake

@@ -19,6 +19,7 @@ TEST_OBJECTS = test.obj test_addr.obj test_channel.obj test_channeltls.obj \
 	test_cell_formats.obj test_relay.obj test_replay.obj \
 	test_cell_formats.obj test_relay.obj test_replay.obj \
 	test_channelpadding.obj \
 	test_channelpadding.obj \
 	test_circuitstats.obj \
 	test_circuitstats.obj \
+	test_circuitpadding.obj \
 	test_scheduler.obj test_introduce.obj test_hs.obj tinytest.obj
 	test_scheduler.obj test_introduce.obj test_hs.obj tinytest.obj
 
 
 tinytest.obj: ..\ext\tinytest.c
 tinytest.obj: ..\ext\tinytest.c

+ 3 - 0
src/test/include.am

@@ -101,6 +101,7 @@ src_test_test_SOURCES += \
 	src/test/test_cell_queue.c \
 	src/test/test_cell_queue.c \
 	src/test/test_channel.c \
 	src/test/test_channel.c \
 	src/test/test_channelpadding.c \
 	src/test/test_channelpadding.c \
+	src/test/test_circuitpadding.c \
 	src/test/test_channeltls.c \
 	src/test/test_channeltls.c \
 	src/test/test_checkdir.c \
 	src/test/test_checkdir.c \
 	src/test/test_circuitlist.c \
 	src/test/test_circuitlist.c \
@@ -156,6 +157,7 @@ src_test_test_SOURCES += \
 	src/test/test_periodic_event.c \
 	src/test/test_periodic_event.c \
 	src/test/test_policy.c \
 	src/test/test_policy.c \
 	src/test/test_process.c \
 	src/test/test_process.c \
+	src/test/test_prob_distr.c \
 	src/test/test_procmon.c \
 	src/test/test_procmon.c \
 	src/test/test_proto_http.c \
 	src/test/test_proto_http.c \
 	src/test/test_proto_misc.c \
 	src/test/test_proto_misc.c \
@@ -206,6 +208,7 @@ src_test_test_slow_SOURCES += \
 	src/test/test_slow.c \
 	src/test/test_slow.c \
 	src/test/test_crypto_slow.c \
 	src/test/test_crypto_slow.c \
 	src/test/test_process_slow.c \
 	src/test/test_process_slow.c \
+	src/test/test_prob_distr.c \
 	src/test/testing_common.c \
 	src/test/testing_common.c \
 	src/test/testing_rsakeys.c \
 	src/test/testing_rsakeys.c \
 	src/ext/tinytest.c
 	src/ext/tinytest.c

+ 64 - 0
src/test/prob_distr_mpfr_ref.c

@@ -0,0 +1,64 @@
+/* Copyright 2012-2018, The Tor Project, Inc
+ * See LICENSE for licensing information */
+
+/** prob_distr_mpfr_ref.c
+ *
+ * Example reference file for GNU MPFR vectors tested in test_prob_distr.c .
+ * Code by Riastradh.
+ */
+
+#include <complex.h>
+#include <float.h>
+#include <math.h>
+#include <stdio.h>
+
+/* Must come after <stdio.h> so we get mpfr_printf.  */
+#include <mpfr.h>
+
+/*  gcc -o mpfr prob_distr_mpfr_ref.c -lmpfr -lm */
+
+/* Computes logit(p) for p = .49999 */
+int
+main(void)
+{
+  mpfr_t p, q, r;
+  mpfr_init(p);
+  mpfr_set_prec(p, 200);
+  mpfr_init(q);
+  mpfr_set_prec(q, 200);
+  mpfr_init(r);
+  mpfr_set_prec(r, 200);
+  mpfr_set_d(p, .49999, MPFR_RNDN);
+  mpfr_set_d(q, 1, MPFR_RNDN);
+  /* r := q - p = 1 - p */
+  mpfr_sub(r, q, p, MPFR_RNDN);
+  /* q := p/r = p/(1 - p) */
+  mpfr_div(q, p, r, MPFR_RNDN);
+  /* r := log(q) = log(p/(1 - p)) */
+  mpfr_log(r, q, MPFR_RNDN);
+  mpfr_printf("mpfr 200-bit\t%.128Rg\n", r);
+
+  /*
+   * Print a double approximation to logit three different ways.  All
+   * three agree bit for bit on the libms I tried, with the nextafter
+   * adjustment (which is well within the 10 eps relative error bound
+   * advertised).  Apparently I must have used the Goldberg expression
+   * for what I wrote down in the test case.
+   */
+  printf("mpfr 53-bit\t%.17g\n", nextafter(mpfr_get_d(r, MPFR_RNDN), 0), 0);
+  volatile double p0 = .49999;
+  printf("log1p\t\t%.17g\n", nextafter(-log1p((1 - 2*p0)/p0), 0));
+  volatile double x = (1 - 2*p0)/p0;
+  volatile double xp1 = x + 1;
+  printf("Goldberg\t%.17g\n", -x*log(xp1)/(xp1 - 1));
+
+  /*
+   * Print a bad approximation, using the naive expression, to see a
+   * lot of wrong digits, far beyond the 10 eps relative error attained
+   * by -log1p((1 - 2*p)/p).
+   */
+  printf("naive\t\t%.17g\n", log(p0/(1 - p0)));
+
+  fflush(stdout);
+  return ferror(stdout);
+}

+ 2 - 0
src/test/test.c

@@ -845,6 +845,7 @@ struct testgroup_t testgroups[] = {
   { "channeltls/", channeltls_tests },
   { "channeltls/", channeltls_tests },
   { "checkdir/", checkdir_tests },
   { "checkdir/", checkdir_tests },
   { "circuitbuild/", circuitbuild_tests },
   { "circuitbuild/", circuitbuild_tests },
+  { "circuitpadding/", circuitpadding_tests },
   { "circuitlist/", circuitlist_tests },
   { "circuitlist/", circuitlist_tests },
   { "circuitmux/", circuitmux_tests },
   { "circuitmux/", circuitmux_tests },
   { "circuitstats/", circuitstats_tests },
   { "circuitstats/", circuitstats_tests },
@@ -900,6 +901,7 @@ struct testgroup_t testgroups[] = {
   { "parsecommon/", parsecommon_tests },
   { "parsecommon/", parsecommon_tests },
   { "periodic-event/" , periodic_event_tests },
   { "periodic-event/" , periodic_event_tests },
   { "policy/" , policy_tests },
   { "policy/" , policy_tests },
+  { "prob_distr/", prob_distr_tests },
   { "procmon/", procmon_tests },
   { "procmon/", procmon_tests },
   { "process/", process_tests },
   { "process/", process_tests },
   { "proto/http/", proto_http_tests },
   { "proto/http/", proto_http_tests },

+ 3 - 0
src/test/test.h

@@ -187,6 +187,7 @@ extern struct testcase_t cell_format_tests[];
 extern struct testcase_t cell_queue_tests[];
 extern struct testcase_t cell_queue_tests[];
 extern struct testcase_t channel_tests[];
 extern struct testcase_t channel_tests[];
 extern struct testcase_t channelpadding_tests[];
 extern struct testcase_t channelpadding_tests[];
+extern struct testcase_t circuitpadding_tests[];
 extern struct testcase_t channeltls_tests[];
 extern struct testcase_t channeltls_tests[];
 extern struct testcase_t checkdir_tests[];
 extern struct testcase_t checkdir_tests[];
 extern struct testcase_t circuitbuild_tests[];
 extern struct testcase_t circuitbuild_tests[];
@@ -242,6 +243,8 @@ extern struct testcase_t parsecommon_tests[];
 extern struct testcase_t pem_tests[];
 extern struct testcase_t pem_tests[];
 extern struct testcase_t periodic_event_tests[];
 extern struct testcase_t periodic_event_tests[];
 extern struct testcase_t policy_tests[];
 extern struct testcase_t policy_tests[];
+extern struct testcase_t prob_distr_tests[];
+extern struct testcase_t slow_stochastic_prob_distr_tests[];
 extern struct testcase_t procmon_tests[];
 extern struct testcase_t procmon_tests[];
 extern struct testcase_t process_tests[];
 extern struct testcase_t process_tests[];
 extern struct testcase_t proto_http_tests[];
 extern struct testcase_t proto_http_tests[];

+ 2356 - 0
src/test/test_circuitpadding.c

@@ -0,0 +1,2356 @@
+#define TOR_CHANNEL_INTERNAL_
+#define TOR_TIMERS_PRIVATE
+#define CIRCUITPADDING_PRIVATE
+#define NETWORKSTATUS_PRIVATE
+
+#include "core/or/or.h"
+#include "test.h"
+#include "lib/testsupport/testsupport.h"
+#include "core/or/connection_or.h"
+#include "core/or/channel.h"
+#include "core/or/channeltls.h"
+#include <event.h>
+#include "lib/evloop/compat_libevent.h"
+#include "lib/time/compat_time.h"
+#include "lib/defs/time.h"
+#include "core/or/relay.h"
+#include "core/or/circuitlist.h"
+#include "core/or/circuitbuild.h"
+#include "core/or/circuitpadding.h"
+#include "core/crypto/relay_crypto.h"
+#include "core/or/protover.h"
+#include "feature/nodelist/nodelist.h"
+#include "lib/evloop/compat_libevent.h"
+#include "app/config/config.h"
+
+#include "feature/nodelist/routerstatus_st.h"
+#include "feature/nodelist/networkstatus_st.h"
+#include "feature/nodelist/node_st.h"
+#include "core/or/cell_st.h"
+#include "core/or/crypt_path_st.h"
+#include "core/or/or_circuit_st.h"
+#include "core/or/origin_circuit_st.h"
+
+extern smartlist_t *connection_array;
+
+circid_t get_unique_circ_id_by_chan(channel_t *chan);
+void helper_create_basic_machine(void);
+static void helper_create_conditional_machines(void);
+
+static or_circuit_t * new_fake_orcirc(channel_t *nchan, channel_t *pchan);
+channel_t *new_fake_channel(void);
+void test_circuitpadding_negotiation(void *arg);
+void test_circuitpadding_wronghop(void *arg);
+void test_circuitpadding_conditions(void *arg);
+
+void test_circuitpadding_serialize(void *arg);
+void test_circuitpadding_rtt(void *arg);
+void test_circuitpadding_tokens(void *arg);
+void test_circuitpadding_circuitsetup_machine(void *arg);
+
+static void
+simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay,
+                           int padding);
+void free_fake_orcirc(circuit_t *circ);
+void free_fake_origin_circuit(origin_circuit_t *circ);
+
+static int deliver_negotiated = 1;
+static int64_t curr_mocked_time;
+
+static node_t padding_node;
+static node_t non_padding_node;
+
+static channel_t dummy_channel;
+static circpad_machine_spec_t circ_client_machine;
+
+static void
+timers_advance_and_run(int64_t msec_update)
+{
+  curr_mocked_time += msec_update*TOR_NSEC_PER_MSEC;
+  monotime_coarse_set_mock_time_nsec(curr_mocked_time);
+  monotime_set_mock_time_nsec(curr_mocked_time);
+  timers_run_pending();
+}
+
+static void
+nodes_init(void)
+{
+  padding_node.rs = tor_malloc_zero(sizeof(routerstatus_t));
+  padding_node.rs->pv.supports_padding = 1;
+
+  non_padding_node.rs = tor_malloc_zero(sizeof(routerstatus_t));
+  non_padding_node.rs->pv.supports_padding = 0;
+}
+
+static void
+nodes_free(void)
+{
+  tor_free(padding_node.rs);
+
+  tor_free(non_padding_node.rs);
+}
+
+static const node_t *
+node_get_by_id_mock(const char *identity_digest)
+{
+  if (identity_digest[0] == 1) {
+    return &padding_node;
+  } else if (identity_digest[0] == 0) {
+    return &non_padding_node;
+  }
+
+  return NULL;
+}
+
+static or_circuit_t *
+new_fake_orcirc(channel_t *nchan, channel_t *pchan)
+{
+  or_circuit_t *orcirc = NULL;
+  circuit_t *circ = NULL;
+  crypt_path_t tmp_cpath;
+  char whatevs_key[CPATH_KEY_MATERIAL_LEN];
+
+  orcirc = tor_malloc_zero(sizeof(*orcirc));
+  circ = &(orcirc->base_);
+  circ->magic = OR_CIRCUIT_MAGIC;
+
+  //circ->n_chan = nchan;
+  circ->n_circ_id = get_unique_circ_id_by_chan(nchan);
+  circ->n_mux = NULL; /* ?? */
+  cell_queue_init(&(circ->n_chan_cells));
+  circ->n_hop = NULL;
+  circ->streams_blocked_on_n_chan = 0;
+  circ->streams_blocked_on_p_chan = 0;
+  circ->n_delete_pending = 0;
+  circ->p_delete_pending = 0;
+  circ->received_destroy = 0;
+  circ->state = CIRCUIT_STATE_OPEN;
+  circ->purpose = CIRCUIT_PURPOSE_OR;
+  circ->package_window = CIRCWINDOW_START_MAX;
+  circ->deliver_window = CIRCWINDOW_START_MAX;
+  circ->n_chan_create_cell = NULL;
+
+  //orcirc->p_chan = pchan;
+  orcirc->p_circ_id = get_unique_circ_id_by_chan(pchan);
+  cell_queue_init(&(orcirc->p_chan_cells));
+
+  circuit_set_p_circid_chan(orcirc, orcirc->p_circ_id, pchan);
+  circuit_set_n_circid_chan(circ, circ->n_circ_id, nchan);
+
+  memset(&tmp_cpath, 0, sizeof(tmp_cpath));
+  if (circuit_init_cpath_crypto(&tmp_cpath, whatevs_key,
+                                sizeof(whatevs_key), 0, 0)<0) {
+    log_warn(LD_BUG,"Circuit initialization failed");
+    return NULL;
+  }
+  orcirc->crypto = tmp_cpath.crypto;
+
+  return orcirc;
+}
+
+void
+free_fake_orcirc(circuit_t *circ)
+{
+  or_circuit_t *orcirc = TO_OR_CIRCUIT(circ);
+
+  relay_crypto_clear(&orcirc->crypto);
+
+  circpad_circuit_free_all_machineinfos(circ);
+  tor_free(circ);
+}
+
+void
+free_fake_origin_circuit(origin_circuit_t *circ)
+{
+  circpad_circuit_free_all_machineinfos(TO_CIRCUIT(circ));
+  circuit_clear_cpath(circ);
+  tor_free(circ);
+}
+
+void dummy_nop_timer(void);
+
+//static int dont_stop_libevent = 0;
+
+static circuit_t *client_side;
+static circuit_t *relay_side;
+
+static int n_client_cells = 0;
+static int n_relay_cells = 0;
+
+static int
+circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction,
+                           crypt_path_t *layer_hint, streamid_t on_stream,
+                           const char *filename, int lineno);
+
+static void
+circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ,
+                               cell_direction_t direction);
+
+static void
+circuitmux_attach_circuit_mock(circuitmux_t *cmux, circuit_t *circ,
+                               cell_direction_t direction)
+{
+  (void)cmux;
+  (void)circ;
+  (void)direction;
+
+  return;
+}
+
+static int
+circuit_package_relay_cell_mock(cell_t *cell, circuit_t *circ,
+                           cell_direction_t cell_direction,
+                           crypt_path_t *layer_hint, streamid_t on_stream,
+                           const char *filename, int lineno)
+{
+  (void)cell; (void)on_stream; (void)filename; (void)lineno;
+
+  if (circ == client_side) {
+    if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) {
+      // Deliver to relay
+      circpad_handle_padding_negotiate(relay_side, cell);
+    } else {
+
+      int is_target_hop = circpad_padding_is_from_expected_hop(circ,
+                                                             layer_hint);
+      tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_OUT);
+      tt_int_op(is_target_hop, OP_EQ, 1);
+
+      // No need to pretend a padding cell was sent: This event is
+      // now emitted internally when the circuitpadding code sends them.
+      //circpad_cell_event_padding_sent(client_side);
+
+      // Receive padding cell at middle
+      circpad_deliver_recognized_relay_cell_events(relay_side,
+              cell->payload[0], NULL);
+    }
+    n_client_cells++;
+  } else if (circ == relay_side) {
+    tt_int_op(cell_direction, OP_EQ, CELL_DIRECTION_IN);
+
+    if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATED) {
+      // XXX: blah need right layer_hint..
+      if (deliver_negotiated)
+        circpad_handle_padding_negotiated(client_side, cell,
+                                          TO_ORIGIN_CIRCUIT(client_side)
+                                             ->cpath->next);
+    } else if (cell->payload[0] == RELAY_COMMAND_PADDING_NEGOTIATE) {
+      circpad_handle_padding_negotiate(client_side, cell);
+    } else {
+      // No need to pretend a padding cell was sent: This event is
+      // now emitted internally when the circuitpadding code sends them.
+      //circpad_cell_event_padding_sent(relay_side);
+
+      // Receive padding cell at client
+      circpad_deliver_recognized_relay_cell_events(client_side,
+              cell->payload[0],
+              TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+    }
+
+    n_relay_cells++;
+  }
+
+ done:
+  timers_advance_and_run(1);
+  return 0;
+}
+
+// Test reading and writing padding to strings (or options_t + consensus)
+void
+test_circuitpadding_serialize(void *arg)
+{
+  (void)arg;
+}
+
+static signed_error_t
+circpad_send_command_to_hop_mock(origin_circuit_t *circ, uint8_t hopnum,
+                                 uint8_t relay_command, const uint8_t *payload,
+                                 ssize_t payload_len)
+{
+  (void) circ;
+  (void) hopnum;
+  (void) relay_command;
+  (void) payload;
+  (void) payload_len;
+  return 0;
+}
+
+void
+test_circuitpadding_rtt(void *arg)
+{
+  /* Test Plan:
+   *
+   * 1. Test RTT measurement server side
+   *    a. test usage of measured RTT
+   * 2. Test termination of RTT measurement
+   *    a. test non-update of RTT
+   * 3. Test client side circuit and non-application of RTT..
+   */
+  circpad_delay_t rtt_estimate;
+  (void)arg;
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+  MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock);
+
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+  helper_create_basic_machine();
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,
+                                                                 0);
+
+  relay_side->padding_machine[0] = &circ_client_machine;
+  relay_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,0);
+
+  /* Test 1: Test measuring RTT */
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0);
+
+  timers_advance_and_run(20);
+
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 19000);
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 30000);
+  tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0),
+            OP_EQ,
+            relay_side->padding_info[0]->rtt_estimate_usec+
+            circpad_machine_current_state(
+             relay_side->padding_info[0])->start_usec);
+
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_NE, 0);
+  timers_advance_and_run(20);
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_GE, 20000);
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_LE, 21000);
+  tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0),
+            OP_EQ,
+            relay_side->padding_info[0]->rtt_estimate_usec+
+            circpad_machine_current_state(
+             relay_side->padding_info[0])->start_usec);
+
+  /* Test 2: Termination of RTT measurement (from the previous test) */
+  tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1);
+  rtt_estimate = relay_side->padding_info[0]->rtt_estimate_usec;
+
+  circpad_cell_event_nonpadding_received((circuit_t*)relay_side);
+  timers_advance_and_run(4);
+  circpad_cell_event_nonpadding_sent((circuit_t*)relay_side);
+
+  tt_int_op(relay_side->padding_info[0]->rtt_estimate_usec, OP_EQ,
+            rtt_estimate);
+  tt_int_op(relay_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->stop_rtt_update, OP_EQ, 1);
+  tt_int_op(circpad_histogram_bin_to_usec(relay_side->padding_info[0], 0),
+            OP_EQ,
+            relay_side->padding_info[0]->rtt_estimate_usec+
+            circpad_machine_current_state(
+             relay_side->padding_info[0])->start_usec);
+
+  /* Test 3: Make sure client side machine properly ignores RTT */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  timers_advance_and_run(20);
+  circpad_cell_event_nonpadding_sent((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->last_received_time_usec, OP_EQ, 0);
+
+  tt_int_op(client_side->padding_info[0]->rtt_estimate_usec, OP_EQ, 0);
+  tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0),
+            OP_NE, client_side->padding_info[0]->rtt_estimate_usec);
+  tt_int_op(circpad_histogram_bin_to_usec(client_side->padding_info[0], 0),
+            OP_EQ,
+            circpad_machine_current_state(
+                client_side->padding_info[0])->start_usec);
+ done:
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  timers_shutdown();
+  monotime_disable_test_mocking();
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  tor_free(circ_client_machine.states);
+
+  return;
+}
+
+void
+helper_create_basic_machine(void)
+{
+  /* Start, burst */
+  circpad_machine_states_init(&circ_client_machine, 2);
+
+  circ_client_machine.states[CIRCPAD_STATE_START].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  circ_client_machine.states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_NONPADDING_SENT] = CIRCPAD_STATE_CANCEL;
+
+  // FIXME: Is this what we want?
+  circ_client_machine.states[CIRCPAD_STATE_BURST].token_removal =
+      CIRCPAD_TOKEN_REMOVAL_HIGHER;
+
+  // FIXME: Tune this histogram
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_len = 5;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 500;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[0] = 1;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[1] = 0;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[2] = 2;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[3] = 2;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram[4] = 2;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].histogram_total_tokens = 7;
+  circ_client_machine.states[CIRCPAD_STATE_BURST].use_rtt_estimate = 1;
+
+  return;
+}
+
+#define BIG_HISTOGRAM_LEN 10
+
+/** Setup a machine with a big histogram */
+static void
+helper_create_machine_with_big_histogram(circpad_removal_t removal_strategy)
+{
+  const int tokens_per_bin = 2;
+
+  /* Start, burst */
+  circpad_machine_states_init(&circ_client_machine, 2);
+
+  circpad_state_t *burst_state =
+    &circ_client_machine.states[CIRCPAD_STATE_BURST];
+
+  circ_client_machine.states[CIRCPAD_STATE_START].
+    next_state[CIRCPAD_EVENT_NONPADDING_RECV] = CIRCPAD_STATE_BURST;
+
+  burst_state->next_state[CIRCPAD_EVENT_PADDING_RECV] = CIRCPAD_STATE_BURST;
+  burst_state->next_state[CIRCPAD_EVENT_NONPADDING_RECV] =CIRCPAD_STATE_BURST;
+
+  burst_state->next_state[CIRCPAD_EVENT_NONPADDING_SENT] =CIRCPAD_STATE_CANCEL;
+
+  burst_state->token_removal = CIRCPAD_TOKEN_REMOVAL_HIGHER;
+
+  burst_state->histogram_len = BIG_HISTOGRAM_LEN;
+  burst_state->start_usec = 0;
+  burst_state->range_usec = 1000;
+
+  int n_tokens = 0;
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    burst_state->histogram[i] = tokens_per_bin;
+    n_tokens += tokens_per_bin;
+  }
+
+  burst_state->histogram_total_tokens = n_tokens;
+  burst_state->length_dist.type = CIRCPAD_DIST_UNIFORM;
+  burst_state->length_dist.param1 = n_tokens;
+  burst_state->length_dist.param2 = n_tokens;
+  burst_state->max_length = n_tokens;
+  burst_state->length_includes_nonpadding = 1;
+  burst_state->use_rtt_estimate = 0;
+  burst_state->token_removal = removal_strategy;
+}
+
+static circpad_decision_t
+circpad_machine_schedule_padding_mock(circpad_machine_state_t *mi)
+{
+  (void)mi;
+  return 0;
+}
+
+static uint64_t
+mock_monotime_absolute_usec(void)
+{
+  return 100;
+}
+
+/** Test higher token removal strategy by bin  */
+static void
+test_circuitpadding_token_removal_higher(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_HIGHER);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* move the machine to the right state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine and setup tokens */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the right order to remove tokens from this histogram. That is, we
+   * first remove tokens from the 4th bin since 57 usec is nearest to the 4th
+   * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for
+   * the same reason, then from the 5th, etc. */
+  const int bin_removal_order[] = {4, 5, 6, 7, 8};
+  unsigned i;
+
+  /* Remove all tokens from all bins apart from the infinity bin */
+  for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. Don't do this in the case of the last
+       bin since the tokens will get refilled */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all lowe bins are not touched */
+  for (i=0; i < 4 ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test below the lowest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 1;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[0], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test lower token removal strategy by bin  */
+static void
+test_circuitpadding_token_removal_lower(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_LOWER);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* move the machine to the right state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine and setup tokens */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the right order to remove tokens from this histogram. That is, we
+   * first remove tokens from the 4th bin since 57 usec is nearest to the 4th
+   * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for
+   * the same reason, then from the 5th, etc. */
+  const int bin_removal_order[] = {4, 3, 2, 1, 0};
+  unsigned i;
+
+  /* Remove all tokens from all bins apart from the infinity bin */
+  for (i = 0; i < sizeof(bin_removal_order)/sizeof(int) ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. Don't do this in the case of the last
+       bin since the tokens will get refilled */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all higher bins are untouched */
+  for (i = 5; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test above the highest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 29202;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test closest token removal strategy by bin  */
+static void
+test_circuitpadding_closest_token_removal(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* move the machine to the right state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine and setup tokens */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the right order to remove tokens from this histogram. That is, we
+   * first remove tokens from the 4th bin since 57 usec is nearest to the 4th
+   * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for
+   * the same reason, then from the 5th, etc. */
+  const int bin_removal_order[] = {4, 3, 5, 2, 6, 1, 7, 0, 8, 9};
+
+  /* Remove all tokens from all bins apart from the infinity bin */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. Don't do this in the case of the last
+       bin since the tokens will get refilled */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all bins have been refilled */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test below the lowest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 102;
+  mi->histogram[0] = 0;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[1], OP_EQ, 1);
+
+  /* Test above the highest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 29202;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test closest token removal strategy with usec  */
+static void
+test_circuitpadding_closest_token_removal_usec(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_CLOSEST_USEC);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* move the machine to the right state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine and setup tokens */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /*************************************************************************/
+
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Test left boundaries of each histogram bin: */
+  const circpad_delay_t bin_left_bounds[] =
+    {0, 1, 7, 15, 31, 62, 125, 250, 500, CIRCPAD_DELAY_INFINITE};
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_uint_op(bin_left_bounds[i], OP_EQ,
+               circpad_histogram_bin_to_usec(mi, i));
+  }
+
+  /* XXX we want to test remove_token_exact and
+     circpad_machine_remove_closest_token() with usec */
+
+  /* Check that all bins have two tokens right now */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* This is the right order to remove tokens from this histogram. That is, we
+   * first remove tokens from the 4th bin since 57 usec is nearest to the 4th
+   * bin midpoint (31 + (62-31)/2 == 46). Then we remove from the 3rd bin for
+   * the same reason, then from the 5th, etc. */
+  const int bin_removal_order[] = {4, 3, 5, 2, 1, 0, 6, 7, 8, 9};
+
+  /* Remove all tokens from all bins apart from the infinity bin */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN-1 ; i++) {
+    int bin_to_remove = bin_removal_order[i];
+    log_debug(LD_GENERAL, "Testing that %d attempt removes %d bin",
+              i, bin_to_remove);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 2);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 1);
+
+    mi->padding_scheduled_at_usec = current_time - 57;
+    circpad_machine_remove_token(mi);
+
+    /* Test that we cleaned out this bin. Don't do this in the case of the last
+       bin since the tokens will get refilled */
+    if (i != BIG_HISTOGRAM_LEN - 2) {
+      tt_int_op(mi->histogram[bin_to_remove], OP_EQ, 0);
+    }
+  }
+
+  /* Check that all bins have been refilled */
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    tt_int_op(mi->histogram[i], OP_EQ, 2);
+  }
+
+  /* Test below the lowest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 102;
+  mi->histogram[0] = 0;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[1], OP_EQ, 1);
+
+  /* Test above the highest bin, for coverage */
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+  circ_client_machine.states[CIRCPAD_STATE_BURST].start_usec = 100;
+  mi->padding_scheduled_at_usec = current_time - 29202;
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[BIG_HISTOGRAM_LEN-2], OP_EQ, 1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+/** Test closest token removal strategy with usec  */
+static void
+test_circuitpadding_token_removal_exact(void *arg)
+{
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /* Mock it up */
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+  MOCK(circpad_machine_schedule_padding,circpad_machine_schedule_padding_mock);
+
+  /* Setup test environment (time etc.) */
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  monotime_enable_test_mocking();
+
+  /* Create test machine */
+  helper_create_machine_with_big_histogram(CIRCPAD_TOKEN_REMOVAL_EXACT);
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+
+  /* move the machine to the right state */
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  /* Get the machine and setup tokens */
+  mi = client_side->padding_info[0];
+  tt_assert(mi);
+
+  /**********************************************************************/
+  uint64_t current_time = monotime_absolute_usec();
+
+  /* Ensure that we will clear out bin #4 with this usec */
+  mi->padding_scheduled_at_usec = current_time - 57;
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+  circpad_machine_remove_token(mi);
+  mi->padding_scheduled_at_usec = current_time - 57;
+  tt_int_op(mi->histogram[4], OP_EQ, 1);
+  circpad_machine_remove_token(mi);
+  tt_int_op(mi->histogram[4], OP_EQ, 0);
+
+  /* Ensure that we will not remove any other tokens even tho we try to, since
+   * this is what the exact strategy dictates */
+  mi->padding_scheduled_at_usec = current_time - 57;
+  circpad_machine_remove_token(mi);
+  for (int i = 0; i < BIG_HISTOGRAM_LEN ; i++) {
+    if (i != 4) {
+      tt_int_op(mi->histogram[i], OP_EQ, 2);
+    }
+  }
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+#undef BIG_HISTOGRAM_LEN
+
+void
+test_circuitpadding_tokens(void *arg)
+{
+  const circpad_state_t *state;
+  circpad_machine_state_t *mi;
+  (void)arg;
+
+  /** Test plan:
+   *
+   * 1. Test symmetry between bin_to_usec and usec_to_bin
+   *    a. Test conversion
+   *    b. Test edge transitions (lower, upper)
+   * 2. Test remove higher on an empty bin
+   *    a. Normal bin
+   *    b. Infinity bin
+   *    c. Bin 0
+   *    d. No higher
+   * 3. Test remove lower
+   *    a. Normal bin
+   *    b. Bin 0
+   *    c. No lower
+   * 4. Test remove closest
+   *    a. Closest lower
+   *    b. Closest higher
+   *    c. Closest 0
+   *    d. Closest Infinity
+   */
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+
+  helper_create_basic_machine();
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] = circpad_circuit_machineinfo_new(client_side,
+                                                                 0);
+
+  mi = client_side->padding_info[0];
+
+  // Pretend a non-padding cell was sent
+  // XXX: This messes us up.. Padding gets scheduled..
+  circpad_cell_event_nonpadding_sent((circuit_t*)client_side);
+  circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  /* We have to save the infinity bin because one inf delay
+   * could have been chosen when we transition to burst */
+  circpad_hist_token_t inf_bin = mi->histogram[4];
+
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+            CIRCPAD_STATE_BURST);
+
+  state = circpad_machine_current_state(client_side->padding_info[0]);
+
+  // Test 0: convert bin->usec->bin
+  // Bin 0+1 have different semantics
+  for (int bin = 0; bin < 2; bin++) {
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+  }
+  for (int bin = 2; bin < state->histogram_len-1; bin++) {
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+    /* Verify we round down */
+    bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec+3);
+    tt_int_op(bin, OP_EQ, bin2);
+
+    bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec-1);
+    tt_int_op(bin, OP_EQ, bin2+1);
+  }
+
+  // Test 1: converting usec->bin->usec->bin
+  // Bin 0+1 have different semantics.
+  for (circpad_delay_t i = 0; i <= state->start_usec+1; i++) {
+    int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                            i);
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+    tt_int_op(i, OP_LE, usec);
+  }
+  for (circpad_delay_t i = state->start_usec+1;
+           i <= state->start_usec + state->range_usec; i++) {
+    int bin = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                            i);
+    circpad_delay_t usec =
+        circpad_histogram_bin_to_usec(client_side->padding_info[0], bin);
+    int bin2 = circpad_histogram_usec_to_bin(client_side->padding_info[0],
+                                             usec);
+    tt_int_op(bin, OP_EQ, bin2);
+    tt_int_op(i, OP_GE, usec);
+  }
+
+  /* 2.a. Normal higher bin */
+  {
+    tt_int_op(mi->histogram[2], OP_EQ, 2);
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    tt_int_op(mi->histogram[2], OP_EQ, 1);
+
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[2], OP_EQ, 0);
+
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 0);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 0);
+  }
+
+  /* 2.b. Higher Infinity bin */
+  {
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1);
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+
+    /* Test past the infinity bin */
+    circpad_machine_remove_higher_token(mi,
+         circpad_histogram_bin_to_usec(mi, 5)+1000000);
+
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+  }
+
+  /* 2.c. Bin 0 */
+  {
+    tt_int_op(mi->histogram[0], OP_EQ, 1);
+    circpad_machine_remove_higher_token(mi,
+         state->start_usec/2);
+    tt_int_op(mi->histogram[0], OP_EQ, 0);
+  }
+
+  /* Drain the infinity bin and cause a refill */
+  while (inf_bin != 0) {
+    tt_int_op(mi->histogram[4], OP_EQ, inf_bin);
+    circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+    inf_bin--;
+  }
+
+  circpad_cell_event_nonpadding_sent((circuit_t*)client_side);
+
+  // We should have refilled here.
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+
+  /* 3.a. Bin 0 */
+  {
+    tt_int_op(mi->histogram[0], OP_EQ, 1);
+    circpad_machine_remove_higher_token(mi,
+         state->start_usec/2);
+    tt_int_op(mi->histogram[0], OP_EQ, 0);
+  }
+
+  /* 3.b. Test remove lower normal bin */
+  {
+    tt_int_op(mi->histogram[3], OP_EQ, 2);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    tt_int_op(mi->histogram[3], OP_EQ, 0);
+    tt_int_op(mi->histogram[2], OP_EQ, 2);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    /* 3.c. No lower */
+    circpad_machine_remove_lower_token(mi,
+         circpad_histogram_bin_to_usec(mi, 3)+1);
+    tt_int_op(mi->histogram[2], OP_EQ, 0);
+  }
+
+  /* 4. Test remove closest
+   *    a. Closest lower
+   *    b. Closest higher
+   *    c. Closest 0
+   *    d. Closest Infinity
+   */
+  circpad_machine_setup_tokens(mi);
+  tt_int_op(mi->histogram[2], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[2], OP_EQ, 0);
+  tt_int_op(mi->histogram[3], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[3], OP_EQ, 0);
+  tt_int_op(mi->histogram[0], OP_EQ, 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[0], OP_EQ, 0);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 2)+1, 0);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+
+  /* 5. Test remove closest usec
+   *    a. Closest 0
+   *    b. Closest lower (below midpoint)
+   *    c. Closest higher (above midpoint)
+   *    d. Closest Infinity
+   */
+  circpad_machine_setup_tokens(mi);
+
+  tt_int_op(mi->histogram[0], OP_EQ, 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 0)/3, 1);
+  tt_int_op(mi->histogram[0], OP_EQ, 0);
+  tt_int_op(mi->histogram[2], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 0)/3, 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 0)/3, 1);
+  tt_int_op(mi->histogram[2], OP_EQ, 0);
+  tt_int_op(mi->histogram[3], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  tt_int_op(mi->histogram[3], OP_EQ, 0);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  circpad_machine_remove_closest_token(mi,
+         circpad_histogram_bin_to_usec(mi, 4), 1);
+  tt_int_op(mi->histogram[4], OP_EQ, 2);
+
+  // XXX: Need more coverage of the actual usec branches
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  monotime_disable_test_mocking();
+  tor_free(circ_client_machine.states);
+}
+
+void
+test_circuitpadding_wronghop(void *arg)
+{
+  /**
+   * Test plan:
+   * 1. Padding sent from hop 1 and 3 to client
+   * 2. Send negotiated from hop 1 and 3 to client
+   * 3. Garbled negotiated cell
+   * 4. Padding negotiate sent to client
+   * 5. Send negotiate stop command for unknown machine
+   * 6. Send negotiated to relay
+   * 7. Garbled padding negotiate cell
+   */
+  (void)arg;
+  uint32_t read_bw = 0, overhead_bw = 0;
+  cell_t cell;
+  signed_error_t ret;
+  origin_circuit_t *orig_client;
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  client_side = (circuit_t *)origin_circuit_new();
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel,
+                                            &dummy_channel);
+  orig_client = TO_ORIGIN_CIRCUIT(client_side);
+
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  nodes_init();
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+
+  /* Build three hops */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* verify padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+
+  /* verify echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  read_bw = orig_client->n_delivered_read_circ_bw;
+  overhead_bw = orig_client->n_overhead_read_circ_bw;
+
+  /* 1. Test padding from first and third hop */
+  circpad_deliver_recognized_relay_cell_events(client_side,
+              RELAY_COMMAND_DROP,
+              TO_ORIGIN_CIRCUIT(client_side)->cpath);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_EQ,
+            orig_client->n_overhead_read_circ_bw);
+
+  circpad_deliver_recognized_relay_cell_events(client_side,
+              RELAY_COMMAND_DROP,
+              TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_EQ,
+            orig_client->n_overhead_read_circ_bw);
+
+  circpad_deliver_recognized_relay_cell_events(client_side,
+              RELAY_COMMAND_DROP,
+              TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_LT,
+            orig_client->n_overhead_read_circ_bw);
+
+  /* 2. Test padding negotiated not handled from hops 1,3 */
+  ret = circpad_handle_padding_negotiated(client_side, &cell,
+          TO_ORIGIN_CIRCUIT(client_side)->cpath);
+  tt_int_op(ret, OP_EQ, -1);
+
+  ret = circpad_handle_padding_negotiated(client_side, &cell,
+          TO_ORIGIN_CIRCUIT(client_side)->cpath->next->next);
+  tt_int_op(ret, OP_EQ, -1);
+
+  /* 3. Garbled negotiated cell */
+  memset(&cell, 255, sizeof(cell));
+  ret = circpad_handle_padding_negotiated(client_side, &cell,
+          TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+  tt_int_op(ret, OP_EQ, -1);
+
+  /* 4. Test that negotiate is dropped at origin */
+  read_bw = orig_client->n_delivered_read_circ_bw;
+  overhead_bw = orig_client->n_overhead_read_circ_bw;
+  relay_send_command_from_edge(0, relay_side,
+                               RELAY_COMMAND_PADDING_NEGOTIATE,
+                               (void*)cell.payload,
+                               (size_t)3, NULL);
+  tt_int_op(read_bw, OP_EQ,
+            orig_client->n_delivered_read_circ_bw);
+  tt_int_op(overhead_bw, OP_EQ,
+            orig_client->n_overhead_read_circ_bw);
+
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* 5. Test that asking to stop the wrong machine does nothing */
+  circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side),
+                            255, 2, CIRCPAD_COMMAND_STOP);
+  tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(client_side->padding_info[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* 6. Sending negotiated command to relay does nothing */
+  ret = circpad_handle_padding_negotiated(relay_side, &cell, NULL);
+  tt_int_op(ret, OP_EQ, -1);
+
+  /* 7. Test garbled negotated cell (bad command 255) */
+  memset(&cell, 0, sizeof(cell));
+  ret = circpad_handle_padding_negotiate(relay_side, &cell);
+  tt_int_op(ret, OP_EQ, -1);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* Test 2: Test no padding */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+
+  client_side = (circuit_t *)origin_circuit_new();
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel,
+                                            &dummy_channel);
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 0);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* verify no echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+
+  /* Finish circuit */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Spoof padding negotiated on circuit with no padding */
+  circpad_padding_negotiated(relay_side,
+                             CIRCPAD_MACHINE_CIRC_SETUP,
+                             CIRCPAD_COMMAND_START,
+                             CIRCPAD_RESPONSE_OK);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  circpad_padding_negotiated(relay_side,
+                             CIRCPAD_MACHINE_CIRC_SETUP,
+                             CIRCPAD_COMMAND_START,
+                             CIRCPAD_RESPONSE_ERR);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  monotime_disable_test_mocking();
+  UNMOCK(node_get_by_id);
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  nodes_free();
+}
+
+void
+test_circuitpadding_negotiation(void *arg)
+{
+  /**
+   * Test plan:
+   * 1. Test circuit where padding is supported by middle
+   *    a. Make sure padding negotiation is sent
+   *    b. Test padding negotiation delivery and parsing
+   * 2. Test circuit where padding is unsupported by middle
+   *    a. Make sure padding negotiation is not sent
+   * 3. Test failure to negotiate a machine due to desync.
+   */
+  (void)arg;
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  nodes_init();
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+
+  /* Build two hops */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* verify padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+
+  /* verify echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* Finish circuit */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Test 2: Test no padding */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 0);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* verify no echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* Finish circuit */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Force negotiate padding. */
+  circpad_negotiate_padding(TO_ORIGIN_CIRCUIT(client_side),
+                            CIRCPAD_MACHINE_CIRC_SETUP,
+                            2, CIRCPAD_COMMAND_START);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  /* verify no echo was sent */
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(n_client_cells, OP_EQ, 1);
+
+  /* 3. Test failure to negotiate a machine due to desync */
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  SMARTLIST_FOREACH(relay_padding_machines,
+          circpad_machine_spec_t *,
+          m, tor_free(m->states); tor_free(m));
+  smartlist_free(relay_padding_machines);
+  relay_padding_machines = smartlist_new();
+
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* verify echo was sent */
+  tt_int_op(n_client_cells, OP_EQ, 2);
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+
+  /* verify no padding was negotiated */
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  monotime_disable_test_mocking();
+  UNMOCK(node_get_by_id);
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+  nodes_free();
+}
+
+static void
+simulate_single_hop_extend(circuit_t *client, circuit_t *mid_relay,
+                           int padding)
+{
+  char whatevs_key[CPATH_KEY_MATERIAL_LEN];
+  char digest[DIGEST_LEN];
+  tor_addr_t addr;
+
+  // Pretend a non-padding cell was sent
+  circpad_cell_event_nonpadding_sent((circuit_t*)client);
+
+  // Receive extend cell at middle
+  circpad_cell_event_nonpadding_received((circuit_t*)mid_relay);
+
+  // Advance time a tiny bit so we can calculate an RTT
+  curr_mocked_time += 10 * TOR_NSEC_PER_MSEC;
+  monotime_coarse_set_mock_time_nsec(curr_mocked_time);
+  monotime_set_mock_time_nsec(curr_mocked_time);
+
+  // Receive extended cell at middle
+  circpad_cell_event_nonpadding_sent((circuit_t*)mid_relay);
+
+  // Receive extended cell at first hop
+  circpad_cell_event_nonpadding_received((circuit_t*)client);
+
+  // Add a hop to cpath
+  crypt_path_t *hop = tor_malloc_zero(sizeof(crypt_path_t));
+  onion_append_to_cpath(&TO_ORIGIN_CIRCUIT(client)->cpath, hop);
+
+  hop->magic = CRYPT_PATH_MAGIC;
+  hop->state = CPATH_STATE_OPEN;
+
+  // add an extend info to indicate if this node supports padding or not.
+  // (set the first byte of the digest for our mocked node_get_by_id)
+  digest[0] = padding;
+
+  hop->extend_info = extend_info_new(
+          padding ? "padding" : "non-padding",
+          digest, NULL, NULL, NULL,
+          &addr, padding);
+
+  circuit_init_cpath_crypto(hop, whatevs_key, sizeof(whatevs_key), 0, 0);
+
+  hop->package_window = circuit_initial_package_window();
+  hop->deliver_window = CIRCWINDOW_START;
+
+  // Signal that the hop was added
+  circpad_machine_event_circ_added_hop(TO_ORIGIN_CIRCUIT(client));
+}
+
+static circpad_machine_spec_t *
+helper_create_conditional_machine(void)
+{
+  circpad_machine_spec_t *ret =
+    tor_malloc_zero(sizeof(circpad_machine_spec_t));
+
+  /* Start, burst */
+  circpad_machine_states_init(ret, 2);
+
+  ret->states[CIRCPAD_STATE_START].
+      next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST;
+
+  ret->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_PADDING_SENT] = CIRCPAD_STATE_BURST;
+
+  ret->states[CIRCPAD_STATE_BURST].
+      next_state[CIRCPAD_EVENT_LENGTH_COUNT] = CIRCPAD_STATE_END;
+
+  ret->states[CIRCPAD_STATE_BURST].token_removal =
+      CIRCPAD_TOKEN_REMOVAL_NONE;
+
+  ret->states[CIRCPAD_STATE_BURST].histogram_len = 3;
+  ret->states[CIRCPAD_STATE_BURST].start_usec = 0;
+  ret->states[CIRCPAD_STATE_BURST].range_usec = 1000000;
+  ret->states[CIRCPAD_STATE_BURST].histogram[0] = 6;
+  ret->states[CIRCPAD_STATE_BURST].histogram[1] = 0;
+  ret->states[CIRCPAD_STATE_BURST].histogram[1] = 0;
+  ret->states[CIRCPAD_STATE_BURST].histogram_total_tokens = 6;
+  ret->states[CIRCPAD_STATE_BURST].use_rtt_estimate = 0;
+  ret->states[CIRCPAD_STATE_BURST].length_includes_nonpadding = 1;
+
+  return ret;
+}
+
+static void
+helper_create_conditional_machines(void)
+{
+  circpad_machine_spec_t *add = helper_create_conditional_machine();
+  origin_padding_machines = smartlist_new();
+  relay_padding_machines = smartlist_new();
+
+  add->machine_num = 2;
+  add->is_origin_side = 1;
+  add->should_negotiate_end = 1;
+  add->target_hopnum = 2;
+
+  /* Let's have this one end after 4 packets */
+  add->states[CIRCPAD_STATE_BURST].length_dist.type = CIRCPAD_DIST_UNIFORM;
+  add->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4;
+  add->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4;
+  add->states[CIRCPAD_STATE_BURST].max_length = 4;
+
+  add->conditions.requires_vanguards = 0;
+  add->conditions.min_hops = 2;
+  add->conditions.state_mask = CIRCPAD_CIRC_BUILDING|
+           CIRCPAD_CIRC_NO_STREAMS|CIRCPAD_CIRC_HAS_RELAY_EARLY;
+  add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
+
+  smartlist_add(origin_padding_machines, add);
+
+  add = helper_create_conditional_machine();
+  add->machine_num = 3;
+  add->is_origin_side = 1;
+  add->should_negotiate_end = 1;
+  add->target_hopnum = 2;
+
+  /* Let's have this one end after 4 packets */
+  add->states[CIRCPAD_STATE_BURST].length_dist.type = CIRCPAD_DIST_UNIFORM;
+  add->states[CIRCPAD_STATE_BURST].length_dist.param1 = 4;
+  add->states[CIRCPAD_STATE_BURST].length_dist.param2 = 4;
+  add->states[CIRCPAD_STATE_BURST].max_length = 4;
+
+  add->conditions.requires_vanguards = 1;
+  add->conditions.min_hops = 3;
+  add->conditions.state_mask = CIRCPAD_CIRC_OPENED|
+           CIRCPAD_CIRC_STREAMS|CIRCPAD_CIRC_HAS_NO_RELAY_EARLY;
+  add->conditions.purpose_mask = CIRCPAD_PURPOSE_ALL;
+  smartlist_add(origin_padding_machines, add);
+
+  add = helper_create_conditional_machine();
+  add->machine_num = 2;
+  smartlist_add(relay_padding_machines, add);
+
+  add = helper_create_conditional_machine();
+  add->machine_num = 3;
+  smartlist_add(relay_padding_machines, add);
+}
+
+void
+test_circuitpadding_conditions(void *arg)
+{
+  /**
+   * Test plan:
+   *  0. Make a few origin and client machines with diff conditions
+   *     * vanguards, purposes, has_opened circs, no relay early
+   *     * Client side should_negotiate_end
+   *     * Length limits
+   *  1. Test STATE_END transitions
+   *  2. Test new machine after end with same conditions
+   *  3. Test new machine due to changed conditions
+   *     * Esp: built event, no relay early, no streams
+   * XXX: Diff test:
+   *  1. Test STATE_END with pending timers
+   *  2. Test marking a circuit before padding callback fires
+   *  3. Test freeing a circuit before padding callback fires
+   */
+  (void)arg;
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  nodes_init();
+  dummy_channel.cmux = circuitmux_alloc();
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel,
+                                            &dummy_channel);
+  client_side = (circuit_t *)origin_circuit_new();
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  helper_create_conditional_machines();
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  /* Simulate extend. This should result in the original machine getting
+   * added, since the circuit is not built */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Verify that machine #2 is added */
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  /* Deliver a padding cell to the client, to trigger burst state */
+  circpad_cell_event_padding_sent(client_side);
+
+  /* This should have trigger length shutdown condition on client.. */
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  /* Verify machine is gone from both sides */
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+
+  /* Send another event.. verify machine gets re-added properly
+   * (test race with shutdown) */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  TO_ORIGIN_CIRCUIT(client_side)->p_streams = 0;
+  circpad_machine_event_circ_has_no_streams(TO_ORIGIN_CIRCUIT(client_side));
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  /* Now make the circuit opened and send built event */
+  TO_ORIGIN_CIRCUIT(client_side)->has_opened = 1;
+  circpad_machine_event_circ_built(TO_ORIGIN_CIRCUIT(client_side));
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  TO_ORIGIN_CIRCUIT(client_side)->remaining_relay_early_cells = 0;
+  circpad_machine_event_circ_has_no_relay_early(
+          TO_ORIGIN_CIRCUIT(client_side));
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 2);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 2);
+
+  get_options_mutable()->HSLayer2Nodes = (void*)1;
+  TO_ORIGIN_CIRCUIT(client_side)->p_streams = (void*)1;
+  circpad_machine_event_circ_has_streams(TO_ORIGIN_CIRCUIT(client_side));
+
+  /* Verify different machine is added */
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3);
+
+  /* Hold off on negotiated */
+  deliver_negotiated = 0;
+
+  /* Deliver a padding cell to the client, to trigger burst state */
+  circpad_cell_event_padding_sent(client_side);
+
+  /* This should have trigger length shutdown condition on client
+   * but not the response for the padding machine */
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_NE, NULL);
+
+  /* Verify machine is gone from the relay (but negotiated not back yet */
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+
+  /* Add another hop and verify it's back */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  tt_int_op(client_side->padding_machine[0]->machine_num, OP_EQ, 3);
+  tt_int_op(relay_side->padding_machine[0]->machine_num, OP_EQ, 3);
+
+  tt_ptr_op(client_side->padding_info[0], OP_NE, NULL);
+  tt_ptr_op(relay_side->padding_info[0], OP_NE, NULL);
+
+ done:
+  /* XXX: Free everything */
+  return;
+}
+
+void
+test_circuitpadding_circuitsetup_machine(void *arg)
+{
+  /**
+   * Test case plan:
+   *
+   * 1. Simulate a normal circuit setup pattern
+   *    a. Application traffic
+   *
+   * FIXME: This should focus more on exercising the machine
+   * features rather than actual traffic patterns. For example,
+   * test cancellation and bins empty/refill
+   */
+  (void)arg;
+
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+
+  dummy_channel.cmux = circuitmux_alloc();
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  relay_side = TO_CIRCUIT(new_fake_orcirc(&dummy_channel, &dummy_channel));
+
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+
+  nodes_init();
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+
+  timers_initialize();
+  circpad_machines_init();
+
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+  MOCK(node_get_by_id,
+       node_get_by_id_mock);
+
+  /* Test case #1: Build a 3 hop circuit, then wait and let pad */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  tt_int_op(n_client_cells, OP_EQ, 1);
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+                CIRCPAD_STATE_BURST);
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+          CIRCPAD_STATE_BURST);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->is_padding_timer_scheduled,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+  tt_int_op(n_relay_cells, OP_EQ, 1);
+
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+              CIRCPAD_STATE_GAP);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 2);
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 3);
+  tt_int_op(n_relay_cells, OP_EQ, 2);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 3);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 4);
+  tt_int_op(n_relay_cells, OP_EQ, 3);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 4);
+  tt_int_op(n_relay_cells, OP_EQ, 4);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 5);
+  tt_int_op(n_relay_cells, OP_EQ, 4);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 5);
+  tt_int_op(n_relay_cells, OP_EQ, 5);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  timers_advance_and_run(2000);
+  tt_int_op(n_client_cells, OP_EQ, 6);
+  tt_int_op(n_relay_cells, OP_EQ, 5);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  timers_advance_and_run(5000);
+  tt_int_op(n_client_cells, OP_EQ, 6);
+  tt_int_op(n_relay_cells, OP_EQ, 6);
+
+  tt_int_op(client_side->padding_info[0]->current_state,
+            OP_EQ, CIRCPAD_STATE_END);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  tt_int_op(relay_side->padding_info[0]->current_state,
+            OP_EQ, CIRCPAD_STATE_GAP);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+
+  /* Verify we can't schedule padding in END state */
+  circpad_decision_t ret =
+      circpad_machine_schedule_padding(client_side->padding_info[0]);
+  tt_int_op(ret, OP_EQ, CIRCPAD_STATE_UNCHANGED);
+
+  /* Simulate application traffic */
+  circpad_cell_event_nonpadding_sent(client_side);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN);
+  circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA,
+                                  TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+  tt_int_op(n_client_cells, OP_EQ, 6);
+  tt_int_op(n_relay_cells, OP_EQ, 7);
+
+  // Test timer cancellation
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  timers_advance_and_run(5000);
+  circpad_cell_event_padding_received(client_side);
+
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+                CIRCPAD_STATE_BURST);
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+          CIRCPAD_STATE_GAP);
+
+  tt_int_op(n_client_cells, OP_EQ, 8);
+  tt_int_op(n_relay_cells, OP_EQ, 8);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+
+  /* Test timer cancel due to state rules */
+  circpad_cell_event_nonpadding_sent(client_side);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_EQ, 0);
+  circpad_cell_event_padding_received(client_side);
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+
+  /* Simulate application traffic to cancel timer */
+  circpad_cell_event_nonpadding_sent(client_side);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_OUT);
+  circpad_deliver_unrecognized_cell_events(relay_side, CELL_DIRECTION_IN);
+  circpad_deliver_recognized_relay_cell_events(client_side, RELAY_COMMAND_DATA,
+                                  TO_ORIGIN_CIRCUIT(client_side)->cpath->next);
+
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(client_side->padding_machine[0], OP_EQ, NULL);
+
+  tt_ptr_op(relay_side->padding_info[0], OP_EQ, NULL);
+  tt_ptr_op(relay_side->padding_machine[0], OP_EQ, NULL);
+
+  /* No cells sent, except negotiate end from relay */
+  tt_int_op(n_client_cells, OP_EQ, 8);
+  tt_int_op(n_relay_cells, OP_EQ, 9);
+
+  /* Test mark for close and free */
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  timers_advance_and_run(5000);
+  circpad_cell_event_padding_received(client_side);
+
+  tt_int_op(n_client_cells, OP_EQ, 10);
+  tt_int_op(n_relay_cells, OP_EQ, 10);
+
+  tt_int_op(client_side->padding_info[0]->current_state, OP_EQ,
+                CIRCPAD_STATE_BURST);
+  tt_int_op(relay_side->padding_info[0]->current_state, OP_EQ,
+          CIRCPAD_STATE_GAP);
+
+  tt_int_op(client_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  tt_int_op(relay_side->padding_info[0]->padding_scheduled_at_usec,
+            OP_NE, 0);
+  circuit_mark_for_close(client_side, END_CIRC_REASON_FLAG_REMOTE);
+  free_fake_orcirc(relay_side);
+  timers_advance_and_run(5000);
+
+  /* No cells sent */
+  tt_int_op(n_client_cells, OP_EQ, 10);
+  tt_int_op(n_relay_cells, OP_EQ, 10);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  timers_shutdown();
+  monotime_disable_test_mocking();
+  UNMOCK(circuit_package_relay_cell);
+  UNMOCK(circuitmux_attach_circuit);
+
+  return;
+}
+
+/** Helper function: Initializes a padding machine where every state uses the
+ *  uniform probability distribution.  */
+static void
+helper_circpad_circ_distribution_machine_setup(int min, int max)
+{
+  circpad_machine_states_init(&circ_client_machine, 7);
+
+  circpad_state_t *zero_st = &circ_client_machine.states[0];
+  zero_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 1;
+  zero_st->iat_dist.type = CIRCPAD_DIST_UNIFORM;
+  zero_st->iat_dist.param1 = min;
+  zero_st->iat_dist.param2 = max;
+  zero_st->start_usec = min;
+  zero_st->range_usec = max;
+
+  circpad_state_t *first_st = &circ_client_machine.states[1];
+  first_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 2;
+  first_st->iat_dist.type = CIRCPAD_DIST_LOGISTIC;
+  first_st->iat_dist.param1 = min;
+  first_st->iat_dist.param2 = max;
+  first_st->start_usec = min;
+  first_st->range_usec = max;
+
+  circpad_state_t *second_st = &circ_client_machine.states[2];
+  second_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 3;
+  second_st->iat_dist.type = CIRCPAD_DIST_LOG_LOGISTIC;
+  second_st->iat_dist.param1 = min;
+  second_st->iat_dist.param2 = max;
+  second_st->start_usec = min;
+  second_st->range_usec = max;
+
+  circpad_state_t *third_st = &circ_client_machine.states[3];
+  third_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 4;
+  third_st->iat_dist.type = CIRCPAD_DIST_GEOMETRIC;
+  third_st->iat_dist.param1 = min;
+  third_st->iat_dist.param2 = max;
+  third_st->start_usec = min;
+  third_st->range_usec = max;
+
+  circpad_state_t *fourth_st = &circ_client_machine.states[4];
+  fourth_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 5;
+  fourth_st->iat_dist.type = CIRCPAD_DIST_WEIBULL;
+  fourth_st->iat_dist.param1 = min;
+  fourth_st->iat_dist.param2 = max;
+  fourth_st->start_usec = min;
+  fourth_st->range_usec = max;
+
+  circpad_state_t *fifth_st = &circ_client_machine.states[5];
+  fifth_st->next_state[CIRCPAD_EVENT_NONPADDING_RECV] = 6;
+  fifth_st->iat_dist.type = CIRCPAD_DIST_PARETO;
+  fifth_st->iat_dist.param1 = min;
+  fifth_st->iat_dist.param2 = max;
+  fifth_st->start_usec = min;
+  fifth_st->range_usec = max;
+}
+
+/** Simple test that the padding delays sampled from a uniform distribution
+ *  actually faill within the uniform distribution range. */
+/* TODO: Upgrade this test so that each state tests a different prob
+ * distribution */
+static void
+test_circuitpadding_sample_distribution(void *arg)
+{
+  circpad_machine_state_t *mi;
+  int n_samples;
+  int n_states;
+
+  (void) arg;
+
+  /* mock this function so that we dont actually schedule any padding */
+  MOCK(circpad_machine_schedule_padding,
+       circpad_machine_schedule_padding_mock);
+
+  /* Initialize a machine with multiple probability distributions that should
+   * return values between 0 and 5 */
+  circpad_machines_init();
+  helper_circpad_circ_distribution_machine_setup(0, 10);
+
+  /* Initialize machine and circuits */
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+  mi = client_side->padding_info[0];
+
+  /* For every state, sample a bunch of values from the distribution and ensure
+   * they fall within range. */
+  for (n_states = 0 ; n_states < 6; n_states++) {
+    /* Make sure we in the right state */
+    tt_int_op(client_side->padding_info[0]->current_state, OP_EQ, n_states);
+
+    for (n_samples = 0; n_samples < 100; n_samples++) {
+      circpad_delay_t delay = circpad_machine_sample_delay(mi);
+      tt_int_op(delay, OP_GE, 0);
+      tt_int_op(delay, OP_LE, 10);
+    }
+
+    /* send a non-padding cell to move to the next machine state */
+    circpad_cell_event_nonpadding_received((circuit_t*)client_side);
+  }
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+  UNMOCK(circpad_machine_schedule_padding);
+}
+
+static circpad_decision_t
+circpad_machine_spec_transition_mock(circpad_machine_state_t *mi,
+                                circpad_event_t event)
+{
+  (void) mi;
+  (void) event;
+
+  return CIRCPAD_STATE_UNCHANGED;
+}
+
+/* Test per-machine padding rate limits */
+static void
+test_circuitpadding_machine_rate_limiting(void *arg)
+{
+  (void) arg;
+  bool retval;
+  circpad_machine_state_t *mi;
+  int i;
+
+  /* Ignore machine transitions for the purposes of this function, we only
+   * really care about padding counts */
+  MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock);
+  MOCK(circpad_send_command_to_hop, circpad_send_command_to_hop_mock);
+
+  /* Setup machine and circuits */
+  client_side = TO_CIRCUIT(origin_circuit_new());
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  helper_create_basic_machine();
+  client_side->padding_machine[0] = &circ_client_machine;
+  client_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(client_side, 0);
+  mi = client_side->padding_info[0];
+  /* Set up the machine info so that we can get through the basic functions */
+  mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE;
+
+  /* First we are going to test the per-machine rate limits */
+  circ_client_machine.max_padding_percent = 50;
+  circ_client_machine.allowed_padding_count = 100;
+
+  /* Check padding limit, should be fine since we haven't sent anything yet. */
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Send 99 padding cells which is below circpad_global_allowed_cells=100, so
+   * the rate limit will not trigger */
+  for (i=0;i<99;i++) {
+    circpad_send_padding_cell_for_callback(mi);
+  }
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Now send another padding cell to pass circpad_global_allowed_cells=100,
+     and see that the limit will trigger */
+  circpad_send_padding_cell_for_callback(mi);
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 1);
+
+  retval = circpad_machine_schedule_padding(mi);
+  tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED);
+
+  /* Cover wrap */
+  for (;i<UINT16_MAX;i++) {
+    circpad_send_padding_cell_for_callback(mi);
+  }
+  tt_int_op(mi->padding_sent, OP_EQ, UINT16_MAX/2+1);
+
+  tt_ptr_op(client_side->padding_info[0], OP_EQ, mi);
+  for (i=0;i<UINT16_MAX;i++) {
+    circpad_cell_event_nonpadding_sent(client_side);
+  }
+
+  tt_int_op(mi->nonpadding_sent, OP_EQ, UINT16_MAX/2);
+  tt_int_op(mi->padding_sent, OP_EQ, UINT16_MAX/4+1);
+
+ done:
+  free_fake_origin_circuit(TO_ORIGIN_CIRCUIT(client_side));
+}
+
+/* Test global padding rate limits */
+static void
+test_circuitpadding_global_rate_limiting(void *arg)
+{
+  (void) arg;
+  bool retval;
+  circpad_machine_state_t *mi;
+  int i;
+
+  /* Ignore machine transitions for the purposes of this function, we only
+   * really care about padding counts */
+  MOCK(circpad_machine_spec_transition, circpad_machine_spec_transition_mock);
+  MOCK(circuitmux_attach_circuit, circuitmux_attach_circuit_mock);
+  MOCK(circuit_package_relay_cell,
+       circuit_package_relay_cell_mock);
+  MOCK(monotime_absolute_usec, mock_monotime_absolute_usec);
+
+  monotime_init();
+  monotime_enable_test_mocking();
+  monotime_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  monotime_coarse_set_mock_time_nsec(1*TOR_NSEC_PER_USEC);
+  curr_mocked_time = 1*TOR_NSEC_PER_USEC;
+  timers_initialize();
+
+  client_side = (circuit_t *)origin_circuit_new();
+  client_side->purpose = CIRCUIT_PURPOSE_C_GENERAL;
+  dummy_channel.cmux = circuitmux_alloc();
+
+  /* Setup machine and circuits */
+  relay_side = (circuit_t *)new_fake_orcirc(&dummy_channel, &dummy_channel);
+  relay_side->purpose = CIRCUIT_PURPOSE_OR;
+  helper_create_basic_machine();
+  relay_side->padding_machine[0] = &circ_client_machine;
+  relay_side->padding_info[0] =
+    circpad_circuit_machineinfo_new(relay_side, 0);
+  mi = relay_side->padding_info[0];
+  /* Set up the machine info so that we can get through the basic functions */
+  mi->state_length = CIRCPAD_STATE_LENGTH_INFINITE;
+
+  simulate_single_hop_extend(client_side, relay_side, 1);
+  simulate_single_hop_extend(client_side, relay_side, 1);
+
+  /* Now test the global limits by setting up the consensus */
+  networkstatus_t vote1;
+  vote1.net_params = smartlist_new();
+  smartlist_split_string(vote1.net_params,
+         "circpad_global_allowed_cells=100 circpad_global_max_padding_pct=50",
+                         NULL, 0, 0);
+  /* Register global limits with the padding subsystem */
+  circpad_new_consensus_params(&vote1);
+
+  /* Check padding limit, should be fine since we haven't sent anything yet. */
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Send 99 padding cells which is below circpad_global_allowed_cells=100, so
+   * the rate limit will not trigger */
+  for (i=0;i<99;i++) {
+    circpad_send_padding_cell_for_callback(mi);
+  }
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+  /* Now send another padding cell to pass circpad_global_allowed_cells=100,
+     and see that the limit will trigger */
+  circpad_send_padding_cell_for_callback(mi);
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 1);
+
+  retval = circpad_machine_schedule_padding(mi);
+  tt_int_op(retval, OP_EQ, CIRCPAD_STATE_UNCHANGED);
+
+  /* Now send 92 non-padding cells to get near the
+   * circpad_global_max_padding_pct=50 limit; in particular with 96 non-padding
+   * cells, the padding traffic is still 51% of total traffic so limit should
+   * trigger */
+  for (i=0;i<92;i++) {
+    circpad_cell_event_nonpadding_sent(relay_side);
+  }
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 1);
+
+  /* Send another non-padding cell to bring the padding traffic to 50% of total
+   * traffic and get past the limit */
+  circpad_cell_event_nonpadding_sent(relay_side);
+  retval = circpad_machine_reached_padding_limit(mi);
+  tt_int_op(retval, OP_EQ, 0);
+
+ done:
+  free_fake_orcirc(relay_side);
+  circuitmux_detach_all_circuits(dummy_channel.cmux, NULL);
+  circuitmux_free(dummy_channel.cmux);
+  SMARTLIST_FOREACH(vote1.net_params, char *, cp, tor_free(cp));
+  smartlist_free(vote1.net_params);
+}
+
+#define TEST_CIRCUITPADDING(name, flags) \
+    { #name, test_##name, (flags), NULL, NULL }
+
+struct testcase_t circuitpadding_tests[] = {
+  //TEST_CIRCUITPADDING(circuitpadding_circuitsetup_machine, 0),
+  TEST_CIRCUITPADDING(circuitpadding_tokens, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_negotiation, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_wronghop, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_circuitsetup_machine, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_conditions, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_rtt, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_sample_distribution, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_machine_rate_limiting, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_global_rate_limiting, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_token_removal_lower, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_token_removal_higher, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_closest_token_removal, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_closest_token_removal_usec, TT_FORK),
+  TEST_CIRCUITPADDING(circuitpadding_token_removal_exact, TT_FORK),
+  END_OF_TESTCASES
+};
+

+ 25 - 0
src/test/test_containers.c

@@ -96,6 +96,30 @@ test_container_smartlist_basic(void *arg)
   tor_free(v555);
   tor_free(v555);
 }
 }
 
 
+/** Test SMARTLIST_FOREACH_REVERSE_BEGIN loop macro */
+static void
+test_container_smartlist_foreach_reverse(void *arg)
+{
+  smartlist_t *sl = smartlist_new();
+  int i;
+
+  (void) arg;
+
+  /* Add integers to smartlist in increasing order */
+  for (i=0;i<100;i++) {
+    smartlist_add(sl, (void*)(uintptr_t)i);
+  }
+
+  /* Pop them out in reverse and test their value */
+  SMARTLIST_FOREACH_REVERSE_BEGIN(sl, void*, k) {
+    i--;
+    tt_ptr_op(k, OP_EQ, (void*)(uintptr_t)i);
+  } SMARTLIST_FOREACH_END(k);
+
+ done:
+  smartlist_free(sl);
+}
+
 /** Run unit tests for smartlist-of-strings functionality. */
 /** Run unit tests for smartlist-of-strings functionality. */
 static void
 static void
 test_container_smartlist_strings(void *arg)
 test_container_smartlist_strings(void *arg)
@@ -1281,6 +1305,7 @@ test_container_smartlist_strings_eq(void *arg)
 struct testcase_t container_tests[] = {
 struct testcase_t container_tests[] = {
   CONTAINER_LEGACY(smartlist_basic),
   CONTAINER_LEGACY(smartlist_basic),
   CONTAINER_LEGACY(smartlist_strings),
   CONTAINER_LEGACY(smartlist_strings),
+  CONTAINER_LEGACY(smartlist_foreach_reverse),
   CONTAINER_LEGACY(smartlist_overlap),
   CONTAINER_LEGACY(smartlist_overlap),
   CONTAINER_LEGACY(smartlist_digests),
   CONTAINER_LEGACY(smartlist_digests),
   CONTAINER_LEGACY(smartlist_join),
   CONTAINER_LEGACY(smartlist_join),

+ 1428 - 0
src/test/test_prob_distr.c

@@ -0,0 +1,1428 @@
+/* Copyright (c) 2018, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file test_prob_distr.c
+ * \brief Test probability distributions.
+ * \detail
+ *
+ * For each probability distribution we do two kinds of tests:
+ *
+ * a) We do numerical deterministic testing of their cdf/icdf/sf/isf functions
+ *    and the various relationships between them for each distribution. We also
+ *    do deterministic tests on their sampling functions. Test vectors for
+ *    these tests were computed from alternative implementations and were
+ *    eyeballed to make sure they make sense
+ *    (e.g. src/test/prob_distr_mpfr_ref.c computes logit(p) using GNU mpfr
+ *    with 200-bit precision and is then tested in test_logit_logistic()).
+ *
+ * b) We do stochastic hypothesis testing (G-test) to ensure that sampling from
+ *    the given distributions is distributed properly. The stochastic tests are
+ *    slow and their false positive rate is not well suited for CI, so they are
+ *    currently disabled-by-default and put into 'tests-slow'.
+ */
+
+#define PROB_DISTR_PRIVATE
+
+#include "orconfig.h"
+
+#include "test/test.h"
+
+#include "core/or/or.h"
+
+#include "lib/math/prob_distr.h"
+#include "lib/math/fp.h"
+#include "lib/crypt_ops/crypto_rand.h"
+
+#include <float.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/**
+ * Return floor(d) converted to size_t, as a workaround for complaints
+ * under -Wbad-function-cast for (size_t)floor(d).
+ */
+static size_t
+floor_to_size_t(double d)
+{
+  double integral_d = floor(d);
+  return (size_t)integral_d;
+}
+
+/**
+ * Return ceil(d) converted to size_t, as a workaround for complaints
+ * under -Wbad-function-cast for (size_t)ceil(d).
+ */
+static size_t
+ceil_to_size_t(double d)
+{
+  double integral_d = ceil(d);
+  return (size_t)integral_d;
+}
+
+/*
+ * Geometric(p) distribution, supported on {1, 2, 3, ...}.
+ *
+ * Compute the probability mass function Geom(n; p) of the number of
+ * trials before the first success when success has probability p.
+ */
+static double
+logpmf_geometric(unsigned n, double p)
+{
+  /* This is actually a check against 1, but we do >= so that the compiler
+     does not raise a -Wfloat-equal */
+  if (p >= 1) {
+    if (n == 1)
+      return 0;
+    else
+      return -HUGE_VAL;
+  }
+  return (n - 1)*log1p(-p) + log(p);
+}
+
+/**
+ * Compute the logistic function, translated in output by 1/2:
+ * logistichalf(x) = logistic(x) - 1/2.  Well-conditioned on the entire
+ * real plane, with maximum condition number 1 at 0.
+ *
+ * This implementation gives relative error bounded by 5 eps.
+ */
+static double
+logistichalf(double x)
+{
+  /*
+   * Rewrite this with the identity
+   *
+   *  1/(1 + e^{-x}) - 1/2
+   *  = (1 - 1/2 - e^{-x}/2)/(1 + e^{-x})
+   *  = (1/2 - e^{-x}/2)/(1 + e^{-x})
+   *  = (1 - e^{-x})/[2 (1 + e^{-x})]
+   *  = -(e^{-x} - 1)/[2 (1 + e^{-x})],
+   *
+   * which we can evaluate by -expm1(-x)/[2 (1 + exp(-x))].
+   *
+   * Suppose exp has error d0, + has error d1, expm1 has error
+   * d2, and / has error d3, so we evaluate
+   *
+   *  -(1 + d2) (1 + d3) (e^{-x} - 1)
+   *    / [2 (1 + d1) (1 + (1 + d0) e^{-x})].
+   *
+   * In the denominator,
+   *
+   *  1 + (1 + d0) e^{-x}
+   *  = 1 + e^{-x} + d0 e^{-x}
+   *  = (1 + e^{-x}) (1 + d0 e^{-x}/(1 + e^{-x})),
+   *
+   * so the relative error of the numerator is
+   *
+   *  d' = d2 + d3 + d2 d3,
+   * and of the denominator,
+   *  d'' = d1 + d0 e^{-x}/(1 + e^{-x}) + d0 d1 e^{-x}/(1 + e^{-x})
+   *      = d1 + d0 L(-x) + d0 d1 L(-x),
+   *
+   * where L(-x) is logistic(-x).  By Lemma 1 the relative error
+   * of the quotient is bounded by
+   *
+   *  2|d2 + d3 + d2 d3 - d1 - d0 L(x) + d0 d1 L(x)|,
+   *
+   * Since 0 < L(x) < 1, this is bounded by
+   *
+   *  2|d2| + 2|d3| + 2|d2 d3| + 2|d1| + 2|d0| + 2|d0 d1|
+   *  <= 4 eps + 2 eps^2.
+   */
+  if (x < log(DBL_EPSILON/8)) {
+    /*
+     * Avoid overflow in e^{-x}.  When x < log(eps/4), we
+     * we further have x < logit(eps/4), so that
+     * logistic(x) < eps/4.  Hence the relative error of
+     * logistic(x) - 1/2 from -1/2 is bounded by eps/2, and
+     * so the relative error of -1/2 from logistic(x) - 1/2
+     * is bounded by eps.
+     */
+    return -0.5;
+  } else {
+    return -expm1(-x)/(2*(1 + exp(-x)));
+  }
+}
+
+/**
+ * Compute the log of the sum of the exps.  Caller should arrange the
+ * array in descending order to minimize error because I don't want to
+ * deal with using temporary space and the one caller in this file
+ * arranges that anyway.
+ *
+ * Warning: This implementation does not handle infinite or NaN inputs
+ * sensibly, because I don't need that here at the moment.  (NaN, or
+ * -inf and +inf together, should yield NaN; +inf and finite should
+ * yield +inf; otherwise all -inf should be ignored because exp(-inf) =
+ * 0.)
+ */
+static double
+logsumexp(double *A, size_t n)
+{
+  double maximum, sum;
+  size_t i;
+
+  if (n == 0)
+    return log(0);
+
+  maximum = A[0];
+  for (i = 1; i < n; i++) {
+    if (A[i] > maximum)
+      maximum = A[i];
+  }
+
+  sum = 0;
+  for (i = n; i --> 0;)
+    sum += exp(A[i] - maximum);
+
+  return log(sum) + maximum;
+}
+
+/**
+ * Compute log(1 - e^x).  Defined only for negative x so that e^x < 1.
+ * This is the complement of a probability in log space.
+ */
+static double
+log1mexp(double x)
+{
+
+  /*
+   * We want to compute log on [0, 1/2) but log1p on [1/2, +inf),
+   * so partition x at -log(2) = log(1/2).
+   */
+  if (-log(2) < x)
+    return log(-expm1(x));
+  else
+    return log1p(-exp(x));
+}
+
+/*
+ * Tests of numerical errors in computing logit, logistic, and the
+ * various cdfs, sfs, icdfs, and isfs.
+ */
+
+#define arraycount(A) (sizeof(A)/sizeof(A[0]))
+
+/** Return relative error between <b>actual</b> and <b>expected</b>.
+ *  Special cases: If <b>expected</b> is zero or infinite, return 1 if
+ *  <b>actual</b> is equal to <b>expected</b> and 0 if not, since the
+ *  usual notion of relative error is undefined but we only use this
+ *  for testing relerr(e, a) <= bound.  If either is NaN, return NaN,
+ *  which has the property that NaN <= bound is false no matter what
+ *  bound is.
+ *
+ *  Beware: if you test !(relerr(e, a) > bound), then then the result
+ *  is true when a is NaN because NaN > bound is false too.  See
+ *  CHECK_RELERR for correct use to decide when to report failure.
+ */
+static double
+relerr(double expected, double actual)
+{
+  /*
+   * To silence -Wfloat-equal, we have to test for equality using
+   * inequalities: we have (fabs(expected) <= 0) iff (expected == 0),
+   * and (actual <= expected && actual >= expected) iff actual ==
+   * expected whether expected is zero or infinite.
+   */
+  if (fabs(expected) <= 0 || tor_isinf(expected)) {
+    if (actual <= expected && actual >= expected)
+      return 0;
+    else
+      return 1;
+  } else {
+    return fabs((expected - actual)/expected);
+  }
+}
+
+/** Check that relative error of <b>expected</b> and <b>actual</b> is within
+ *  <b>relerr_bound</b>.  Caller must arrange to have i and relerr_bound in
+ *  scope.  */
+#define CHECK_RELERR(expected, actual) do {                                   \
+  double check_expected = (expected);                                         \
+  double check_actual = (actual);                                             \
+  const char *str_expected = #expected;                                       \
+  const char *str_actual = #actual;                                           \
+  double check_relerr = relerr(expected, actual);                             \
+  if (!(relerr(check_expected, check_actual) <= relerr_bound)) {              \
+    log_warn(LD_GENERAL, "%s:%d: case %u: relerr(%s=%.17e, %s=%.17e)"        \
+             " = %.17e > %.17e\n",                                            \
+             __func__, __LINE__, (unsigned) i,                                \
+             str_expected, check_expected,                                    \
+             str_actual, check_actual,                                        \
+             check_relerr, relerr_bound);                                     \
+    ok = false;                                                               \
+  }                                                                           \
+} while (0)
+
+/* Check that a <= b.
+ * Caller must arrange to have i in scope.  */
+#define CHECK_LE(a, b) do {                                                   \
+  double check_a = (a);                                                       \
+  double check_b = (b);                                                       \
+  const char *str_a = #a;                                                     \
+  const char *str_b = #b;                                                     \
+  if (!(check_a <= check_b)) {                                                \
+    log_warn(LD_GENERAL, "%s:%d: case %u: %s=%.17e > %s=%.17e\n",             \
+             __func__, __LINE__, (unsigned) i,                                \
+             str_a, check_a, str_b, check_b);                                 \
+    ok = false;                                                               \
+  }                                                                           \
+} while (0)
+
+/**
+ * Test the logit and logistic functions.  Confirm that they agree with
+ * the cdf, sf, icdf, and isf of the standard Logistic distribution.
+ * Confirm that the sampler for the standard logistic distribution maps
+ * [0, 1] into the right subinterval for the inverse transform, for
+ * this implementation.
+ */
+static void
+test_logit_logistic(void *arg)
+{
+  (void) arg;
+
+  static const struct {
+    double x;                   /* x = logit(p) */
+    double p;                   /* p = logistic(x) */
+    double phalf;               /* p - 1/2 = logistic(x) - 1/2 */
+  } cases[] = {
+    { -HUGE_VAL, 0, -0.5 },
+    { -1000, 0, -0.5 },
+    { -710, 4.47628622567513e-309, -0.5 },
+    { -708, 3.307553003638408e-308, -0.5 },
+    { -2, .11920292202211755, -.3807970779778824 },
+    { -1.0000001, .2689414017088022, -.23105859829119776 },
+    { -1, .2689414213699951, -.23105857863000487 },
+    { -0.9999999, .26894144103118883, -.2310585589688111 },
+    /* see src/test/prob_distr_mpfr_ref.c for computation */
+    { -4.000000000537333e-5, .49999, -1.0000000000010001e-5 },
+    { -4.000000000533334e-5, .49999, -.00001 },
+    { -4.000000108916878e-9, .499999999, -1.0000000272292198e-9 },
+    { -4e-9, .499999999, -1e-9 },
+    { -4e-16, .5, -1e-16 },
+    { -4e-300, .5, -1e-300 },
+    { 0, .5, 0 },
+    { 4e-300, .5, 1e-300 },
+    { 4e-16, .5, 1e-16 },
+    { 3.999999886872274e-9, .500000001, 9.999999717180685e-10 },
+    { 4e-9, .500000001, 1e-9 },
+    { 4.0000000005333336e-5, .50001, .00001 },
+    { 8.000042667076272e-3, .502, .002 },
+    { 0.9999999, .7310585589688111, .2310585589688111 },
+    { 1, .7310585786300049, .23105857863000487 },
+    { 1.0000001, .7310585982911977, .23105859829119774 },
+    { 2, .8807970779778823, .3807970779778824 },
+    { 708, 1, .5 },
+    { 710, 1, .5 },
+    { 1000, 1, .5 },
+    { HUGE_VAL, 1, .5 },
+  };
+  double relerr_bound = 3e-15; /* >10eps */
+  size_t i;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double phalf = cases[i].phalf;
+
+    /*
+     * cdf is logistic, icdf is logit, and symmetry for
+     * sf/isf.
+     */
+    CHECK_RELERR(logistic(x), cdf_logistic(x, 0, 1));
+    CHECK_RELERR(logistic(-x), sf_logistic(x, 0, 1));
+    CHECK_RELERR(logit(p), icdf_logistic(p, 0, 1));
+    CHECK_RELERR(-logit(p), isf_logistic(p, 0, 1));
+
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2, 0, 2));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2, 0, 2));
+    CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0, 2)/2);
+    CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, 2)/2);
+
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x/2, 0, .5));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x/2, 0, .5));
+    CHECK_RELERR(icdf_logistic(p, 0, 1), icdf_logistic(p, 0,.5)*2);
+    CHECK_RELERR(isf_logistic(p, 0, 1), isf_logistic(p, 0, .5)*2);
+
+    CHECK_RELERR(cdf_logistic(x, 0, 1), cdf_logistic(x*2 + 1, 1, 2));
+    CHECK_RELERR(sf_logistic(x, 0, 1), sf_logistic(x*2 + 1, 1, 2));
+
+    /*
+     * For p near 0 and p near 1/2, the arithmetic of
+     * translating by 1 loses precision.
+     */
+    if (fabs(p) > DBL_EPSILON && fabs(p) < 0.4) {
+      CHECK_RELERR(icdf_logistic(p, 0, 1),
+          (icdf_logistic(p, 1, 2) - 1)/2);
+      CHECK_RELERR(isf_logistic(p, 0, 1),
+          (isf_logistic(p, 1, 2) - 1)/2);
+    }
+
+    CHECK_RELERR(p, logistic(x));
+    CHECK_RELERR(phalf, logistichalf(x));
+
+    /*
+     * On the interior floating-point numbers, either logit or
+     * logithalf had better give the correct answer.
+     *
+     * For probabilities near 0, we can get much finer resolution with
+     * logit, and for probabilities near 1/2, we can get much finer
+     * resolution with logithalf by representing them using p - 1/2.
+     *
+     * E.g., we can write -.00001 for phalf, and .49999 for p, but the
+     * difference 1/2 - .00001 gives 1.0000000000010001e-5 in binary64
+     * arithmetic.  So test logit(.49999) which should give the same
+     * answer as logithalf(-1.0000000000010001e-5), namely
+     * -4.000000000537333e-5, and also test logithalf(-.00001) which
+     * gives -4.000000000533334e-5 instead -- but don't expect
+     * logit(.49999) to give -4.000000000533334e-5 even though it looks
+     * like 1/2 - .00001.
+     *
+     * A naive implementation of logit will just use log(p/(1 - p)) and
+     * give the answer -4.000000000551673e-05 for .49999, which is
+     * wrong in a lot of digits, which happens because log is
+     * ill-conditioned near 1 and thus amplifies whatever relative
+     * error we made in computing p/(1 - p).
+     */
+    if ((0 < p && p < 1) || tor_isinf(x)) {
+      if (phalf >= p - 0.5 && phalf <= p - 0.5)
+        CHECK_RELERR(x, logit(p));
+      if (p >= 0.5 + phalf && p <= 0.5 + phalf)
+        CHECK_RELERR(x, logithalf(phalf));
+    }
+
+    CHECK_RELERR(-phalf, logistichalf(-x));
+    if (fabs(phalf) < 0.5 || tor_isinf(x))
+      CHECK_RELERR(-x, logithalf(-phalf));
+    if (p < 1 || tor_isinf(x)) {
+      CHECK_RELERR(1 - p, logistic(-x));
+      if (p > .75 || tor_isinf(x))
+        CHECK_RELERR(-x, logit(1 - p));
+    } else {
+      CHECK_LE(logistic(-x), 1e-300);
+    }
+  }
+
+  for (i = 0; i <= 100; i++) {
+    double p0 = (double)i/100;
+
+    CHECK_RELERR(logit(p0/(1 + M_E)), sample_logistic(0, 0, p0));
+    CHECK_RELERR(-logit(p0/(1 + M_E)), sample_logistic(1, 0, p0));
+    CHECK_RELERR(logithalf(p0*(0.5 - 1/(1 + M_E))),
+        sample_logistic(0, 1, p0));
+    CHECK_RELERR(-logithalf(p0*(0.5 - 1/(1 + M_E))),
+        sample_logistic(1, 1, p0));
+  }
+
+  if (!ok)
+    printf("fail logit/logistic / logistic cdf/sf\n");
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the cdf, sf, icdf, and isf of the LogLogistic distribution.
+ */
+static void
+test_log_logistic(void *arg)
+{
+  (void) arg;
+
+  static const struct {
+    /* x is a point in the support of the LogLogistic distribution */
+    double x;
+    /* 'p' is the probability that a random variable X for a given LogLogistic
+     * probability ditribution will take value less-or-equal to x */
+    double p;
+    /* 'np' is the probability that a random variable X for a given LogLogistic
+     * probability distribution will take value greater-or-equal to x. */
+    double np;
+  } cases[] = {
+    { 0, 0, 1 },
+    { 1e-300, 1e-300, 1 },
+    { 1e-17, 1e-17, 1 },
+    { 1e-15, 1e-15, .999999999999999 },
+    { .1, .09090909090909091, .90909090909090909 },
+    { .25, .2, .8 },
+    { .5, .33333333333333333, .66666666666666667 },
+    { .75, .42857142857142855, .5714285714285714 },
+    { .9999, .49997499874993756, .5000250012500626 },
+    { .99999999, .49999999749999996, .5000000025 },
+    { .999999999999999, .49999999999999994, .5000000000000002 },
+    { 1, .5, .5 },
+  };
+  double relerr_bound = 3e-15;
+  size_t i;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double np = cases[i].np;
+
+    CHECK_RELERR(p, cdf_log_logistic(x, 1, 1));
+    CHECK_RELERR(p, cdf_log_logistic(x/2, .5, 1));
+    CHECK_RELERR(p, cdf_log_logistic(x*2, 2, 1));
+    CHECK_RELERR(p, cdf_log_logistic(sqrt(x), 1, 2));
+    CHECK_RELERR(p, cdf_log_logistic(sqrt(x)/2, .5, 2));
+    CHECK_RELERR(p, cdf_log_logistic(sqrt(x)*2, 2, 2));
+    if (2*sqrt(DBL_MIN) < x) {
+      CHECK_RELERR(p, cdf_log_logistic(x*x, 1, .5));
+      CHECK_RELERR(p, cdf_log_logistic(x*x/2, .5, .5));
+      CHECK_RELERR(p, cdf_log_logistic(x*x*2, 2, .5));
+    }
+
+    CHECK_RELERR(np, sf_log_logistic(x, 1, 1));
+    CHECK_RELERR(np, sf_log_logistic(x/2, .5, 1));
+    CHECK_RELERR(np, sf_log_logistic(x*2, 2, 1));
+    CHECK_RELERR(np, sf_log_logistic(sqrt(x), 1, 2));
+    CHECK_RELERR(np, sf_log_logistic(sqrt(x)/2, .5, 2));
+    CHECK_RELERR(np, sf_log_logistic(sqrt(x)*2, 2, 2));
+    if (2*sqrt(DBL_MIN) < x) {
+      CHECK_RELERR(np, sf_log_logistic(x*x, 1, .5));
+      CHECK_RELERR(np, sf_log_logistic(x*x/2, .5, .5));
+      CHECK_RELERR(np, sf_log_logistic(x*x*2, 2, .5));
+    }
+
+    CHECK_RELERR(np, cdf_log_logistic(1/x, 1, 1));
+    CHECK_RELERR(np, cdf_log_logistic(1/(2*x), .5, 1));
+    CHECK_RELERR(np, cdf_log_logistic(2/x, 2, 1));
+    CHECK_RELERR(np, cdf_log_logistic(1/sqrt(x), 1, 2));
+    CHECK_RELERR(np, cdf_log_logistic(1/(2*sqrt(x)), .5, 2));
+    CHECK_RELERR(np, cdf_log_logistic(2/sqrt(x), 2, 2));
+    if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
+      CHECK_RELERR(np, cdf_log_logistic(1/(x*x), 1, .5));
+      CHECK_RELERR(np, cdf_log_logistic(1/(2*x*x), .5, .5));
+      CHECK_RELERR(np, cdf_log_logistic(2/(x*x), 2, .5));
+    }
+
+    CHECK_RELERR(p, sf_log_logistic(1/x, 1, 1));
+    CHECK_RELERR(p, sf_log_logistic(1/(2*x), .5, 1));
+    CHECK_RELERR(p, sf_log_logistic(2/x, 2, 1));
+    CHECK_RELERR(p, sf_log_logistic(1/sqrt(x), 1, 2));
+    CHECK_RELERR(p, sf_log_logistic(1/(2*sqrt(x)), .5, 2));
+    CHECK_RELERR(p, sf_log_logistic(2/sqrt(x), 2, 2));
+    if (2*sqrt(DBL_MIN) < x && x < 1/(2*sqrt(DBL_MIN))) {
+      CHECK_RELERR(p, sf_log_logistic(1/(x*x), 1, .5));
+      CHECK_RELERR(p, sf_log_logistic(1/(2*x*x), .5, .5));
+      CHECK_RELERR(p, sf_log_logistic(2/(x*x), 2, .5));
+    }
+
+    CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
+    CHECK_RELERR(x/2, icdf_log_logistic(p, .5, 1));
+    CHECK_RELERR(x*2, icdf_log_logistic(p, 2, 1));
+    CHECK_RELERR(x, icdf_log_logistic(p, 1, 1));
+    CHECK_RELERR(sqrt(x)/2, icdf_log_logistic(p, .5, 2));
+    CHECK_RELERR(sqrt(x)*2, icdf_log_logistic(p, 2, 2));
+    CHECK_RELERR(sqrt(x), icdf_log_logistic(p, 1, 2));
+    CHECK_RELERR(x*x/2, icdf_log_logistic(p, .5, .5));
+    CHECK_RELERR(x*x*2, icdf_log_logistic(p, 2, .5));
+
+    if (np < .9) {
+      CHECK_RELERR(x, isf_log_logistic(np, 1, 1));
+      CHECK_RELERR(x/2, isf_log_logistic(np, .5, 1));
+      CHECK_RELERR(x*2, isf_log_logistic(np, 2, 1));
+      CHECK_RELERR(sqrt(x), isf_log_logistic(np, 1, 2));
+      CHECK_RELERR(sqrt(x)/2, isf_log_logistic(np, .5, 2));
+      CHECK_RELERR(sqrt(x)*2, isf_log_logistic(np, 2, 2));
+      CHECK_RELERR(x*x, isf_log_logistic(np, 1, .5));
+      CHECK_RELERR(x*x/2, isf_log_logistic(np, .5, .5));
+      CHECK_RELERR(x*x*2, isf_log_logistic(np, 2, .5));
+
+      CHECK_RELERR(1/x, icdf_log_logistic(np, 1, 1));
+      CHECK_RELERR(1/(2*x), icdf_log_logistic(np, .5, 1));
+      CHECK_RELERR(2/x, icdf_log_logistic(np, 2, 1));
+      CHECK_RELERR(1/sqrt(x), icdf_log_logistic(np, 1, 2));
+      CHECK_RELERR(1/(2*sqrt(x)),
+          icdf_log_logistic(np, .5, 2));
+      CHECK_RELERR(2/sqrt(x), icdf_log_logistic(np, 2, 2));
+      CHECK_RELERR(1/(x*x), icdf_log_logistic(np, 1, .5));
+      CHECK_RELERR(1/(2*x*x), icdf_log_logistic(np, .5, .5));
+      CHECK_RELERR(2/(x*x), icdf_log_logistic(np, 2, .5));
+    }
+
+    CHECK_RELERR(1/x, isf_log_logistic(p, 1, 1));
+    CHECK_RELERR(1/(2*x), isf_log_logistic(p, .5, 1));
+    CHECK_RELERR(2/x, isf_log_logistic(p, 2, 1));
+    CHECK_RELERR(1/sqrt(x), isf_log_logistic(p, 1, 2));
+    CHECK_RELERR(1/(2*sqrt(x)), isf_log_logistic(p, .5, 2));
+    CHECK_RELERR(2/sqrt(x), isf_log_logistic(p, 2, 2));
+    CHECK_RELERR(1/(x*x), isf_log_logistic(p, 1, .5));
+    CHECK_RELERR(1/(2*x*x), isf_log_logistic(p, .5, .5));
+    CHECK_RELERR(2/(x*x), isf_log_logistic(p, 2, .5));
+  }
+
+  for (i = 0; i <= 100; i++) {
+    double p0 = (double)i/100;
+
+    CHECK_RELERR(0.5*p0/(1 - 0.5*p0), sample_log_logistic(0, p0));
+    CHECK_RELERR((1 - 0.5*p0)/(0.5*p0),
+        sample_log_logistic(1, p0));
+  }
+
+  if (!ok)
+    printf("fail log logistic cdf/sf\n");
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the cdf, sf, icdf, isf of the Weibull distribution.
+ */
+static void
+test_weibull(void *arg)
+{
+  (void) arg;
+
+  static const struct {
+    /* x is a point in the support of the Weibull distribution */
+    double x;
+    /* 'p' is the probability that a random variable X for a given Weibull
+     * probability ditribution will take value less-or-equal to x */
+    double p;
+    /* 'np' is the probability that a random variable X for a given Weibull
+     * probability distribution will take value greater-or-equal to x. */
+    double np;
+  } cases[] = {
+    { 0, 0, 1 },
+    { 1e-300, 1e-300, 1 },
+    { 1e-17, 1e-17, 1 },
+    { .1, .09516258196404043, .9048374180359595 },
+    { .5, .3934693402873666, .6065306597126334 },
+    { .6931471805599453, .5, .5 },
+    { 1, .6321205588285577, .36787944117144233 },
+    { 10, .9999546000702375, 4.5399929762484854e-5 },
+    { 36, .9999999999999998, 2.319522830243569e-16 },
+    { 37, .9999999999999999, 8.533047625744066e-17 },
+    { 38, 1, 3.1391327920480296e-17 },
+    { 100, 1, 3.720075976020836e-44 },
+    { 708, 1, 3.307553003638408e-308 },
+    { 710, 1, 4.47628622567513e-309 },
+    { 1000, 1, 0 },
+    { HUGE_VAL, 1, 0 },
+  };
+  double relerr_bound = 3e-15;
+  size_t i;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double np = cases[i].np;
+
+    CHECK_RELERR(p, cdf_weibull(x, 1, 1));
+    CHECK_RELERR(p, cdf_weibull(x/2, .5, 1));
+    CHECK_RELERR(p, cdf_weibull(x*2, 2, 1));
+    /* For 0 < x < sqrt(DBL_MIN), x^2 loses lots of bits.  */
+    if (x <= 0 ||
+        sqrt(DBL_MIN) <= x) {
+      CHECK_RELERR(p, cdf_weibull(x*x, 1, .5));
+      CHECK_RELERR(p, cdf_weibull(x*x/2, .5, .5));
+      CHECK_RELERR(p, cdf_weibull(x*x*2, 2, .5));
+    }
+    CHECK_RELERR(p, cdf_weibull(sqrt(x), 1, 2));
+    CHECK_RELERR(p, cdf_weibull(sqrt(x)/2, .5, 2));
+    CHECK_RELERR(p, cdf_weibull(sqrt(x)*2, 2, 2));
+    CHECK_RELERR(np, sf_weibull(x, 1, 1));
+    CHECK_RELERR(np, sf_weibull(x/2, .5, 1));
+    CHECK_RELERR(np, sf_weibull(x*2, 2, 1));
+    CHECK_RELERR(np, sf_weibull(x*x, 1, .5));
+    CHECK_RELERR(np, sf_weibull(x*x/2, .5, .5));
+    CHECK_RELERR(np, sf_weibull(x*x*2, 2, .5));
+    if (x >= 10) {
+      /*
+       * exp amplifies the error of sqrt(x)^2
+       * proportionally to exp(x); for large inputs
+       * this is significant.
+       */
+      double t = -expm1(-x*(2*DBL_EPSILON + DBL_EPSILON));
+      relerr_bound = t + DBL_EPSILON + t*DBL_EPSILON;
+      if (relerr_bound < 3e-15)
+        /*
+         * The tests are written only to 16
+         * decimal places anyway even if your
+         * `double' is, say, i387 binary80, for
+         * whatever reason.
+         */
+        relerr_bound = 3e-15;
+      CHECK_RELERR(np, sf_weibull(sqrt(x), 1, 2));
+      CHECK_RELERR(np, sf_weibull(sqrt(x)/2, .5, 2));
+      CHECK_RELERR(np, sf_weibull(sqrt(x)*2, 2, 2));
+    }
+
+    if (p <= 0.75) {
+      /*
+       * For p near 1, not enough precision near 1 to
+       * recover x.
+       */
+      CHECK_RELERR(x, icdf_weibull(p, 1, 1));
+      CHECK_RELERR(x/2, icdf_weibull(p, .5, 1));
+      CHECK_RELERR(x*2, icdf_weibull(p, 2, 1));
+    }
+    if (p >= 0.25 && !tor_isinf(x) && np > 0) {
+      /*
+       * For p near 0, not enough precision in np
+       * near 1 to recover x.  For 0, isf gives inf,
+       * even if p is precise enough for the icdf to
+       * work.
+       */
+      CHECK_RELERR(x, isf_weibull(np, 1, 1));
+      CHECK_RELERR(x/2, isf_weibull(np, .5, 1));
+      CHECK_RELERR(x*2, isf_weibull(np, 2, 1));
+    }
+  }
+
+  for (i = 0; i <= 100; i++) {
+    double p0 = (double)i/100;
+
+    CHECK_RELERR(3*sqrt(-log(p0/2)), sample_weibull(0, p0, 3, 2));
+    CHECK_RELERR(3*sqrt(-log1p(-p0/2)),
+        sample_weibull(1, p0, 3, 2));
+  }
+
+  if (!ok)
+    printf("fail Weibull cdf/sf\n");
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the cdf, sf, icdf, and isf of the generalized Pareto
+ * distribution.
+ */
+static void
+test_genpareto(void *arg)
+{
+  (void) arg;
+
+  struct {
+    /* xi is the 'xi' parameter of the generalized Pareto distribution, and the
+     * rest are the same as in the above tests */
+    double xi, x, p, np;
+  } cases[] = {
+    { 0, 0, 0, 1 },
+    { 1e-300, .004, 3.992010656008528e-3, .9960079893439915 },
+    { 1e-300, .1, .09516258196404043, .9048374180359595 },
+    { 1e-300, 1, .6321205588285577, .36787944117144233 },
+    { 1e-300, 10, .9999546000702375, 4.5399929762484854e-5 },
+    { 1e-200, 1e-16, 9.999999999999999e-17, .9999999999999999 },
+    { 1e-16, 1e-200, 9.999999999999998e-201, 1 },
+    { 1e-16, 1e-16, 1e-16, 1 },
+    { 1e-16, .004, 3.992010656008528e-3, .9960079893439915 },
+    { 1e-16, .1, .09516258196404043, .9048374180359595 },
+    { 1e-16, 1, .6321205588285577, .36787944117144233 },
+    { 1e-16, 10, .9999546000702375, 4.539992976248509e-5 },
+    { 1e-10, 1e-6, 9.999995000001667e-7, .9999990000005 },
+    { 1e-8, 1e-8, 9.999999950000001e-9, .9999999900000001 },
+    { 1, 1e-300, 1e-300, 1 },
+    { 1, 1e-16, 1e-16, .9999999999999999 },
+    { 1, .1, .09090909090909091, .9090909090909091 },
+    { 1, 1, .5, .5 },
+    { 1, 10, .9090909090909091, .0909090909090909 },
+    { 1, 100, .9900990099009901, .0099009900990099 },
+    { 1, 1000, .999000999000999, 9.990009990009992e-4 },
+    { 10, 1e-300, 1e-300, 1 },
+    { 10, 1e-16, 9.999999999999995e-17, .9999999999999999 },
+    { 10, .1, .06696700846319258, .9330329915368074 },
+    { 10, 1, .21320655780322778, .7867934421967723 },
+    { 10, 10, .3696701667040189, .6303298332959811 },
+    { 10, 100, .49886285755007337, .5011371424499267 },
+    { 10, 1000, .6018968102992647, .3981031897007353 },
+  };
+  double xi_array[] = { -1.5, -1, -1e-30, 0, 1e-30, 1, 1.5 };
+  size_t i, j;
+  double relerr_bound = 3e-15;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double xi = cases[i].xi;
+    double x = cases[i].x;
+    double p = cases[i].p;
+    double np = cases[i].np;
+
+    CHECK_RELERR(p, cdf_genpareto(x, 0, 1, xi));
+    CHECK_RELERR(p, cdf_genpareto(x*2, 0, 2, xi));
+    CHECK_RELERR(p, cdf_genpareto(x/2, 0, .5, xi));
+    CHECK_RELERR(np, sf_genpareto(x, 0, 1, xi));
+    CHECK_RELERR(np, sf_genpareto(x*2, 0, 2, xi));
+    CHECK_RELERR(np, sf_genpareto(x/2, 0, .5, xi));
+
+    if (p < .5) {
+      CHECK_RELERR(x, icdf_genpareto(p, 0, 1, xi));
+      CHECK_RELERR(x*2, icdf_genpareto(p, 0, 2, xi));
+      CHECK_RELERR(x/2, icdf_genpareto(p, 0, .5, xi));
+    }
+    if (np < .5) {
+      CHECK_RELERR(x, isf_genpareto(np, 0, 1, xi));
+      CHECK_RELERR(x*2, isf_genpareto(np, 0, 2, xi));
+      CHECK_RELERR(x/2, isf_genpareto(np, 0, .5, xi));
+    }
+  }
+
+  for (i = 0; i < arraycount(xi_array); i++) {
+    for (j = 0; j <= 100; j++) {
+      double p0 = (j == 0 ? 2*DBL_MIN : (double)j/100);
+
+      /* This is actually a check against 0, but we do <= so that the compiler
+         does not raise a -Wfloat-equal */
+      if (fabs(xi_array[i]) <= 0) {
+        /*
+         * When xi == 0, the generalized Pareto
+         * distribution reduces to an
+         * exponential distribution.
+         */
+        CHECK_RELERR(-log(p0/2),
+            sample_genpareto(0, p0, 0));
+        CHECK_RELERR(-log1p(-p0/2),
+            sample_genpareto(1, p0, 0));
+      } else {
+        CHECK_RELERR(expm1(-xi_array[i]*log(p0/2))/xi_array[i],
+            sample_genpareto(0, p0, xi_array[i]));
+        CHECK_RELERR((j == 0 ? DBL_MIN :
+                expm1(-xi_array[i]*log1p(-p0/2))/xi_array[i]),
+            sample_genpareto(1, p0, xi_array[i]));
+      }
+
+      CHECK_RELERR(isf_genpareto(p0/2, 0, 1, xi_array[i]),
+          sample_genpareto(0, p0, xi_array[i]));
+      CHECK_RELERR(icdf_genpareto(p0/2, 0, 1, xi_array[i]),
+          sample_genpareto(1, p0, xi_array[i]));
+    }
+  }
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/**
+ * Test the deterministic sampler for uniform distribution on [a, b].
+ *
+ * This currently only tests whether the outcome lies within [a, b].
+ */
+static void
+test_uniform_interval(void *arg)
+{
+  (void) arg;
+  struct {
+    /* Sample from a uniform distribution with parameters 'a' and 'b', using
+     * 't' as the sampling index. */
+    double t, a, b;
+  } cases[] = {
+    { 0, 0, 0 },
+    { 0, 0, 1 },
+    { 0, 1.0000000000000007, 3.999999999999995 },
+    { 0, 4000, 4000 },
+    { 0.42475836677491291, 4000, 4000 },
+    { 0, -DBL_MAX, DBL_MAX },
+    { 0.25, -DBL_MAX, DBL_MAX },
+    { 0.5, -DBL_MAX, DBL_MAX },
+  };
+  size_t i = 0;
+  bool ok = true;
+
+  for (i = 0; i < arraycount(cases); i++) {
+    double t = cases[i].t;
+    double a = cases[i].a;
+    double b = cases[i].b;
+
+    CHECK_LE(a, sample_uniform_interval(t, a, b));
+    CHECK_LE(sample_uniform_interval(t, a, b), b);
+
+    CHECK_LE(a, sample_uniform_interval(1 - t, a, b));
+    CHECK_LE(sample_uniform_interval(1 - t, a, b), b);
+
+    CHECK_LE(sample_uniform_interval(t, -b, -a), -a);
+    CHECK_LE(-b, sample_uniform_interval(t, -b, -a));
+
+    CHECK_LE(sample_uniform_interval(1 - t, -b, -a), -a);
+    CHECK_LE(-b, sample_uniform_interval(1 - t, -b, -a));
+  }
+
+  tt_assert(ok);
+
+ done:
+  ;
+}
+
+/********************** Stochastic tests ****************************/
+
+/*
+ * Psi test, sometimes also called G-test.  The psi test statistic,
+ * suitably scaled, has chi^2 distribution, but the psi test tends to
+ * have better statistical power in practice to detect deviations than
+ * the chi^2 test does.  (The chi^2 test statistic is the first term of
+ * the Taylor expansion of the psi test statistic.)  The psi test is
+ * generic, for any CDF; particular distributions might have higher-
+ * power tests to distinguish them from predictable deviations or bugs.
+ *
+ * We choose the psi critical value so that a single psi test has
+ * probability below alpha = 1% of spuriously failing even if all the
+ * code is correct.  But the false positive rate for a suite of n tests
+ * is higher: 1 - Binom(0; n, alpha) = 1 - (1 - alpha)^n.  For n = 10,
+ * this is about 10%, and for n = 100 it is well over 50%.
+ *
+ * We can drive it down by running each test twice, and accepting it if
+ * it passes at least once; in that case, it is as if we used Binom(2;
+ * 2, alpha) = alpha^2 as the false positive rate for each test, and
+ * for n = 10 tests, it would be 0.1%, and for n = 100 tests, still
+ * only 1%.
+ *
+ * The critical value for a chi^2 distribution with 100 degrees of
+ * freedom and false positive rate alpha = 1% was taken from:
+ *
+ *  NIST/SEMATECH e-Handbook of Statistical Methods, Section
+ *  1.3.6.7.4 `Critical Values of the Chi-Square Distribution',
+ *  <http://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm>,
+ *  retrieved 2018-10-28.
+ */
+
+static const size_t NSAMPLES = 100000;
+/* Number of chances we give to the test to succeed. */
+static const unsigned NTRIALS = 2;
+/* Number of times we want the test to pass per NTRIALS. */
+static const unsigned NPASSES_MIN = 1;
+
+#define PSI_DF 100                          /* degrees of freedom */
+static const double PSI_CRITICAL = 135.807; /* critical value, alpha = .01 */
+
+/**
+ * Perform a psi test on an array of sample counts, C, adding up to N
+ * samples, and an array of log expected probabilities, logP,
+ * representing the null hypothesis for the distribution of samples
+ * counted.  Return false if the psi test rejects the null hypothesis,
+ * true if otherwise.
+ */
+static bool
+psi_test(const size_t C[PSI_DF], const double logP[PSI_DF], size_t N)
+{
+  double psi = 0;
+  double c = 0;                 /* Kahan compensation */
+  double t, u;
+  size_t i;
+
+  for (i = 0; i < PSI_DF; i++) {
+    /*
+     * c*log(c/(n*p)) = (1/n) * f*log(f/p) where f = c/n is
+     * the frequency, and f*log(f/p) ---> 0 as f ---> 0, so
+     * this is a reasonable choice.  Further, any mass that
+     * _fails_ to turn up in this bin will inflate another
+     * bin instead, so we don't really lose anything by
+     * ignoring empty bins even if they have high
+     * probability.
+     */
+    if (C[i] == 0)
+      continue;
+    t = C[i]*(log((double)C[i]/N) - logP[i]) - c;
+    u = psi + t;
+    c = (u - psi) - t;
+    psi = u;
+  }
+  psi *= 2;
+
+  return psi <= PSI_CRITICAL;
+}
+
+static bool
+test_stochastic_geometric_impl(double p)
+{
+  const struct geometric geometric = {
+    .base = GEOMETRIC(geometric),
+    .p = p,
+  };
+  double logP[PSI_DF] = {0};
+  unsigned ntry = NTRIALS, npass = 0;
+  unsigned i;
+  size_t j;
+
+  /* Compute logP[i] = Geom(i + 1; p).  */
+  for (i = 0; i < PSI_DF - 1; i++)
+    logP[i] = logpmf_geometric(i + 1, p);
+
+  /* Compute logP[n-1] = log (1 - (P[0] + P[1] + ... + P[n-2])).  */
+  logP[PSI_DF - 1] = log1mexp(logsumexp(logP, PSI_DF - 1));
+
+  while (ntry --> 0) {
+    size_t C[PSI_DF] = {0};
+
+    for (j = 0; j < NSAMPLES; j++) {
+      double n_tmp = dist_sample(&geometric.base);
+
+      /* Must be an integer.  (XXX -Wfloat-equal)  */
+      tor_assert(ceil(n_tmp) <= n_tmp && ceil(n_tmp) >= n_tmp);
+
+      /* Must be a positive integer.  */
+      tor_assert(n_tmp >= 1);
+
+      /* Probability of getting a value in the billions is negligible.  */
+      tor_assert(n_tmp <= (double)UINT_MAX);
+
+      unsigned n = (unsigned) n_tmp;
+
+      if (n > PSI_DF)
+        n = PSI_DF;
+      C[n - 1]++;
+    }
+
+    if (psi_test(C, logP, NSAMPLES)) {
+      if (++npass >= NPASSES_MIN)
+        break;
+    }
+  }
+
+  if (npass >= NPASSES_MIN) {
+    /* printf("pass %s sampler\n", "geometric"); */
+    return true;
+  } else {
+    printf("fail %s sampler\n", "geometric");
+    return false;
+  }
+}
+
+/**
+ * Divide the support of <b>dist</b> into histogram bins in <b>logP</b>. Start
+ * at the 1st percentile and ending at the 99th percentile. Pick the bin
+ * boundaries using linear interpolation so that they are uniformly spaced.
+ *
+ * In each bin logP[i] we insert the expected log-probability that a sampled
+ * value will fall into that bin. We will use this as the null hypothesis of
+ * the psi test.
+ *
+ * Set logP[i] = log(CDF(x_i) - CDF(x_{i-1})), where x_-1 = -inf, x_n =
+ * +inf, and x_i = i*(hi - lo)/(n - 2).
+ */
+static void
+bin_cdfs(const struct dist *dist, double lo, double hi, double *logP, size_t n)
+{
+#define CDF(x)  dist_cdf(dist, x)
+#define SF(x)   dist_sf(dist, x)
+  const double w = (hi - lo)/(n - 2);
+  double halfway = dist_icdf(dist, 0.5);
+  double x_0, x_1;
+  size_t i;
+  size_t n2 = ceil_to_size_t((halfway - lo)/w);
+
+  tor_assert(lo <= halfway);
+  tor_assert(halfway <= hi);
+  tor_assert(n2 <= n);
+
+  x_1 = lo;
+  logP[0] = log(CDF(x_1) - 0); /* 0 = CDF(-inf) */
+  for (i = 1; i < n2; i++) {
+    x_0 = x_1;
+    /* do the linear interpolation */
+    x_1 = (i <= n/2 ? lo + i*w : hi - (n - 2 - i)*w);
+    /* set the expected log-probability */
+    logP[i] = log(CDF(x_1) - CDF(x_0));
+  }
+  x_0 = hi;
+  logP[n - 1] = log(SF(x_0) - 0); /* 0 = SF(+inf) = 1 - CDF(+inf) */
+
+  /* In this loop we are filling out the high part of the array. We are using
+   * SF because in these cases the CDF is near 1 where precision is lower. So
+   * instead we are using SF near 0 where the precision is higher. We have
+   * SF(t) = 1 - CDF(t).  */
+  for (i = 1; i < n - n2; i++) {
+    x_1 = x_0;
+    /* do the linear interpolation */
+    x_0 = (i <= n/2 ? hi - i*w : lo + (n - 2 - i)*w);
+    /* set the expected log-probability */
+    logP[n - i - 1] = log(SF(x_0) - SF(x_1));
+  }
+#undef SF
+#undef CDF
+}
+
+/**
+ * Draw NSAMPLES samples from dist, counting the number of samples x in
+ * the ith bin C[i] if x_{i-1} <= x < x_i, where x_-1 = -inf, x_n =
+ * +inf, and x_i = i*(hi - lo)/(n - 2).
+ */
+static void
+bin_samples(const struct dist *dist, double lo, double hi, size_t *C, size_t n)
+{
+  const double w = (hi - lo)/(n - 2);
+  size_t i;
+
+  for (i = 0; i < NSAMPLES; i++) {
+    double x = dist_sample(dist);
+    size_t bin;
+
+    if (x < lo)
+      bin = 0;
+    else if (x < hi)
+      bin = 1 + floor_to_size_t((x - lo)/w);
+    else
+      bin = n - 1;
+    tor_assert(bin < n);
+    C[bin]++;
+  }
+}
+
+/**
+ * Carry out a Psi test on <b>dist</b>.
+ *
+ * Sample NSAMPLES from dist, putting them in bins from -inf to lo to
+ * hi to +inf, and apply up to two psi tests.  True if at least one psi
+ * test passes; false if not.  False positive rate should be bounded by
+ * 0.01^2 = 0.0001.
+ */
+static bool
+test_psi_dist_sample(const struct dist *dist)
+{
+  double logP[PSI_DF] = {0};
+  unsigned ntry = NTRIALS, npass = 0;
+  double lo = dist_icdf(dist, 1/(double)(PSI_DF + 2));
+  double hi = dist_isf(dist, 1/(double)(PSI_DF + 2));
+
+  /* Create the null hypothesis in logP */
+  bin_cdfs(dist, lo, hi, logP, PSI_DF);
+
+  /* Now run the test */
+  while (ntry --> 0) {
+    size_t C[PSI_DF] = {0};
+    bin_samples(dist, lo, hi, C, PSI_DF);
+    if (psi_test(C, logP, NSAMPLES)) {
+      if (++npass >= NPASSES_MIN)
+        break;
+    }
+  }
+
+  /* Did we fail or succeed? */
+  if (npass >= NPASSES_MIN) {
+    /* printf("pass %s sampler\n", dist_name(dist));*/
+    return true;
+  } else {
+    printf("fail %s sampler\n", dist_name(dist));
+    return false;
+  }
+}
+
+/* This is the seed of the deterministic randomness */
+static uint32_t deterministic_rand_counter;
+
+/** Initialize the seed of the deterministic randomness. */
+static void
+init_deterministic_rand(void)
+{
+  deterministic_rand_counter = crypto_rand_u32();
+}
+
+/** Produce deterministic randomness for the stochastic tests using the global
+ *  deterministic_rand_counter seed
+ *
+ *  This function produces deterministic data over multiple calls iff it's
+ *  called in the same call order with the same 'n' parameter (which is the
+ *  case for the psi test). If not, outputs will deviate. */
+static void
+crypto_rand_deterministic(char *out, size_t n)
+{
+  /* Use a XOF to squeeze bytes out of that silly counter */
+  crypto_xof_t *xof = crypto_xof_new();
+  tor_assert(xof);
+  crypto_xof_add_bytes(xof, (uint8_t*)&deterministic_rand_counter,
+                       sizeof(deterministic_rand_counter));
+  crypto_xof_squeeze_bytes(xof, (uint8_t*)out, n);
+  crypto_xof_free(xof);
+
+  /* Increase counter for next run */
+  deterministic_rand_counter++;
+}
+
+static void
+test_stochastic_uniform(void *arg)
+{
+  (void) arg;
+
+  const struct uniform uniform01 = {
+    .base = UNIFORM(uniform01),
+    .a = 0,
+    .b = 1,
+  };
+  const struct uniform uniform_pos = {
+    .base = UNIFORM(uniform_pos),
+    .a = 1.23,
+    .b = 4.56,
+  };
+  const struct uniform uniform_neg = {
+    .base = UNIFORM(uniform_neg),
+    .a = -10,
+    .b = -1,
+  };
+  const struct uniform uniform_cross = {
+    .base = UNIFORM(uniform_cross),
+    .a = -1.23,
+    .b = 4.56,
+  };
+  const struct uniform uniform_subnormal = {
+    .base = UNIFORM(uniform_subnormal),
+    .a = 4e-324,
+    .b = 4e-310,
+  };
+  const struct uniform uniform_subnormal_cross = {
+    .base = UNIFORM(uniform_subnormal_cross),
+    .a = -4e-324,
+    .b = 4e-310,
+  };
+  bool ok = true;
+
+  init_deterministic_rand();
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok &= test_psi_dist_sample(&uniform01.base);
+  ok &= test_psi_dist_sample(&uniform_pos.base);
+  ok &= test_psi_dist_sample(&uniform_neg.base);
+  ok &= test_psi_dist_sample(&uniform_cross.base);
+  ok &= test_psi_dist_sample(&uniform_subnormal.base);
+  ok &= test_psi_dist_sample(&uniform_subnormal_cross.base);
+
+  tt_assert(ok);
+
+ done:
+    ;
+}
+
+static bool
+test_stochastic_logistic_impl(double mu, double sigma)
+{
+  const struct logistic dist = {
+    .base = LOGISTIC(dist),
+    .mu = mu,
+    .sigma = sigma,
+  };
+
+  /* XXX Consider some fancier logistic test.  */
+  return test_psi_dist_sample(&dist.base);
+}
+
+static bool
+test_stochastic_log_logistic_impl(double alpha, double beta)
+{
+  const struct log_logistic dist = {
+    .base = LOG_LOGISTIC(dist),
+    .alpha = alpha,
+    .beta = beta,
+  };
+
+  /* XXX Consider some fancier log logistic test.  */
+  return test_psi_dist_sample(&dist.base);
+}
+
+static bool
+test_stochastic_weibull_impl(double lambda, double k)
+{
+  const struct weibull dist = {
+    .base = WEIBULL(dist),
+    .lambda = lambda,
+    .k = k,
+  };
+
+/*
+ * XXX Consider applying a Tiku-Singh test:
+ *
+ *    M.L. Tiku and M. Singh, `Testing the two-parameter
+ *    Weibull distribution', Communications in Statistics --
+ *    Theory and Methods A10(9), 1981, 907--918.
+ *https://www.tandfonline.com/doi/pdf/10.1080/03610928108828082?needAccess=true
+ */
+  return test_psi_dist_sample(&dist.base);
+}
+
+static bool
+test_stochastic_genpareto_impl(double mu, double sigma, double xi)
+{
+  const struct genpareto dist = {
+    .base = GENPARETO(dist),
+    .mu = mu,
+    .sigma = sigma,
+    .xi = xi,
+  };
+
+  /* XXX Consider some fancier GPD test.  */
+  return test_psi_dist_sample(&dist.base);
+}
+
+static void
+test_stochastic_genpareto(void *arg)
+{
+  bool ok = 0;
+  bool tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_genpareto_impl(0, 1, -0.25);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, -1e-30);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, 0);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, 1e-30);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(0, 1, 0.25);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(-1, 1, -0.25);
+  tt_assert(ok);
+  ok = test_stochastic_genpareto_impl(1, 2, 0.25);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed) {
+    printf("seed: %"PRIu32, deterministic_rand_counter);
+  }
+  UNMOCK(crypto_rand);
+}
+
+static void
+test_stochastic_geometric(void *arg)
+{
+  bool ok = 0;
+  bool tests_failed = true;
+
+  (void) arg;
+
+  init_deterministic_rand();
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_geometric_impl(0.1);
+  tt_assert(ok);
+  ok = test_stochastic_geometric_impl(0.5);
+  tt_assert(ok);
+  ok = test_stochastic_geometric_impl(0.9);
+  tt_assert(ok);
+  ok = test_stochastic_geometric_impl(1);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed) {
+    printf("seed: %"PRIu32, deterministic_rand_counter);
+  }
+  UNMOCK(crypto_rand);
+}
+
+static void
+test_stochastic_logistic(void *arg)
+{
+  bool ok = 0;
+  bool tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_logistic_impl(0, 1);
+  tt_assert(ok);
+  ok = test_stochastic_logistic_impl(0, 1e-16);
+  tt_assert(ok);
+  ok = test_stochastic_logistic_impl(1, 10);
+  tt_assert(ok);
+  ok = test_stochastic_logistic_impl(-10, 100);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed) {
+    printf("seed: %"PRIu32, deterministic_rand_counter);
+  }
+  UNMOCK(crypto_rand);
+}
+
+static void
+test_stochastic_log_logistic(void *arg)
+{
+  bool ok = 0;
+  bool tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_log_logistic_impl(1, 1);
+  tt_assert(ok);
+  ok = test_stochastic_log_logistic_impl(1, 10);
+  tt_assert(ok);
+  ok = test_stochastic_log_logistic_impl(M_E, 1e-1);
+  tt_assert(ok);
+  ok = test_stochastic_log_logistic_impl(exp(-10), 1e-2);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed) {
+    printf("seed: %"PRIu32, deterministic_rand_counter);
+  }
+  UNMOCK(crypto_rand);
+}
+
+static void
+test_stochastic_weibull(void *arg)
+{
+  bool ok = 0;
+  bool tests_failed = true;
+  (void) arg;
+
+  init_deterministic_rand();
+  MOCK(crypto_rand, crypto_rand_deterministic);
+
+  ok = test_stochastic_weibull_impl(1, 0.5);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(1, 1);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(1, 1.5);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(1, 2);
+  tt_assert(ok);
+  ok = test_stochastic_weibull_impl(10, 1);
+  tt_assert(ok);
+
+  tests_failed = false;
+
+ done:
+  if (tests_failed) {
+    printf("seed: %"PRIu32, deterministic_rand_counter);
+  }
+  UNMOCK(crypto_rand);
+}
+
+struct testcase_t prob_distr_tests[] = {
+  { "logit_logistics", test_logit_logistic, TT_FORK, NULL, NULL },
+  { "log_logistic", test_log_logistic, TT_FORK, NULL, NULL },
+  { "weibull", test_weibull, TT_FORK, NULL, NULL },
+  { "genpareto", test_genpareto, TT_FORK, NULL, NULL },
+  { "uniform_interval", test_uniform_interval, TT_FORK, NULL, NULL },
+  END_OF_TESTCASES
+};
+
+struct testcase_t slow_stochastic_prob_distr_tests[] = {
+  { "stochastic_genpareto", test_stochastic_genpareto, TT_FORK, NULL, NULL },
+  { "stochastic_geometric", test_stochastic_geometric, TT_FORK, NULL, NULL },
+  { "stochastic_uniform", test_stochastic_uniform, TT_FORK, NULL, NULL },
+  { "stochastic_logistic", test_stochastic_logistic, TT_FORK, NULL, NULL },
+  { "stochastic_log_logistic", test_stochastic_log_logistic, TT_FORK, NULL,
+    NULL },
+  { "stochastic_weibull", test_stochastic_weibull, TT_FORK, NULL, NULL },
+  END_OF_TESTCASES
+};

+ 1 - 0
src/test/test_slow.c

@@ -21,6 +21,7 @@
 struct testgroup_t testgroups[] = {
 struct testgroup_t testgroups[] = {
   { "slow/crypto/", slow_crypto_tests },
   { "slow/crypto/", slow_crypto_tests },
   { "slow/process/", slow_process_tests },
   { "slow/process/", slow_process_tests },
+  { "slow/prob_distr/", slow_stochastic_prob_distr_tests },
   END_OF_GROUPS
   END_OF_GROUPS
 };
 };
 
 

+ 24 - 1
src/test/test_util.c

@@ -19,6 +19,7 @@
 #include "feature/client/transports.h"
 #include "feature/client/transports.h"
 #include "lib/crypt_ops/crypto_format.h"
 #include "lib/crypt_ops/crypto_format.h"
 #include "lib/crypt_ops/crypto_rand.h"
 #include "lib/crypt_ops/crypto_rand.h"
+#include "lib/defs/time.h"
 #include "test/test.h"
 #include "test/test.h"
 #include "lib/memarea/memarea.h"
 #include "lib/memarea/memarea.h"
 #include "lib/process/waitpid.h"
 #include "lib/process/waitpid.h"
@@ -69,6 +70,28 @@
 #define INFINITY_DBL ((double)INFINITY)
 #define INFINITY_DBL ((double)INFINITY)
 #define NAN_DBL ((double)NAN)
 #define NAN_DBL ((double)NAN)
 
 
+/** Test the tor_isinf() wrapper */
+static void
+test_tor_isinf(void *arg)
+{
+  (void) arg;
+
+  tt_assert(tor_isinf(INFINITY_DBL));
+
+  tt_assert(!tor_isinf(NAN_DBL));
+  tt_assert(!tor_isinf(DBL_EPSILON));
+  tt_assert(!tor_isinf(DBL_MAX));
+  tt_assert(!tor_isinf(DBL_MIN));
+
+  tt_assert(!tor_isinf(0.0));
+  tt_assert(!tor_isinf(0.1));
+  tt_assert(!tor_isinf(3));
+  tt_assert(!tor_isinf(3.14));
+
+ done:
+  ;
+}
+
 /* XXXX this is a minimal wrapper to make the unit tests compile with the
 /* XXXX this is a minimal wrapper to make the unit tests compile with the
  * changed tor_timegm interface. */
  * changed tor_timegm interface. */
 static time_t
 static time_t
@@ -404,7 +427,6 @@ test_util_time(void *arg)
 
 
 /* Assume tv_usec is an unsigned integer until proven otherwise */
 /* Assume tv_usec is an unsigned integer until proven otherwise */
 #define TV_USEC_MAX UINT_MAX
 #define TV_USEC_MAX UINT_MAX
-#define TOR_USEC_PER_SEC 1000000
 
 
   /* Overflows in the result type */
   /* Overflows in the result type */
 
 
@@ -6182,6 +6204,7 @@ struct testcase_t util_tests[] = {
   UTIL_TEST(mathlog, 0),
   UTIL_TEST(mathlog, 0),
   UTIL_TEST(fraction, 0),
   UTIL_TEST(fraction, 0),
   UTIL_TEST(weak_random, 0),
   UTIL_TEST(weak_random, 0),
+  { "tor_isinf", test_tor_isinf, TT_FORK, NULL, NULL },
   { "socket_ipv4", test_util_socket, TT_FORK, &passthrough_setup,
   { "socket_ipv4", test_util_socket, TT_FORK, &passthrough_setup,
     (void*)"4" },
     (void*)"4" },
   { "socket_ipv6", test_util_socket, TT_FORK,
   { "socket_ipv6", test_util_socket, TT_FORK,

+ 549 - 0
src/trunnel/circpad_negotiation.c

@@ -0,0 +1,549 @@
+/* circpad_negotiation.c -- generated by Trunnel v1.5.2.
+ * https://gitweb.torproject.org/trunnel.git
+ * You probably shouldn't edit this file.
+ */
+#include <stdlib.h>
+#include "trunnel-impl.h"
+
+#include "circpad_negotiation.h"
+
+#define TRUNNEL_SET_ERROR_CODE(obj) \
+  do {                              \
+    (obj)->trunnel_error_code_ = 1; \
+  } while (0)
+
+#if defined(__COVERITY__) || defined(__clang_analyzer__)
+/* If we're running a static analysis tool, we don't want it to complain
+ * that some of our remaining-bytes checks are dead-code. */
+int circpadnegotiation_deadcode_dummy__ = 0;
+#define OR_DEADCODE_DUMMY || circpadnegotiation_deadcode_dummy__
+#else
+#define OR_DEADCODE_DUMMY
+#endif
+
+#define CHECK_REMAINING(nbytes, label)                           \
+  do {                                                           \
+    if (remaining < (nbytes) OR_DEADCODE_DUMMY) {                \
+      goto label;                                                \
+    }                                                            \
+  } while (0)
+
+circpad_negotiate_t *
+circpad_negotiate_new(void)
+{
+  circpad_negotiate_t *val = trunnel_calloc(1, sizeof(circpad_negotiate_t));
+  if (NULL == val)
+    return NULL;
+  val->command = CIRCPAD_COMMAND_START;
+  return val;
+}
+
+/** Release all storage held inside 'obj', but do not free 'obj'.
+ */
+static void
+circpad_negotiate_clear(circpad_negotiate_t *obj)
+{
+  (void) obj;
+}
+
+void
+circpad_negotiate_free(circpad_negotiate_t *obj)
+{
+  if (obj == NULL)
+    return;
+  circpad_negotiate_clear(obj);
+  trunnel_memwipe(obj, sizeof(circpad_negotiate_t));
+  trunnel_free_(obj);
+}
+
+uint8_t
+circpad_negotiate_get_version(const circpad_negotiate_t *inp)
+{
+  return inp->version;
+}
+int
+circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val)
+{
+  if (! ((val == 0))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->version = val;
+  return 0;
+}
+uint8_t
+circpad_negotiate_get_command(const circpad_negotiate_t *inp)
+{
+  return inp->command;
+}
+int
+circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val)
+{
+  if (! ((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->command = val;
+  return 0;
+}
+uint8_t
+circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp)
+{
+  return inp->machine_type;
+}
+int
+circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val)
+{
+  inp->machine_type = val;
+  return 0;
+}
+uint8_t
+circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp)
+{
+  return inp->echo_request;
+}
+int
+circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val)
+{
+  if (! ((val == 0 || val == 1))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->echo_request = val;
+  return 0;
+}
+const char *
+circpad_negotiate_check(const circpad_negotiate_t *obj)
+{
+  if (obj == NULL)
+    return "Object was NULL";
+  if (obj->trunnel_error_code_)
+    return "A set function failed on this object";
+  if (! (obj->version == 0))
+    return "Integer out of bounds";
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    return "Integer out of bounds";
+  if (! (obj->echo_request == 0 || obj->echo_request == 1))
+    return "Integer out of bounds";
+  return NULL;
+}
+
+ssize_t
+circpad_negotiate_encoded_len(const circpad_negotiate_t *obj)
+{
+  ssize_t result = 0;
+
+  if (NULL != circpad_negotiate_check(obj))
+     return -1;
+
+
+  /* Length of u8 version IN [0] */
+  result += 1;
+
+  /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  result += 1;
+
+  /* Length of u8 machine_type */
+  result += 1;
+
+  /* Length of u8 echo_request IN [0, 1] */
+  result += 1;
+  return result;
+}
+int
+circpad_negotiate_clear_errors(circpad_negotiate_t *obj)
+{
+  int r = obj->trunnel_error_code_;
+  obj->trunnel_error_code_ = 0;
+  return r;
+}
+ssize_t
+circpad_negotiate_encode(uint8_t *output, const size_t avail, const circpad_negotiate_t *obj)
+{
+  ssize_t result = 0;
+  size_t written = 0;
+  uint8_t *ptr = output;
+  const char *msg;
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  const ssize_t encoded_len = circpad_negotiate_encoded_len(obj);
+#endif
+
+  if (NULL != (msg = circpad_negotiate_check(obj)))
+    goto check_failed;
+
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  trunnel_assert(encoded_len >= 0);
+#endif
+
+  /* Encode u8 version IN [0] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->version));
+  written += 1; ptr += 1;
+
+  /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->command));
+  written += 1; ptr += 1;
+
+  /* Encode u8 machine_type */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->machine_type));
+  written += 1; ptr += 1;
+
+  /* Encode u8 echo_request IN [0, 1] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->echo_request));
+  written += 1; ptr += 1;
+
+
+  trunnel_assert(ptr == output + written);
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  {
+    trunnel_assert(encoded_len >= 0);
+    trunnel_assert((size_t)encoded_len == written);
+  }
+
+#endif
+
+  return written;
+
+ truncated:
+  result = -2;
+  goto fail;
+ check_failed:
+  (void)msg;
+  result = -1;
+  goto fail;
+ fail:
+  trunnel_assert(result < 0);
+  return result;
+}
+
+/** As circpad_negotiate_parse(), but do not allocate the output
+ * object.
+ */
+static ssize_t
+circpad_negotiate_parse_into(circpad_negotiate_t *obj, const uint8_t *input, const size_t len_in)
+{
+  const uint8_t *ptr = input;
+  size_t remaining = len_in;
+  ssize_t result = 0;
+  (void)result;
+
+  /* Parse u8 version IN [0] */
+  CHECK_REMAINING(1, truncated);
+  obj->version = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->version == 0))
+    goto fail;
+
+  /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  CHECK_REMAINING(1, truncated);
+  obj->command = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    goto fail;
+
+  /* Parse u8 machine_type */
+  CHECK_REMAINING(1, truncated);
+  obj->machine_type = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+
+  /* Parse u8 echo_request IN [0, 1] */
+  CHECK_REMAINING(1, truncated);
+  obj->echo_request = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->echo_request == 0 || obj->echo_request == 1))
+    goto fail;
+  trunnel_assert(ptr + remaining == input + len_in);
+  return len_in - remaining;
+
+ truncated:
+  return -2;
+ fail:
+  result = -1;
+  return result;
+}
+
+ssize_t
+circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in)
+{
+  ssize_t result;
+  *output = circpad_negotiate_new();
+  if (NULL == *output)
+    return -1;
+  result = circpad_negotiate_parse_into(*output, input, len_in);
+  if (result < 0) {
+    circpad_negotiate_free(*output);
+    *output = NULL;
+  }
+  return result;
+}
+circpad_negotiated_t *
+circpad_negotiated_new(void)
+{
+  circpad_negotiated_t *val = trunnel_calloc(1, sizeof(circpad_negotiated_t));
+  if (NULL == val)
+    return NULL;
+  val->command = CIRCPAD_COMMAND_START;
+  val->response = CIRCPAD_RESPONSE_ERR;
+  return val;
+}
+
+/** Release all storage held inside 'obj', but do not free 'obj'.
+ */
+static void
+circpad_negotiated_clear(circpad_negotiated_t *obj)
+{
+  (void) obj;
+}
+
+void
+circpad_negotiated_free(circpad_negotiated_t *obj)
+{
+  if (obj == NULL)
+    return;
+  circpad_negotiated_clear(obj);
+  trunnel_memwipe(obj, sizeof(circpad_negotiated_t));
+  trunnel_free_(obj);
+}
+
+uint8_t
+circpad_negotiated_get_version(const circpad_negotiated_t *inp)
+{
+  return inp->version;
+}
+int
+circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val)
+{
+  if (! ((val == 0))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->version = val;
+  return 0;
+}
+uint8_t
+circpad_negotiated_get_command(const circpad_negotiated_t *inp)
+{
+  return inp->command;
+}
+int
+circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val)
+{
+  if (! ((val == CIRCPAD_COMMAND_START || val == CIRCPAD_COMMAND_STOP))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->command = val;
+  return 0;
+}
+uint8_t
+circpad_negotiated_get_response(const circpad_negotiated_t *inp)
+{
+  return inp->response;
+}
+int
+circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val)
+{
+  if (! ((val == CIRCPAD_RESPONSE_ERR || val == CIRCPAD_RESPONSE_OK))) {
+     TRUNNEL_SET_ERROR_CODE(inp);
+     return -1;
+  }
+  inp->response = val;
+  return 0;
+}
+uint8_t
+circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp)
+{
+  return inp->machine_type;
+}
+int
+circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val)
+{
+  inp->machine_type = val;
+  return 0;
+}
+const char *
+circpad_negotiated_check(const circpad_negotiated_t *obj)
+{
+  if (obj == NULL)
+    return "Object was NULL";
+  if (obj->trunnel_error_code_)
+    return "A set function failed on this object";
+  if (! (obj->version == 0))
+    return "Integer out of bounds";
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    return "Integer out of bounds";
+  if (! (obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK))
+    return "Integer out of bounds";
+  return NULL;
+}
+
+ssize_t
+circpad_negotiated_encoded_len(const circpad_negotiated_t *obj)
+{
+  ssize_t result = 0;
+
+  if (NULL != circpad_negotiated_check(obj))
+     return -1;
+
+
+  /* Length of u8 version IN [0] */
+  result += 1;
+
+  /* Length of u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  result += 1;
+
+  /* Length of u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */
+  result += 1;
+
+  /* Length of u8 machine_type */
+  result += 1;
+  return result;
+}
+int
+circpad_negotiated_clear_errors(circpad_negotiated_t *obj)
+{
+  int r = obj->trunnel_error_code_;
+  obj->trunnel_error_code_ = 0;
+  return r;
+}
+ssize_t
+circpad_negotiated_encode(uint8_t *output, const size_t avail, const circpad_negotiated_t *obj)
+{
+  ssize_t result = 0;
+  size_t written = 0;
+  uint8_t *ptr = output;
+  const char *msg;
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  const ssize_t encoded_len = circpad_negotiated_encoded_len(obj);
+#endif
+
+  if (NULL != (msg = circpad_negotiated_check(obj)))
+    goto check_failed;
+
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  trunnel_assert(encoded_len >= 0);
+#endif
+
+  /* Encode u8 version IN [0] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->version));
+  written += 1; ptr += 1;
+
+  /* Encode u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->command));
+  written += 1; ptr += 1;
+
+  /* Encode u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->response));
+  written += 1; ptr += 1;
+
+  /* Encode u8 machine_type */
+  trunnel_assert(written <= avail);
+  if (avail - written < 1)
+    goto truncated;
+  trunnel_set_uint8(ptr, (obj->machine_type));
+  written += 1; ptr += 1;
+
+
+  trunnel_assert(ptr == output + written);
+#ifdef TRUNNEL_CHECK_ENCODED_LEN
+  {
+    trunnel_assert(encoded_len >= 0);
+    trunnel_assert((size_t)encoded_len == written);
+  }
+
+#endif
+
+  return written;
+
+ truncated:
+  result = -2;
+  goto fail;
+ check_failed:
+  (void)msg;
+  result = -1;
+  goto fail;
+ fail:
+  trunnel_assert(result < 0);
+  return result;
+}
+
+/** As circpad_negotiated_parse(), but do not allocate the output
+ * object.
+ */
+static ssize_t
+circpad_negotiated_parse_into(circpad_negotiated_t *obj, const uint8_t *input, const size_t len_in)
+{
+  const uint8_t *ptr = input;
+  size_t remaining = len_in;
+  ssize_t result = 0;
+  (void)result;
+
+  /* Parse u8 version IN [0] */
+  CHECK_REMAINING(1, truncated);
+  obj->version = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->version == 0))
+    goto fail;
+
+  /* Parse u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP] */
+  CHECK_REMAINING(1, truncated);
+  obj->command = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->command == CIRCPAD_COMMAND_START || obj->command == CIRCPAD_COMMAND_STOP))
+    goto fail;
+
+  /* Parse u8 response IN [CIRCPAD_RESPONSE_ERR, CIRCPAD_RESPONSE_OK] */
+  CHECK_REMAINING(1, truncated);
+  obj->response = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  if (! (obj->response == CIRCPAD_RESPONSE_ERR || obj->response == CIRCPAD_RESPONSE_OK))
+    goto fail;
+
+  /* Parse u8 machine_type */
+  CHECK_REMAINING(1, truncated);
+  obj->machine_type = (trunnel_get_uint8(ptr));
+  remaining -= 1; ptr += 1;
+  trunnel_assert(ptr + remaining == input + len_in);
+  return len_in - remaining;
+
+ truncated:
+  return -2;
+ fail:
+  result = -1;
+  return result;
+}
+
+ssize_t
+circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in)
+{
+  ssize_t result;
+  *output = circpad_negotiated_new();
+  if (NULL == *output)
+    return -1;
+  result = circpad_negotiated_parse_into(*output, input, len_in);
+  if (result < 0) {
+    circpad_negotiated_free(*output);
+    *output = NULL;
+  }
+  return result;
+}

+ 195 - 0
src/trunnel/circpad_negotiation.h

@@ -0,0 +1,195 @@
+/* circpad_negotiation.h -- generated by Trunnel v1.5.2.
+ * https://gitweb.torproject.org/trunnel.git
+ * You probably shouldn't edit this file.
+ */
+#ifndef TRUNNEL_CIRCPAD_NEGOTIATION_H
+#define TRUNNEL_CIRCPAD_NEGOTIATION_H
+
+#include <stdint.h>
+#include "trunnel.h"
+
+#define CIRCPAD_COMMAND_STOP 1
+#define CIRCPAD_COMMAND_START 2
+#define CIRCPAD_RESPONSE_OK 1
+#define CIRCPAD_RESPONSE_ERR 2
+#define CIRCPAD_MACHINE_CIRC_SETUP 1
+/**
+ * This command tells the relay to alter its min and max netflow
+ * timeout range values, and send padding at that rate (resuming
+ * if stopped). */
+#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATE)
+struct circpad_negotiate_st {
+  uint8_t version;
+  uint8_t command;
+  /** Machine type is left unbounded because we can specify
+     * new machines in the consensus */
+  uint8_t machine_type;
+  /** If true, send a relay_drop reply.. */
+  uint8_t echo_request;
+  uint8_t trunnel_error_code_;
+};
+#endif
+typedef struct circpad_negotiate_st circpad_negotiate_t;
+/**
+ * This command tells the relay to alter its min and max netflow
+ * timeout range values, and send padding at that rate (resuming
+ * if stopped). */
+#if !defined(TRUNNEL_OPAQUE) && !defined(TRUNNEL_OPAQUE_CIRCPAD_NEGOTIATED)
+struct circpad_negotiated_st {
+  uint8_t version;
+  uint8_t command;
+  uint8_t response;
+  /** Machine type is left unbounded because we can specify
+     * new machines in the consensus */
+  uint8_t machine_type;
+  uint8_t trunnel_error_code_;
+};
+#endif
+typedef struct circpad_negotiated_st circpad_negotiated_t;
+/** Return a newly allocated circpad_negotiate with all elements set
+ * to zero.
+ */
+circpad_negotiate_t *circpad_negotiate_new(void);
+/** Release all storage held by the circpad_negotiate in 'victim'. (Do
+ * nothing if 'victim' is NULL.)
+ */
+void circpad_negotiate_free(circpad_negotiate_t *victim);
+/** Try to parse a circpad_negotiate from the buffer in 'input', using
+ * up to 'len_in' bytes from the input buffer. On success, return the
+ * number of bytes consumed and set *output to the newly allocated
+ * circpad_negotiate_t. On failure, return -2 if the input appears
+ * truncated, and -1 if the input is otherwise invalid.
+ */
+ssize_t circpad_negotiate_parse(circpad_negotiate_t **output, const uint8_t *input, const size_t len_in);
+/** Return the number of bytes we expect to need to encode the
+ * circpad_negotiate in 'obj'. On failure, return a negative value.
+ * Note that this value may be an overestimate, and can even be an
+ * underestimate for certain unencodeable objects.
+ */
+ssize_t circpad_negotiate_encoded_len(const circpad_negotiate_t *obj);
+/** Try to encode the circpad_negotiate from 'input' into the buffer
+ * at 'output', using up to 'avail' bytes of the output buffer. On
+ * success, return the number of bytes used. On failure, return -2 if
+ * the buffer was not long enough, and -1 if the input was invalid.
+ */
+ssize_t circpad_negotiate_encode(uint8_t *output, size_t avail, const circpad_negotiate_t *input);
+/** Check whether the internal state of the circpad_negotiate in 'obj'
+ * is consistent. Return NULL if it is, and a short message if it is
+ * not.
+ */
+const char *circpad_negotiate_check(const circpad_negotiate_t *obj);
+/** Clear any errors that were set on the object 'obj' by its setter
+ * functions. Return true iff errors were cleared.
+ */
+int circpad_negotiate_clear_errors(circpad_negotiate_t *obj);
+/** Return the value of the version field of the circpad_negotiate_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiate_get_version(const circpad_negotiate_t *inp);
+/** Set the value of the version field of the circpad_negotiate_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiate_set_version(circpad_negotiate_t *inp, uint8_t val);
+/** Return the value of the command field of the circpad_negotiate_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiate_get_command(const circpad_negotiate_t *inp);
+/** Set the value of the command field of the circpad_negotiate_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiate_set_command(circpad_negotiate_t *inp, uint8_t val);
+/** Return the value of the machine_type field of the
+ * circpad_negotiate_t in 'inp'
+ */
+uint8_t circpad_negotiate_get_machine_type(const circpad_negotiate_t *inp);
+/** Set the value of the machine_type field of the circpad_negotiate_t
+ * in 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiate_set_machine_type(circpad_negotiate_t *inp, uint8_t val);
+/** Return the value of the echo_request field of the
+ * circpad_negotiate_t in 'inp'
+ */
+uint8_t circpad_negotiate_get_echo_request(const circpad_negotiate_t *inp);
+/** Set the value of the echo_request field of the circpad_negotiate_t
+ * in 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiate_set_echo_request(circpad_negotiate_t *inp, uint8_t val);
+/** Return a newly allocated circpad_negotiated with all elements set
+ * to zero.
+ */
+circpad_negotiated_t *circpad_negotiated_new(void);
+/** Release all storage held by the circpad_negotiated in 'victim'.
+ * (Do nothing if 'victim' is NULL.)
+ */
+void circpad_negotiated_free(circpad_negotiated_t *victim);
+/** Try to parse a circpad_negotiated from the buffer in 'input',
+ * using up to 'len_in' bytes from the input buffer. On success,
+ * return the number of bytes consumed and set *output to the newly
+ * allocated circpad_negotiated_t. On failure, return -2 if the input
+ * appears truncated, and -1 if the input is otherwise invalid.
+ */
+ssize_t circpad_negotiated_parse(circpad_negotiated_t **output, const uint8_t *input, const size_t len_in);
+/** Return the number of bytes we expect to need to encode the
+ * circpad_negotiated in 'obj'. On failure, return a negative value.
+ * Note that this value may be an overestimate, and can even be an
+ * underestimate for certain unencodeable objects.
+ */
+ssize_t circpad_negotiated_encoded_len(const circpad_negotiated_t *obj);
+/** Try to encode the circpad_negotiated from 'input' into the buffer
+ * at 'output', using up to 'avail' bytes of the output buffer. On
+ * success, return the number of bytes used. On failure, return -2 if
+ * the buffer was not long enough, and -1 if the input was invalid.
+ */
+ssize_t circpad_negotiated_encode(uint8_t *output, size_t avail, const circpad_negotiated_t *input);
+/** Check whether the internal state of the circpad_negotiated in
+ * 'obj' is consistent. Return NULL if it is, and a short message if
+ * it is not.
+ */
+const char *circpad_negotiated_check(const circpad_negotiated_t *obj);
+/** Clear any errors that were set on the object 'obj' by its setter
+ * functions. Return true iff errors were cleared.
+ */
+int circpad_negotiated_clear_errors(circpad_negotiated_t *obj);
+/** Return the value of the version field of the circpad_negotiated_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiated_get_version(const circpad_negotiated_t *inp);
+/** Set the value of the version field of the circpad_negotiated_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiated_set_version(circpad_negotiated_t *inp, uint8_t val);
+/** Return the value of the command field of the circpad_negotiated_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiated_get_command(const circpad_negotiated_t *inp);
+/** Set the value of the command field of the circpad_negotiated_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiated_set_command(circpad_negotiated_t *inp, uint8_t val);
+/** Return the value of the response field of the circpad_negotiated_t
+ * in 'inp'
+ */
+uint8_t circpad_negotiated_get_response(const circpad_negotiated_t *inp);
+/** Set the value of the response field of the circpad_negotiated_t in
+ * 'inp' to 'val'. Return 0 on success; return -1 and set the error
+ * code on 'inp' on failure.
+ */
+int circpad_negotiated_set_response(circpad_negotiated_t *inp, uint8_t val);
+/** Return the value of the machine_type field of the
+ * circpad_negotiated_t in 'inp'
+ */
+uint8_t circpad_negotiated_get_machine_type(const circpad_negotiated_t *inp);
+/** Set the value of the machine_type field of the
+ * circpad_negotiated_t in 'inp' to 'val'. Return 0 on success; return
+ * -1 and set the error code on 'inp' on failure.
+ */
+int circpad_negotiated_set_machine_type(circpad_negotiated_t *inp, uint8_t val);
+
+
+#endif

+ 44 - 0
src/trunnel/circpad_negotiation.trunnel

@@ -0,0 +1,44 @@
+/* These are the padding negotiation commands */
+const CIRCPAD_COMMAND_STOP = 1;
+const CIRCPAD_COMMAND_START = 2;
+
+/* Responses to commands */
+const CIRCPAD_RESPONSE_OK = 1;
+const CIRCPAD_RESPONSE_ERR = 2;
+
+/* Built-in machine types */
+
+/* 1) Machine that obscures circuit setup */
+const CIRCPAD_MACHINE_CIRC_SETUP = 1;
+
+/**
+ * This command tells the relay to alter its min and max netflow
+ * timeout range values, and send padding at that rate (resuming
+ * if stopped). */
+struct circpad_negotiate {
+  u8 version IN [0];
+  u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP];
+
+  /** Machine type is left unbounded because we can specify
+   * new machines in the consensus */
+  u8 machine_type;
+
+  /** If true, send a relay_drop reply.. */
+  // FIXME-MP-AP: Maybe we just say to transition to the first state
+  // here instead.. Also what about delay before responding?
+  u8 echo_request IN [0,1];
+};
+
+/**
+ * This command tells the relay to alter its min and max netflow
+ * timeout range values, and send padding at that rate (resuming
+ * if stopped). */
+struct circpad_negotiated {
+  u8 version IN [0];
+  u8 command IN [CIRCPAD_COMMAND_START, CIRCPAD_COMMAND_STOP];
+  u8 response IN [CIRCPAD_RESPONSE_OK, CIRCPAD_RESPONSE_ERR];
+
+  /** Machine type is left unbounded because we can specify
+   * new machines in the consensus */
+  u8 machine_type;
+};

+ 7 - 4
src/trunnel/include.am

@@ -11,7 +11,8 @@ TRUNNELINPUTS = \
 	src/trunnel/link_handshake.trunnel \
 	src/trunnel/link_handshake.trunnel \
 	src/trunnel/pwbox.trunnel \
 	src/trunnel/pwbox.trunnel \
 	src/trunnel/channelpadding_negotiation.trunnel \
 	src/trunnel/channelpadding_negotiation.trunnel \
-	src/trunner/socks5.trunnel
+	src/trunnel/socks5.trunnel \
+	src/trunnel/circpad_negotiation.trunnel
 
 
 TRUNNELSOURCES = \
 TRUNNELSOURCES = \
 	src/ext/trunnel/trunnel.c \
 	src/ext/trunnel/trunnel.c \
@@ -23,8 +24,9 @@ TRUNNELSOURCES = \
 	src/trunnel/hs/cell_introduce1.c \
 	src/trunnel/hs/cell_introduce1.c \
 	src/trunnel/hs/cell_rendezvous.c \
 	src/trunnel/hs/cell_rendezvous.c \
 	src/trunnel/channelpadding_negotiation.c \
 	src/trunnel/channelpadding_negotiation.c \
-	src/trunnel/socks5.c                    \
-	src/trunnel/netinfo.c
+	src/trunnel/socks5.c \
+	src/trunnel/netinfo.c \
+	src/trunnel/circpad_negotiation.c
 
 
 TRUNNELHEADERS = \
 TRUNNELHEADERS = \
 	src/ext/trunnel/trunnel.h		\
 	src/ext/trunnel/trunnel.h		\
@@ -39,7 +41,8 @@ TRUNNELHEADERS = \
 	src/trunnel/hs/cell_rendezvous.h \
 	src/trunnel/hs/cell_rendezvous.h \
 	src/trunnel/channelpadding_negotiation.h \
 	src/trunnel/channelpadding_negotiation.h \
 	src/trunnel/socks5.h                    \
 	src/trunnel/socks5.h                    \
-	src/trunnel/netinfo.h
+	src/trunnel/netinfo.h \
+	src/trunnel/circpad_negotiation.h
 
 
 src_trunnel_libor_trunnel_a_SOURCES = $(TRUNNELSOURCES)
 src_trunnel_libor_trunnel_a_SOURCES = $(TRUNNELSOURCES)
 src_trunnel_libor_trunnel_a_CPPFLAGS = \
 src_trunnel_libor_trunnel_a_CPPFLAGS = \