13 years ago · dfbd19df41
--- a/changes/timed_onionqueue
+++ b/changes/timed_onionqueue
@@ -0,0 +1,11 @@
 
				+  o Minor features (relay):
			
 
				+    - Instead of limiting the number of queued onionskins to a configured,
			
 
				+      hard-to-configure number, we limit the size of the queue based on how
			
 
				+      many we expect to be able to process in a given amount of time. We
			
 
				+      estimate the time it will take to process an onionskin based on average
			
 
				+      processing time of previous onionskins.  Closes ticket 7291. You'll
			
 
				+      never have to configure MaxOnionsPending again.
			
 
				+
			
 
				+    - We compute the overhead from passing onionskins back and forth to
			
 
				+      cpuworkers, and report it when dumping statistics in response to
			
 
				+      SIGUSR1.
			
--- a/doc/tor.1.txt
+++ b/doc/tor.1.txt
@@ -1376,9 +1376,9 @@ is non-zero):
 
				     If set, and we are an exit node, allow clients to use us for IPv6
			
 
				     traffic. (Default: 0)
			
 
				 
			
 
				-**MaxOnionsPending** __NUM__::
			
 
				-    If you have more than this number of onionskins queued for decrypt, reject
			
 
				-    new ones. (Default: 100)
			
 
				+**MaxOnionQueueDelay** __NUM__ [**msec**|**second**]::
			
 
				+    If we have more onionskins queued for processing than we can process in
			
 
				+    this amount of time, reject new ones. (Default: 1750 msec)
			
 
				 
			
 
				 **MyFamily** __node__,__node__,__...__::
			
 
				     Declare that this Tor server is controlled or administered by a group or
			
--- a/src/or/config.c
+++ b/src/or/config.c
@@ -296,7 +296,8 @@ static config_var_t option_vars_[] = {
 
				   V(MaxAdvertisedBandwidth,      MEMUNIT,  "1 GB"),
			
 
				   V(MaxCircuitDirtiness,         INTERVAL, "10 minutes"),
			
 
				   V(MaxClientCircuitsPending,    UINT,     "32"),
			
 
				-  V(MaxOnionsPending,            UINT,     "100"),
			
 
				+  OBSOLETE("MaxOnionsPending"),
			
 
				+  V(MaxOnionQueueDelay,          MSEC_INTERVAL, "1750 msec"),
			
 
				   OBSOLETE("MonthlyAccountingStart"),
			
 
				   V(MyFamily,                    STRING,   NULL),
			
 
				   V(NewCircuitPeriod,            INTERVAL, "30 seconds"),
			
--- a/src/or/cpuworker.c
+++ b/src/or/cpuworker.c
@@ -98,6 +98,11 @@ typedef struct cpuworker_request_t {
 
				   /** Task code. Must be one of CPUWORKER_TASK_* */
			
 
				   uint8_t task;
			
 
				 
			
 
				+  /** Flag: Are we timing this request? */
			
 
				+  unsigned timed : 1;
			
 
				+  /** If we're timing this request, when was it sent to the cpuworker? */
			
 
				+  struct timeval started_at;
			
 
				+
			
 
				   /** A create cell for the cpuworker to process. */
			
 
				   create_cell_t create_cell;
			
 
				 
			
@@ -113,6 +118,17 @@ typedef struct cpuworker_reply_t {
 
				   /** True iff we got a successful request. */
			
 
				   uint8_t success;
			
 
				 
			
 
				+  /** Are we timing this request? */
			
 
				+  unsigned int timed : 1;
			
 
				+  /** What handshake type was the request? (Used for timing) */
			
 
				+  uint16_t handshake_type;
			
 
				+  /** When did we send the request to the cpuworker? */
			
 
				+  struct timeval started_at;
			
 
				+  /** Once the cpuworker received the request, how many microseconds did it
			
 
				+   * take? (This shouldn't overflow; 4 billion micoseconds is over an hour,
			
 
				+   * and we'll never have an onion handshake that takes so long.) */
			
 
				+  uint32_t n_usec;
			
 
				+
			
 
				   /** Output of processing a create cell
			
 
				    *
			
 
				    * @{
			
@@ -163,6 +179,110 @@ connection_cpu_reached_eof(connection_t *conn)
 
				   return 0;
			
 
				 }
			
 
				 
			
 
				+/** Indexed by handshake type: how many onionskins have we processed and
			
 
				+ * counted of that type? */
			
 
				+static uint64_t onionskins_n_processed[MAX_ONION_HANDSHAKE_TYPE+1];
			
 
				+/** Indexed by handshake type, corresponding to the onionskins counted in
			
 
				+ * onionskins_n_processed: how many microseconds have we spent in cpuworkers
			
 
				+ * processing that kind of onionskin? */
			
 
				+static uint64_t onionskins_usec_internal[MAX_ONION_HANDSHAKE_TYPE+1];
			
 
				+/** Indexed by handshake type, corresponding to onionskins counted in
			
 
				+ * onionskins_n_processed: how many microseconds have we spent waiting for
			
 
				+ * cpuworkers to give us answers for that kind of onionskin?
			
 
				+ */
			
 
				+static uint64_t onionskins_usec_roundtrip[MAX_ONION_HANDSHAKE_TYPE+1];
			
 
				+
			
 
				+/** If any onionskin takes longer than this, we clip them to this
			
 
				+ * time. (microseconds) */
			
 
				+#define MAX_BELIEVABLE_ONIONSKIN_DELAY (2*1000*1000)
			
 
				+
			
 
				+/** Return true iff we'd like to measure a handshake of type
			
 
				+ * <b>onionskin_type</b>. */
			
 
				+static int
			
 
				+should_time_request(uint16_t onionskin_type)
			
 
				+{
			
 
				+  /* If we've never heard of this type, we shouldn't even be here. */
			
 
				+  if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE)
			
 
				+    return 0;
			
 
				+  /* Measure the first N handshakes of each type, to ensure we have a
			
 
				+   * sample */
			
 
				+  if (onionskins_n_processed[onionskin_type] < 4096)
			
 
				+    return 1;
			
 
				+  /** Otherwise, measure with P=1/128.  We avoid doing this for every
			
 
				+   * handshake, since the measurement itself can take a little time. */
			
 
				+  return tor_weak_random() < (TOR_RAND_MAX/128);
			
 
				+}
			
 
				+
			
 
				+/** Return an estimate of how many microseconds we will need for a single
			
 
				+ * cpuworker to to process <b>n_requests</b> onionskins of type
			
 
				+ * <b>onionskin_type</b>. */
			
 
				+uint64_t
			
 
				+estimated_usec_for_onionskins(uint32_t n_requests, uint16_t onionskin_type)
			
 
				+{
			
 
				+  if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE) /* should be impossible */
			
 
				+    return 1000 * n_requests;
			
 
				+  if (PREDICT_UNLIKELY(onionskins_n_processed[onionskin_type] < 100)) {
			
 
				+    /* Until we have 100 data points, just asssume everything takes 1 msec. */
			
 
				+    return 1000 * n_requests;
			
 
				+  } else {
			
 
				+    /* This can't overflow: we'll never have more than 500000 onionskins
			
 
				+     * measured in onionskin_usec_internal, and they won't take anything near
			
 
				+     * 1 sec each, and we won't have anything like 1 million queued
			
 
				+     * onionskins.  But that's 5e5 * 1e6 * 1e6, which is still less than
			
 
				+     * UINT64_MAX. */
			
 
				+    return (onionskins_usec_internal[onionskin_type] * n_requests) /
			
 
				+      onionskins_n_processed[onionskin_type];
			
 
				+  }
			
 
				+}
			
 
				+
			
 
				+/** Compute the absolute and relative overhead of using the cpuworker
			
 
				+ * framework for onionskins of type <b>onionskin_type</b>.*/
			
 
				+static int
			
 
				+get_overhead_for_onionskins(uint32_t *usec_out, double *frac_out,
			
 
				+                            uint16_t onionskin_type)
			
 
				+{
			
 
				+  uint64_t overhead;
			
 
				+
			
 
				+  *usec_out = 0;
			
 
				+  *frac_out = 0.0;
			
 
				+
			
 
				+  if (onionskin_type > MAX_ONION_HANDSHAKE_TYPE) /* should be impossible */
			
 
				+    return -1;
			
 
				+  if (onionskins_n_processed[onionskin_type] == 0 ||
			
 
				+      onionskins_usec_internal[onionskin_type] == 0 ||
			
 
				+      onionskins_usec_roundtrip[onionskin_type] == 0)
			
 
				+    return -1;
			
 
				+
			
 
				+  overhead = onionskins_usec_roundtrip[onionskin_type] -
			
 
				+    onionskins_usec_internal[onionskin_type];
			
 
				+
			
 
				+  *usec_out = (uint32_t)(overhead / onionskins_n_processed[onionskin_type]);
			
 
				+  *frac_out = U64_TO_DBL(overhead) / onionskins_usec_internal[onionskin_type];
			
 
				+
			
 
				+  return 0;
			
 
				+}
			
 
				+
			
 
				+/** If we've measured overhead for onionskins of type <b>onionskin_type</b>,
			
 
				+ * log it. */
			
 
				+void
			
 
				+cpuworker_log_onionskin_overhead(int severity, int onionskin_type,
			
 
				+                                 const char *onionskin_type_name)
			
 
				+{
			
 
				+  uint32_t overhead;
			
 
				+  double relative_overhead;
			
 
				+  int r;
			
 
				+
			
 
				+  r = get_overhead_for_onionskins(&overhead,  &relative_overhead,
			
 
				+                                  onionskin_type);
			
 
				+  if (!overhead || r<0)
			
 
				+    return;
			
 
				+
			
 
				+  log_fn(severity, LD_OR,
			
 
				+         "%s onionskins have averaged %u usec overhead (%.2f%%) in "
			
 
				+         "cpuworker code ",
			
 
				+         onionskin_type_name, (unsigned)overhead, relative_overhead*100);
			
 
				+}
			
 
				+
			
 
				 /** Called when we get data from a cpuworker.  If the answer is not complete,
			
 
				  * wait for a complete answer. If the answer is complete,
			
 
				  * process it as appropriate.
			
@@ -190,6 +310,30 @@ connection_cpu_process_inbuf(connection_t *conn)
 
				     connection_fetch_from_buf((void*)&rpl,sizeof(cpuworker_reply_t),conn);
			
 
				 
			
 
				     tor_assert(rpl.magic == CPUWORKER_REPLY_MAGIC);
			
 
				+
			
 
				+    if (rpl.timed && rpl.success &&
			
 
				+        rpl.handshake_type <= MAX_ONION_HANDSHAKE_TYPE) {
			
 
				+      /* Time how long this request took. The handshake_type check should be
			
 
				+         needless, but let's leave it in to be safe. */
			
 
				+      struct timeval tv_end, tv_diff;
			
 
				+      int64_t usec_roundtrip;
			
 
				+      tor_gettimeofday(&tv_end);
			
 
				+      timersub(&tv_end, &rpl.started_at, &tv_diff);
			
 
				+      usec_roundtrip = ((int64_t)tv_diff.tv_sec)*1000000 + tv_diff.tv_usec;
			
 
				+      if (usec_roundtrip >= 0 &&
			
 
				+          usec_roundtrip < MAX_BELIEVABLE_ONIONSKIN_DELAY) {
			
 
				+        ++onionskins_n_processed[rpl.handshake_type];
			
 
				+        onionskins_usec_internal[rpl.handshake_type] += rpl.n_usec;
			
 
				+        onionskins_usec_roundtrip[rpl.handshake_type] += usec_roundtrip;
			
 
				+        if (onionskins_n_processed[rpl.handshake_type] >= 500000) {
			
 
				+          /* Scale down every 500000 handshakes.  On a busy server, that's
			
 
				+           * less impressive than it sounds. */
			
 
				+          onionskins_n_processed[rpl.handshake_type] /= 2;
			
 
				+          onionskins_usec_internal[rpl.handshake_type] /= 2;
			
 
				+          onionskins_usec_roundtrip[rpl.handshake_type] /= 2;
			
 
				+        }
			
 
				+      }
			
 
				+    }
			
 
				     /* parse out the circ it was talking about */
			
 
				     tag_unpack(rpl.tag, &chan_id, &circ_id);
			
 
				     circ = NULL;
			
@@ -289,7 +433,13 @@ cpuworker_main(void *data)
 
				     if (req.task == CPUWORKER_TASK_ONION) {
			
 
				       const create_cell_t *cc = &req.create_cell;
			
 
				       created_cell_t *cell_out = &rpl.created_cell;
			
 
				+      struct timeval tv_start, tv_end;
			
 
				       int n;
			
 
				+      rpl.timed = req.timed;
			
 
				+      rpl.started_at = req.started_at;
			
 
				+      rpl.handshake_type = cc->handshake_type;
			
 
				+      if (req.timed)
			
 
				+        tor_gettimeofday(&tv_start);
			
 
				       n = onion_skin_server_handshake(cc->handshake_type,
			
 
				                                       cc->onionskin, cc->handshake_len,
			
 
				                                       &onion_keys,
			
@@ -321,6 +471,17 @@ cpuworker_main(void *data)
 
				         rpl.success = 1;
			
 
				       }
			
 
				       rpl.magic = CPUWORKER_REPLY_MAGIC;
			
 
				+      if (req.timed) {
			
 
				+        struct timeval tv_diff;
			
 
				+        int64_t usec;
			
 
				+        tor_gettimeofday(&tv_end);
			
 
				+        timersub(&tv_end, &tv_start, &tv_diff);
			
 
				+        usec = ((int64_t)tv_diff.tv_sec)*1000000 + tv_diff.tv_usec;
			
 
				+        if (usec < 0 || usec > MAX_BELIEVABLE_ONIONSKIN_DELAY)
			
 
				+          rpl.n_usec = MAX_BELIEVABLE_ONIONSKIN_DELAY;
			
 
				+        else
			
 
				+          rpl.n_usec = (uint32_t) usec;
			
 
				+      }
			
 
				       if (write_all(fd, (void*)&rpl, sizeof(rpl), 1) != sizeof(rpl)) {
			
 
				         log_err(LD_BUG,"writing response buf failed. Exiting.");
			
 
				         goto end;
			
@@ -478,6 +639,7 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
 
				   cpuworker_request_t req;
			
 
				   time_t now = approx_time();
			
 
				   static time_t last_culled_cpuworkers = 0;
			
 
				+  int should_time;
			
 
				 
			
 
				   /* Checking for wedged cpuworkers requires a linear search over all
			
 
				    * connections, so let's do it only once a minute.
			
@@ -512,16 +674,18 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
 
				       return -1;
			
 
				     }
			
 
				 
			
 
				+    should_time = should_time_request(onionskin->handshake_type);
			
 
				     memset(&req, 0, sizeof(req));
			
 
				     req.magic = CPUWORKER_REQUEST_MAGIC;
			
 
				     tag_pack(req.tag, circ->p_chan->global_identifier,
			
 
				              circ->p_circ_id);
			
 
				+    req.timed = should_time;
			
 
				 
			
 
				     cpuworker->state = CPUWORKER_STATE_BUSY_ONION;
			
 
				     /* touch the lastwritten timestamp, since that's how we check to
			
 
				      * see how long it's been since we asked the question, and sometimes
			
 
				      * we check before the first call to connection_handle_write(). */
			
 
				-    cpuworker->timestamp_lastwritten = time(NULL);
			
 
				+    cpuworker->timestamp_lastwritten = now;
			
 
				     num_cpuworkers_busy++;
			
 
				 
			
 
				     req.task = CPUWORKER_TASK_ONION;
			
@@ -529,6 +693,9 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
 
				 
			
 
				     tor_free(onionskin);
			
 
				 
			
 
				+    if (should_time)
			
 
				+      tor_gettimeofday(&req.started_at);
			
 
				+
			
 
				     connection_write_to_buf((void*)&req, sizeof(req), cpuworker);
			
 
				     memwipe(&req, 0, sizeof(req));
			
 
				   }
			
--- a/src/or/cpuworker.h
+++ b/src/or/cpuworker.h
@@ -22,5 +22,10 @@ int assign_onionskin_to_cpuworker(connection_t *cpuworker,
 
				                                   or_circuit_t *circ,
			
 
				                                   struct create_cell_t *onionskin);
			
 
				 
			
 
				+uint64_t estimated_usec_for_onionskins(uint32_t n_requests,
			
 
				+                                       uint16_t onionskin_type);
			
 
				+void cpuworker_log_onionskin_overhead(int severity, int onionskin_type,
			
 
				+                                      const char *onionskin_type_name);
			
 
				+
			
 
				 #endif
			
 
				 
			
--- a/src/or/main.c
+++ b/src/or/main.c
@@ -2202,6 +2202,9 @@ dumpstats(int severity)
 
				         100*(U64_TO_DBL(stats_n_data_bytes_received) /
			
 
				              U64_TO_DBL(stats_n_data_cells_received*RELAY_PAYLOAD_SIZE)) );
			
 
				 
			
 
				+  cpuworker_log_onionskin_overhead(severity, ONION_HANDSHAKE_TYPE_TAP, "TAP");
			
 
				+  cpuworker_log_onionskin_overhead(severity, ONION_HANDSHAKE_TYPE_NTOR,"ntor");
			
 
				+
			
 
				   if (now - time_of_process_start >= 0)
			
 
				     elapsed = now - time_of_process_start;
			
 
				   else
			
--- a/src/or/onion.c
+++ b/src/or/onion.c
@@ -13,6 +13,7 @@
 
				 #include "or.h"
			
 
				 #include "circuitlist.h"
			
 
				 #include "config.h"
			
 
				+#include "cpuworker.h"
			
 
				 #include "onion.h"
			
 
				 #include "onion_fast.h"
			
 
				 #include "onion_ntor.h"
			
@@ -20,29 +21,71 @@
 
				 #include "relay.h"
			
 
				 #include "rephist.h"
			
 
				 #include "router.h"
			
 
				+#include "tor_queue.h"
			
 
				 
			
 
				 /** Type for a linked list of circuits that are waiting for a free CPU worker
			
 
				  * to process a waiting onion handshake. */
			
 
				 typedef struct onion_queue_t {
			
 
				+  TAILQ_ENTRY(onion_queue_t) next;
			
 
				   or_circuit_t *circ;
			
 
				   create_cell_t *onionskin;
			
 
				   time_t when_added;
			
 
				-  struct onion_queue_t *next;
			
 
				 } onion_queue_t;
			
 
				 
			
 
				 /** 5 seconds on the onion queue til we just send back a destroy */
			
 
				 #define ONIONQUEUE_WAIT_CUTOFF 5
			
 
				 
			
 
				-/** First and last elements in the linked list of circuits waiting for CPU
			
 
				- * workers, or NULL if the list is empty.
			
 
				- * @{ */
			
 
				-static onion_queue_t *ol_list=NULL;
			
 
				-static onion_queue_t *ol_tail=NULL;
			
 
				-/**@}*/
			
 
				-/** Length of ol_list */
			
 
				-static int ol_length=0;
			
 
				+/** Queue of circuits waiting for CPU workers, or NULL if the list is empty.*/
			
 
				+TAILQ_HEAD(onion_queue_head_t, onion_queue_t) ol_list =
			
 
				+  TAILQ_HEAD_INITIALIZER(ol_list);
			
 
				 
			
 
				-/* XXXX Check lengths vs MAX_ONIONSKIN_{CHALLENGE,REPLY}_LEN */
			
 
				+/** Number of entries of each type currently in ol_list. */
			
 
				+static int ol_entries[MAX_ONION_HANDSHAKE_TYPE+1];
			
 
				+
			
 
				+static void onion_queue_entry_remove(onion_queue_t *victim);
			
 
				+
			
 
				+/* XXXX024 Check lengths vs MAX_ONIONSKIN_{CHALLENGE,REPLY}_LEN.
			
 
				+ *
			
 
				+ * (By which I think I meant, "make sure that no
			
 
				+ * X_ONIONSKIN_CHALLENGE/REPLY_LEN is greater than
			
 
				+ * MAX_ONIONSKIN_CHALLENGE/REPLY_LEN."  Also, make sure that we can pass
			
 
				+ * over-large values via EXTEND2/EXTENDED2, for future-compatibility.*/
			
 
				+
			
 
				+/** Return true iff we have room to queue another oninoskin of type
			
 
				+ * <b>type</b>. */
			
 
				+static int
			
 
				+have_room_for_onionskin(uint16_t type)
			
 
				+{
			
 
				+  const or_options_t *options = get_options();
			
 
				+  int num_cpus;
			
 
				+  uint64_t tap_usec, ntor_usec;
			
 
				+  /* If we've got fewer than 50 entries, we always have room for one more. */
			
 
				+  if (ol_entries[ONION_HANDSHAKE_TYPE_TAP] +
			
 
				+      ol_entries[ONION_HANDSHAKE_TYPE_NTOR] < 50)
			
 
				+    return 1;
			
 
				+  num_cpus = get_num_cpus(options);
			
 
				+  /* Compute how many microseconds we'd expect to need to clear all
			
 
				+   * onionskins in the current queue. */
			
 
				+  tap_usec  = estimated_usec_for_onionskins(
			
 
				+                                    ol_entries[ONION_HANDSHAKE_TYPE_TAP],
			
 
				+                                    ONION_HANDSHAKE_TYPE_TAP) / num_cpus;
			
 
				+  ntor_usec = estimated_usec_for_onionskins(
			
 
				+                                    ol_entries[ONION_HANDSHAKE_TYPE_NTOR],
			
 
				+                                    ONION_HANDSHAKE_TYPE_NTOR) / num_cpus;
			
 
				+  /* See whether that exceeds MaxOnionQueueDelay. If so, we can't queue
			
 
				+   * this. */
			
 
				+  if ((tap_usec + ntor_usec) / 1000 > (uint64_t)options->MaxOnionQueueDelay)
			
 
				+    return 0;
			
 
				+#ifdef CURVE25519_ENABLED
			
 
				+  /* If we support the ntor handshake, then don't let TAP handshakes use
			
 
				+   * more than 2/3 of the space on the queue. */
			
 
				+  if (type == ONION_HANDSHAKE_TYPE_TAP &&
			
 
				+      tap_usec / 1000 > (uint64_t)options->MaxOnionQueueDelay * 2 / 3)
			
 
				+    return 0;
			
 
				+#endif
			
 
				+
			
 
				+  return 1;
			
 
				+}
			
 
				 
			
 
				 /** Add <b>circ</b> to the end of ol_list and return 0, except
			
 
				  * if ol_list is too long, in which case do nothing and return -1.
			
@@ -58,19 +101,7 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
 
				   tmp->onionskin = onionskin;
			
 
				   tmp->when_added = now;
			
 
				 
			
 
				-  if (!ol_tail) {
			
 
				-    tor_assert(!ol_list);
			
 
				-    tor_assert(!ol_length);
			
 
				-    ol_list = tmp;
			
 
				-    ol_tail = tmp;
			
 
				-    ol_length++;
			
 
				-    return 0;
			
 
				-  }
			
 
				-
			
 
				-  tor_assert(ol_list);
			
 
				-  tor_assert(!ol_tail->next);
			
 
				-
			
 
				-  if (ol_length >= get_options()->MaxOnionsPending) {
			
 
				+  if (!have_room_for_onionskin(onionskin->handshake_type)) {
			
 
				 #define WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL (60)
			
 
				     static ratelim_t last_warned =
			
 
				       RATELIM_INIT(WARN_TOO_MANY_CIRC_CREATIONS_INTERVAL);
			
@@ -87,13 +118,19 @@ onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
 
				     return -1;
			
 
				   }
			
 
				 
			
 
				-  ol_length++;
			
 
				-  ol_tail->next = tmp;
			
 
				-  ol_tail = tmp;
			
 
				-  while ((int)(now - ol_list->when_added) >= ONIONQUEUE_WAIT_CUTOFF) {
			
 
				-    /* cull elderly requests. */
			
 
				-    circ = ol_list->circ;
			
 
				-    onion_pending_remove(ol_list->circ);
			
 
				+  ++ol_entries[onionskin->handshake_type];
			
 
				+  circ->onionqueue_entry = tmp;
			
 
				+  TAILQ_INSERT_TAIL(&ol_list, tmp, next);
			
 
				+
			
 
				+  /* cull elderly requests. */
			
 
				+  while (1) {
			
 
				+    onion_queue_t *head = TAILQ_FIRST(&ol_list);
			
 
				+    if (now - head->when_added < (time_t)ONIONQUEUE_WAIT_CUTOFF)
			
 
				+      break;
			
 
				+
			
 
				+    circ = head->circ;
			
 
				+    circ->onionqueue_entry = NULL;
			
 
				+    onion_queue_entry_remove(head);
			
 
				     log_info(LD_CIRC,
			
 
				              "Circuit create request is too old; canceling due to overload.");
			
 
				     circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_RESOURCELIMIT);
			
@@ -108,17 +145,21 @@ or_circuit_t *
 
				 onion_next_task(create_cell_t **onionskin_out)
			
 
				 {
			
 
				   or_circuit_t *circ;
			
 
				+  onion_queue_t *head = TAILQ_FIRST(&ol_list);
			
 
				 
			
 
				-  if (!ol_list)
			
 
				+  if (!head)
			
 
				     return NULL; /* no onions pending, we're done */
			
 
				 
			
 
				-  tor_assert(ol_list->circ);
			
 
				-  tor_assert(ol_list->circ->p_chan); /* make sure it's still valid */
			
 
				-  tor_assert(ol_length > 0);
			
 
				-  circ = ol_list->circ;
			
 
				-  *onionskin_out = ol_list->onionskin;
			
 
				-  ol_list->onionskin = NULL; /* prevent free. */
			
 
				-  onion_pending_remove(ol_list->circ);
			
 
				+  tor_assert(head->circ);
			
 
				+  tor_assert(head->circ->p_chan); /* make sure it's still valid */
			
 
				+  circ = head->circ;
			
 
				+  if (head->onionskin &&
			
 
				+      head->onionskin->handshake_type <= MAX_ONION_HANDSHAKE_TYPE)
			
 
				+    --ol_entries[head->onionskin->handshake_type];
			
 
				+  *onionskin_out = head->onionskin;
			
 
				+  head->onionskin = NULL; /* prevent free. */
			
 
				+  circ->onionqueue_entry = NULL;
			
 
				+  onion_queue_entry_remove(head);
			
 
				   return circ;
			
 
				 }
			
 
				 
			
@@ -128,37 +169,29 @@ onion_next_task(create_cell_t **onionskin_out)
 
				 void
			
 
				 onion_pending_remove(or_circuit_t *circ)
			
 
				 {
			
 
				-  onion_queue_t *tmpo, *victim;
			
 
				-
			
 
				-  if (!ol_list)
			
 
				-    return; /* nothing here. */
			
 
				-
			
 
				-  /* first check to see if it's the first entry */
			
 
				-  tmpo = ol_list;
			
 
				-  if (tmpo->circ == circ) {
			
 
				-    /* it's the first one. remove it from the list. */
			
 
				-    ol_list = tmpo->next;
			
 
				-    if (!ol_list)
			
 
				-      ol_tail = NULL;
			
 
				-    ol_length--;
			
 
				-    victim = tmpo;
			
 
				-  } else { /* we need to hunt through the rest of the list */
			
 
				-    for ( ;tmpo->next && tmpo->next->circ != circ; tmpo=tmpo->next) ;
			
 
				-    if (!tmpo->next) {
			
 
				-      log_debug(LD_GENERAL,
			
 
				-                "circ (p_circ_id %d) not in list, probably at cpuworker.",
			
 
				-                circ->p_circ_id);
			
 
				-      return;
			
 
				-    }
			
 
				-    /* now we know tmpo->next->circ == circ */
			
 
				-    victim = tmpo->next;
			
 
				-    tmpo->next = victim->next;
			
 
				-    if (ol_tail == victim)
			
 
				-      ol_tail = tmpo;
			
 
				-    ol_length--;
			
 
				-  }
			
 
				+  onion_queue_t *victim;
			
 
				+
			
 
				+  if (!circ)
			
 
				+    return;
			
 
				+
			
 
				+  victim = circ->onionqueue_entry;
			
 
				+  if (victim)
			
 
				+    onion_queue_entry_remove(victim);
			
 
				+}
			
 
				+
			
 
				+/** Remove a queue entry <b>victim</b> from the queue, unlinking it from
			
 
				+ * its circuit and freeing it and any structures it owns.*/
			
 
				+static void
			
 
				+onion_queue_entry_remove(onion_queue_t *victim)
			
 
				+{
			
 
				+  TAILQ_REMOVE(&ol_list, victim, next);
			
 
				+
			
 
				+  if (victim->circ)
			
 
				+    victim->circ->onionqueue_entry = NULL;
			
 
				 
			
 
				-  /* now victim points to the element that needs to be removed */
			
 
				+  if (victim->onionskin &&
			
 
				+      victim->onionskin->handshake_type <= MAX_ONION_HANDSHAKE_TYPE)
			
 
				+    --ol_entries[victim->onionskin->handshake_type];
			
 
				 
			
 
				   tor_free(victim->onionskin);
			
 
				   tor_free(victim);
			
@@ -168,14 +201,11 @@ onion_pending_remove(or_circuit_t *circ)
 
				 void
			
 
				 clear_pending_onions(void)
			
 
				 {
			
 
				-  while (ol_list) {
			
 
				-    onion_queue_t *victim = ol_list;
			
 
				-    ol_list = victim->next;
			
 
				-    tor_free(victim->onionskin);
			
 
				-    tor_free(victim);
			
 
				+  onion_queue_t *victim;
			
 
				+  while ((victim = TAILQ_FIRST(&ol_list))) {
			
 
				+    onion_queue_entry_remove(victim);
			
 
				   }
			
 
				-  ol_list = ol_tail = NULL;
			
 
				-  ol_length = 0;
			
 
				+  memset(ol_entries, 0, sizeof(ol_entries));
			
 
				 }
			
 
				 
			
 
				 /* ============================================================ */
			
--- a/src/or/or.h
+++ b/src/or/or.h
@@ -2584,6 +2584,7 @@ struct ntor_handshake_state_t;
 
				 #define ONION_HANDSHAKE_TYPE_TAP  0x0000
			
 
				 #define ONION_HANDSHAKE_TYPE_FAST 0x0001
			
 
				 #define ONION_HANDSHAKE_TYPE_NTOR 0x0002
			
 
				+#define MAX_ONION_HANDSHAKE_TYPE 0x0002
			
 
				 typedef struct {
			
 
				   uint16_t tag;
			
 
				   union {
			
@@ -3000,6 +3001,8 @@ typedef struct origin_circuit_t {
 
				 
			
 
				 } origin_circuit_t;
			
 
				 
			
 
				+struct onion_queue_t;
			
 
				+
			
 
				 /** An or_circuit_t holds information needed to implement a circuit at an
			
 
				  * OR. */
			
 
				 typedef struct or_circuit_t {
			
@@ -3013,6 +3016,9 @@ typedef struct or_circuit_t {
 
				    * cells to p_chan.  NULL if we have no cells pending, or if we're not
			
 
				    * linked to an OR connection. */
			
 
				   struct circuit_t *prev_active_on_p_chan;
			
 
				+  /** Pointer to an entry on the onion queue, if this circuit is waiting for a
			
 
				+   * chance to give an onionskin to a cpuworker. Used only in onion.c */
			
 
				+  struct onion_queue_t *onionqueue_entry;
			
 
				 
			
 
				   /** The circuit_id used in the previous (backward) hop of this circuit. */
			
 
				   circid_t p_circ_id;
			
@@ -3496,9 +3502,7 @@ typedef struct {
 
				                              * and try a new circuit if the stream has been
			
 
				                              * waiting for this many seconds. If zero, use
			
 
				                              * our default internal timeout schedule. */
			
 
				-  int MaxOnionsPending; /**< How many circuit CREATE requests do we allow
			
 
				-                         * to wait simultaneously before we start dropping
			
 
				-                         * them? */
			
 
				+  int MaxOnionQueueDelay; /**<DOCDOC*/
			
 
				   int NewCircuitPeriod; /**< How long do we use a circuit before building
			
 
				                          * a new one? */
			
 
				   int MaxCircuitDirtiness; /**< Never use circs that were first used more than