Browse Source

Merge branch 'ntor-resquashed'

Conflicts:
	src/or/cpuworker.c
	src/or/or.h
	src/test/bench.c
Nick Mathewson 11 years ago
parent
commit
b1bdecd703
59 changed files with 5876 additions and 601 deletions
  1. 6 1
      .gitignore
  2. 40 0
      changes/ntor
  3. 99 0
      configure.ac
  4. 10 0
      doc/tor.1.txt
  5. 124 39
      src/common/crypto.c
  6. 11 2
      src/common/crypto.h
  7. 180 0
      src/common/crypto_curve25519.c
  8. 61 0
      src/common/crypto_curve25519.h
  9. 89 0
      src/common/di_ops.c
  10. 16 0
      src/common/di_ops.h
  11. 25 1
      src/common/include.am
  12. 4 1
      src/ext/README
  13. 44 0
      src/ext/curve25519_donna/README
  14. 449 0
      src/ext/curve25519_donna/curve25519-donna-c64.c
  15. 730 0
      src/ext/curve25519_donna/curve25519-donna.c
  16. 2 0
      src/or/channeltls.c
  17. 211 119
      src/or/circuitbuild.c
  18. 10 6
      src/or/circuitbuild.h
  19. 8 5
      src/or/circuitlist.c
  20. 2 2
      src/or/circuituse.c
  21. 59 16
      src/or/command.c
  22. 1 0
      src/or/config.c
  23. 119 86
      src/or/cpuworker.c
  24. 2 1
      src/or/cpuworker.h
  25. 2 1
      src/or/dirserv.c
  26. 9 0
      src/or/dirvote.c
  27. 5 1
      src/or/dirvote.h
  28. 1 1
      src/or/entrynodes.c
  29. 14 1
      src/or/include.am
  30. 1 0
      src/or/microdesc.c
  31. 12 0
      src/or/nodelist.c
  32. 1 0
      src/or/nodelist.h
  33. 795 237
      src/or/onion.c
  34. 94 24
      src/or/onion.h
  35. 123 0
      src/or/onion_fast.c
  36. 38 0
      src/or/onion_fast.h
  37. 295 0
      src/or/onion_ntor.c
  38. 63 0
      src/or/onion_ntor.h
  39. 218 0
      src/or/onion_tap.c
  40. 37 0
      src/or/onion_tap.h
  41. 46 18
      src/or/or.h
  42. 21 7
      src/or/relay.c
  43. 7 7
      src/or/rendclient.c
  44. 2 2
      src/or/rendmid.c
  45. 7 7
      src/or/rendservice.c
  46. 219 5
      src/or/router.c
  47. 6 0
      src/or/router.h
  48. 1 0
      src/or/routerlist.c
  49. 36 0
      src/or/routerparse.c
  50. 142 1
      src/test/bench.c
  51. 17 2
      src/test/include.am
  52. 387 0
      src/test/ntor_ref.py
  53. 69 8
      src/test/test.c
  54. 502 0
      src/test/test_cell_formats.c
  55. 45 0
      src/test/test_containers.c
  56. 181 0
      src/test/test_crypto.c
  57. 166 0
      src/test/test_ntor_cl.c
  58. 10 0
      src/test/test_util.c
  59. 2 0
      src/tools/include.am

+ 6 - 1
.gitignore

@@ -24,6 +24,8 @@
 .dirstamp
 # Stuff made by our makefiles
 *.bak
+# Python droppings
+*.pyc
 
 # /
 /Makefile
@@ -130,6 +132,8 @@
 /src/common/libor-crypto.lib
 /src/common/libor-event.a
 /src/common/libor-event.lib
+/src/common/libcurve25519_donna.a
+/src/common/libcurve25519_donna.lib
 
 # /src/config/
 /src/config/Makefile
@@ -154,9 +158,10 @@
 /src/test/bench.exe
 /src/test/test
 /src/test/test-child
+/src/test/test-ntor-cl
 /src/test/test.exe
 /src/test/test-child.exe
-
+/src/test/test-ntor-cl.exe
 
 # /src/tools/
 /src/tools/tor-checkkey

+ 40 - 0
changes/ntor

@@ -0,0 +1,40 @@
+  o Major features:
+
+    - Tor now supports a new circuit extension handshake designed by Ian
+      Goldberg, Douglas Stebila, and Berkant Ustaoglu. Our original
+      circuit extension handshake, later called "TAP", was a bit slow
+      (especially on the server side), had a fragile security proof, and
+      used weaker keys than we'd now prefer. The new circuit handshake
+      uses Dan Bernstein's "curve25519" elliptic-curve Diffie-Hellman
+      function, making it significantly more secure than the older
+      handshake, and significantly faster. Tor can either use one of two
+      built-in pure-C curve25519-donna implementations by Adam Langley,
+      or link against the "nacl" library for a tuned version if present.
+
+      The built-in version is very fast for 64-bit systems building with
+      GCC. (About 10-14x faster on the server side, and about 7x faster
+      on the client side.) The built-in 32-bit version is still faster
+      than the old TAP protocol (about 3x), but using libnacl would be
+      better on most 32-bit x86 hosts.
+
+      Clients don't currently use this protocol by default, since
+      comparatively few clients support it so far. To try it, set
+      UseNTorHandshake to 1.
+
+      Implements proposal 216; closes ticket #7202.
+
+    - Tor servers and clients now support a better CREATE/EXTEND cell
+      format, allowing the sender to specify multiple address, identity,
+      and handshake types.  Implements Robert Ransom's proposal 200;
+      closes ticket #7199.
+
+  o Code simplification and refactoring:
+    - Split the onion.c file into separate modules for the onion queue
+      and the different handshakes it supports.
+    - Remove the marshalling/unmarshalling code for sending requests to
+      cpuworkers over a socket, and instead just send structs.  The
+      recipient will always be the same Tor binary as the sender, so
+      any encoding is overkill.
+
+  o Testing:
+    - Add benchmark functions to test onion handshake performance.

+ 99 - 0
configure.ac

@@ -36,6 +36,8 @@ AC_ARG_ENABLE(static-zlib,
    AS_HELP_STRING(--enable-static-zlib, Link against a static zlib library. Requires --with-zlib-dir))
 AC_ARG_ENABLE(static-tor,
    AS_HELP_STRING(--enable-static-tor, Create an entirely static Tor binary. Requires --with-openssl-dir and --with-libevent-dir and --with-zlib-dir))
+AC_ARG_ENABLE(curve25519,
+   AS_HELP_STRING(--disable-curve25519, Build Tor with no curve25519 elliptic-curve crypto support))
 
 if test "$enable_static_tor" = "yes"; then
   enable_static_libevent="yes";
@@ -639,6 +641,103 @@ if test "$upnp" = "true"; then
     fi
 fi
 
+dnl ============================================================
+dnl We need an implementation of curve25519.
+
+dnl set these defaults.
+have_a_curve25519=no
+build_curve25519_donna=no
+build_curve25519_donna_c64=no
+use_curve25519_donna=no
+use_curve25519_nacl=no
+CURVE25519_LIBS=
+
+if test x$enable_curve25519 != xno; then
+
+  dnl The best choice is using curve25519-donna-c64, but that requires
+  dnl that we have a compiler with a working 128-bit integer type.
+  dnl Multiply two 64-bit values into a 128-bit product and compare each
+  dnl 32-bit word of the result against its known value.
+  dnl AC_RUN_IFELSE treats an exit status of 0 as success, so the test
+  dnl program returns !ok.  Each masked word is parenthesized because ==
+  dnl binds more tightly than & in C.
+  dnl When cross-compiling we cannot run anything, so we only check that
+  dnl the same program compiles and record the result as "cross".
+  AC_CACHE_CHECK([whether we can use curve25519-donna-c64],
+    tor_cv_can_use_curve25519_donna_c64,
+    [AC_RUN_IFELSE(
+      [AC_LANG_PROGRAM([dnl
+        #include <stdint.h>
+        typedef unsigned uint128_t __attribute__((mode(TI)));
+	], [dnl
+          uint64_t a = ((uint64_t)2000000000) * 1000000000;
+	  uint64_t b = ((uint64_t)1234567890) << 24;
+	  uint128_t c = ((uint128_t)a) * b;
+	  int ok = ((uint64_t)(c>>96)) == 522859 &&
+	         (((uint64_t)(c>>64))&0xffffffffL) == 3604448702L &&
+                 (((uint64_t)(c>>32))&0xffffffffL) == 2351960064L &&
+                 (((uint64_t)(c))&0xffffffffL) == 0;
+	  return !ok;
+        ])],
+	[tor_cv_can_use_curve25519_donna_c64=yes],
+        [tor_cv_can_use_curve25519_donna_c64=no],
+	[AC_COMPILE_IFELSE(
+          [AC_LANG_PROGRAM([dnl
+            #include <stdint.h>
+            typedef unsigned uint128_t __attribute__((mode(TI)));
+            ], [dnl
+              uint64_t a = ((uint64_t)2000000000) * 1000000000;
+	      uint64_t b = ((uint64_t)1234567890) << 24;
+	      uint128_t c = ((uint128_t)a) * b;
+	      int ok = ((uint64_t)(c>>96)) == 522859 &&
+	             (((uint64_t)(c>>64))&0xffffffffL) == 3604448702L &&
+                     (((uint64_t)(c>>32))&0xffffffffL) == 2351960064L &&
+                     (((uint64_t)(c))&0xffffffffL) == 0;
+	      return !ok;
+            ])],
+            [tor_cv_can_use_curve25519_donna_c64=cross],
+	    [tor_cv_can_use_curve25519_donna_c64=no])])])
+
+  AC_CACHE_CHECK([whether we can use curve25519 from nacl],
+    tor_cv_can_use_curve25519_nacl,
+    [tor_saved_LIBS="$LIBS"
+     LIBS="$LIBS -lnacl"
+     AC_LINK_IFELSE(
+       [AC_LANG_PROGRAM([dnl
+         #include <crypto_scalarmult_curve25519.h>
+         #ifdef crypto_scalarmult_curve25519_ref_BYTES
+	 #error Hey, this is the reference implementation!
+	 #endif
+       ], [
+	 unsigned char *a, *b, *c; crypto_scalarmult_curve25519(a,b,c);
+       ])], [tor_cv_can_use_curve25519_nacl=yes],
+       [tor_cv_can_use_curve25519_nacl=no])
+     LIBS="$tor_saved_LIBS" ])
+
+   dnl Okay, now we need to figure out which one to actually use. Fall back
+   dnl to curve25519-donna.c
+
+   if test x$tor_cv_can_use_curve25519_donna_c64 != xno; then
+     build_curve25519_donna_c64=yes
+     use_curve25519_donna=yes
+   elif test x$tor_cv_can_use_curve25519_nacl = xyes; then
+     use_curve25519_nacl=yes
+     CURVE25519_LIBS=-lnacl
+   else
+     build_curve25519_donna=yes
+     use_curve25519_donna=yes
+   fi
+   have_a_curve25519=yes
+fi
+
+if test x$have_a_curve25519 = xyes; then
+  AC_DEFINE(CURVE25519_ENABLED, 1,
+            [Defined if we have a curve25519 implementation])
+fi
+if test x$use_curve25519_donna = xyes; then
+  AC_DEFINE(USE_CURVE25519_DONNA, 1,
+            [Defined if we should use an internal curve25519_donna{,_c64} implementation])
+fi
+if test x$use_curve25519_nacl = xyes; then
+  AC_DEFINE(USE_CURVE25519_NACL, 1,
+            [Defined if we should use a curve25519 from nacl])
+fi
+AM_CONDITIONAL(BUILD_CURVE25519_DONNA, test x$build_curve25519_donna = xyes)
+AM_CONDITIONAL(BUILD_CURVE25519_DONNA_C64, test x$build_curve25519_donna_c64 = xyes)
+AM_CONDITIONAL(CURVE25519_ENABLED, test x$have_a_curve25519 = xyes)
+AC_SUBST(CURVE25519_LIBS)
+
 dnl Make sure to enable support for large off_t if available.
 AC_SYS_LARGEFILE
 

+ 10 - 0
doc/tor.1.txt

@@ -1218,6 +1218,16 @@ The following options are useful only for clients (that is, if
     "auto" (recommended) then it is on for all clients that do not set
     FetchUselessDescriptors. (Default: auto)
 
+**UseNTorHandshake** **0**|**1**|**auto**::
+    The "ntor" circuit-creation handshake is faster and (we think) more
+    secure than the original ("TAP") circuit handshake, but starting to use
+    it too early might make your client stand out. If this option is 0, your
+    Tor client won't use the ntor handshake. If it's 1, your Tor client
+    will use the ntor handshake to extend circuits through servers that
+    support it. If this option is "auto" (recommended), then your client
+    will use the ntor handshake once enough directory authorities recommend
+    it. (Default: auto)
+
 **PathBiasCircThreshold** __NUM__ +
 
 **PathBiasNoticeRate** __NUM__ +

+ 124 - 39
src/common/crypto.c

@@ -2036,6 +2036,16 @@ crypto_dh_new(int dh_type)
   return NULL;
 }
 
+/** Allocate and return a new crypto_dh_t that wraps the same underlying DH
+ * object as <b>dh</b>.  The underlying object is shared rather than copied:
+ * we take one extra reference to it on behalf of the new wrapper. */
+crypto_dh_t *
+crypto_dh_dup(const crypto_dh_t *dh)
+{
+  crypto_dh_t *copy = tor_malloc_zero(sizeof(*copy));
+
+  DH_up_ref(dh->dh);
+  copy->dh = dh->dh;
+  return copy;
+}
+
 /** Return the length of the DH key in <b>dh</b>, in bytes.
  */
 int
@@ -2174,8 +2184,8 @@ crypto_dh_compute_secret(int severity, crypto_dh_t *dh,
     goto error;
   }
   secret_len = result;
-  if (crypto_expand_key_material(secret_tmp, secret_len,
-                                 secret_out, secret_bytes_out)<0)
+  if (crypto_expand_key_material_TAP((uint8_t*)secret_tmp, secret_len,
+                                     (uint8_t*)secret_out, secret_bytes_out)<0)
     goto error;
   secret_len = secret_bytes_out;
 
@@ -2201,15 +2211,18 @@ crypto_dh_compute_secret(int severity, crypto_dh_t *dh,
  * <b>key_out</b> by taking the first <b>key_out_len</b> bytes of
  *    H(K | [00]) | H(K | [01]) | ....
  *
+ * This is the key expansion algorithm used in the "TAP" circuit extension
+ * mechanism; it shouldn't be used for new protocols.
+ *
  * Return 0 on success, -1 on failure.
  */
 int
-crypto_expand_key_material(const char *key_in, size_t key_in_len,
-                           char *key_out, size_t key_out_len)
+crypto_expand_key_material_TAP(const uint8_t *key_in, size_t key_in_len,
+                               uint8_t *key_out, size_t key_out_len)
 {
   int i;
-  char *cp, *tmp = tor_malloc(key_in_len+1);
-  char digest[DIGEST_LEN];
+  uint8_t *cp, *tmp = tor_malloc(key_in_len+1);
+  uint8_t digest[DIGEST_LEN];
 
   /* If we try to get more than this amount of key data, we'll repeat blocks.*/
   tor_assert(key_out_len <= DIGEST_LEN*256);
@@ -2218,7 +2231,7 @@ crypto_expand_key_material(const char *key_in, size_t key_in_len,
   for (cp = key_out, i=0; cp < key_out+key_out_len;
        ++i, cp += DIGEST_LEN) {
     tmp[key_in_len] = i;
-    if (crypto_digest(digest, tmp, key_in_len+1))
+    if (crypto_digest((char*)digest, (const char *)tmp, key_in_len+1))
       goto err;
     memcpy(cp, digest, MIN(DIGEST_LEN, key_out_len-(cp-key_out)));
   }
@@ -2234,6 +2247,65 @@ crypto_expand_key_material(const char *key_in, size_t key_in_len,
   return -1;
 }
 
+/** Expand some secret key material according to RFC5869, using SHA256 as the
+ * underlying hash.  The <b>key_in_len</b> bytes at <b>key_in</b> are the
+ * secret key material; the <b>salt_in_len</b> bytes at <b>salt_in</b> and the
+ * <b>info_in_len</b> bytes at <b>info_in</b> are the algorithm's "salt"
+ * and "info" parameters respectively.  On success, write <b>key_out_len</b>
+ * bytes to <b>key_out</b> and return 0.  (-1 is reserved for failure, though
+ * no path in this implementation currently returns it.)
+ *
+ * <b>key_out_len</b> must be at most 255 * DIGEST256_LEN and
+ * <b>info_in_len</b> at most 128; violating either trips an assertion.
+ */
+int
+crypto_expand_key_material_rfc5869_sha256(
+                                    const uint8_t *key_in, size_t key_in_len,
+                                    const uint8_t *salt_in, size_t salt_in_len,
+                                    const uint8_t *info_in, size_t info_in_len,
+                                    uint8_t *key_out, size_t key_out_len)
+{
+  uint8_t prk[DIGEST256_LEN];
+  uint8_t tmp[DIGEST256_LEN + 128 + 1];
+  uint8_t mac[DIGEST256_LEN];
+  int i;
+  uint8_t *outp;
+  size_t tmp_len;
+
+  /* The block counter is a single octet that starts at 1, so RFC5869 allows
+   * at most 255 output blocks.  A 256th block would wrap the counter to 0. */
+  tor_assert(key_out_len <= DIGEST256_LEN * 255);
+  /* tmp has room for one previous MAC, 128 bytes of info, and the counter. */
+  tor_assert(info_in_len <= 128);
+
+  /* Extract step: PRK = HMAC(salt, input key material). */
+  crypto_hmac_sha256((char*)prk,
+                     (const char*)salt_in, salt_in_len,
+                     (const char*)key_in, key_in_len);
+
+  memset(tmp, 0, sizeof(tmp));
+  outp = key_out;
+  i = 1;
+
+  /* Expand step: T(1) = HMAC(PRK, info | 0x01), and for i > 1,
+   * T(i) = HMAC(PRK, T(i-1) | info | i). */
+  while (key_out_len) {
+    size_t n;
+    if (i > 1) {
+      memcpy(tmp, mac, DIGEST256_LEN);
+      memcpy(tmp+DIGEST256_LEN, info_in, info_in_len);
+      tmp[DIGEST256_LEN+info_in_len] = (uint8_t)i;
+      tmp_len = DIGEST256_LEN + info_in_len + 1;
+    } else {
+      memcpy(tmp, info_in, info_in_len);
+      tmp[info_in_len] = (uint8_t)i;
+      tmp_len = info_in_len + 1;
+    }
+    crypto_hmac_sha256((char*)mac,
+                       (const char*)prk, DIGEST256_LEN,
+                       (const char*)tmp, tmp_len);
+    /* The final block may be only partially used. */
+    n = key_out_len < DIGEST256_LEN ? key_out_len : DIGEST256_LEN;
+    memcpy(outp, mac, n);
+    key_out_len -= n;
+    outp += n;
+    ++i;
+  }
+
+  /* All three buffers hold key-derived secrets; don't leave them on the
+   * stack. */
+  memwipe(prk, 0, sizeof(prk));
+  memwipe(tmp, 0, sizeof(tmp));
+  memwipe(mac, 0, sizeof(mac));
+  return 0;
+}
+
 /** Free a DH key exchange object.
  */
 void
@@ -2272,22 +2344,16 @@ seed_weak_rng(void)
   tor_init_weak_random(seed);
 }
 
-/** Seed OpenSSL's random number generator with bytes from the operating
- * system.  <b>startup</b> should be true iff we have just started Tor and
- * have not yet allocated a bunch of fds.  Return 0 on success, -1 on failure.
+/** Try to get <b>out_len</b> bytes of the strongest entropy we can generate,
+ * storing it into <b>out</b>.
  */
 int
-crypto_seed_rng(int startup)
+crypto_strongest_rand(uint8_t *out, size_t out_len)
 {
-  int rand_poll_status = 0;
-
-  /* local variables */
 #ifdef _WIN32
-  unsigned char buf[ADD_ENTROPY];
   static int provider_set = 0;
   static HCRYPTPROV provider;
 #else
-  char buf[ADD_ENTROPY];
   static const char *filenames[] = {
     "/dev/srandom", "/dev/urandom", "/dev/random", NULL
   };
@@ -2295,58 +2361,77 @@ crypto_seed_rng(int startup)
   size_t n;
 #endif
 
-  /* OpenSSL has a RAND_poll function that knows about more kinds of
-   * entropy than we do.  We'll try calling that, *and* calling our own entropy
-   * functions.  If one succeeds, we'll accept the RNG as seeded. */
-  if (startup || RAND_POLL_IS_SAFE) {
-    rand_poll_status = RAND_poll();
-    if (rand_poll_status == 0)
-      log_warn(LD_CRYPTO, "RAND_poll() failed.");
-  }
-
 #ifdef _WIN32
   if (!provider_set) {
     if (!CryptAcquireContext(&provider, NULL, NULL, PROV_RSA_FULL,
                              CRYPT_VERIFYCONTEXT)) {
       if ((unsigned long)GetLastError() != (unsigned long)NTE_BAD_KEYSET) {
         log_warn(LD_CRYPTO, "Can't get CryptoAPI provider [1]");
-        return rand_poll_status ? 0 : -1;
+        return -1;
       }
     }
     provider_set = 1;
   }
-  if (!CryptGenRandom(provider, sizeof(buf), buf)) {
+  if (!CryptGenRandom(provider, out_len, out)) {
     log_warn(LD_CRYPTO, "Can't get entropy from CryptoAPI.");
-    return rand_poll_status ? 0 : -1;
+    return -1;
   }
-  RAND_seed(buf, sizeof(buf));
-  memwipe(buf, 0, sizeof(buf));
-  seed_weak_rng();
+
   return 0;
 #else
   for (i = 0; filenames[i]; ++i) {
     fd = open(filenames[i], O_RDONLY, 0);
     if (fd<0) continue;
-    log_info(LD_CRYPTO, "Seeding RNG from \"%s\"", filenames[i]);
-    n = read_all(fd, buf, sizeof(buf), 0);
+    log_info(LD_CRYPTO, "Reading entropy from \"%s\"", filenames[i]);
+    n = read_all(fd, (char*)out, out_len, 0);
     close(fd);
-    if (n != sizeof(buf)) {
+    if (n != out_len) {
       log_warn(LD_CRYPTO,
                "Error reading from entropy source (read only %lu bytes).",
                (unsigned long)n);
       return -1;
     }
-    RAND_seed(buf, (int)sizeof(buf));
-    memwipe(buf, 0, sizeof(buf));
-    seed_weak_rng();
+
     return 0;
   }
 
-  log_warn(LD_CRYPTO, "Cannot seed RNG -- no entropy source found.");
-  return rand_poll_status ? 0 : -1;
+  log_warn(LD_CRYPTO, "Cannot get strong entropy: no entropy source found.");
+  return -1;
 #endif
 }
 
+/** Seed OpenSSL's random number generator with bytes from the operating
+ * system.  <b>startup</b> should be true iff we have just started Tor and
+ * have not yet allocated a bunch of fds.
+ *
+ * Two sources are tried: OpenSSL's own RAND_poll() and our
+ * crypto_strongest_rand().  Return 0 if at least one of them worked, and -1
+ * if neither did.  The weak RNG is reseeded in either case.
+ */
+int
+crypto_seed_rng(int startup)
+{
+  uint8_t entropy[ADD_ENTROPY];
+  int poll_result = 0;
+  int got_entropy;
+
+  /* RAND_poll knows about more kinds of entropy than we do, so use it as
+   * well as our own code whenever it is safe to call. */
+  if (startup || RAND_POLL_IS_SAFE) {
+    poll_result = RAND_poll();
+    if (poll_result == 0)
+      log_warn(LD_CRYPTO, "RAND_poll() failed.");
+  }
+
+  got_entropy = (crypto_strongest_rand(entropy, sizeof(entropy)) == 0);
+  if (got_entropy)
+    RAND_seed(entropy, sizeof(entropy));
+
+  /* Wipe the buffer whether or not it was filled. */
+  memwipe(entropy, 0, sizeof(entropy));
+  seed_weak_rng();
+
+  return (poll_result || got_entropy) ? 0 : -1;
+}
+
 /** Write <b>n</b> bytes of strong random data to <b>to</b>. Return 0 on
  * success, -1 on failure.
  */

+ 11 - 2
src/common/crypto.h

@@ -230,6 +230,7 @@ void crypto_hmac_sha256(char *hmac_out,
 #define DH_TYPE_REND 2
 #define DH_TYPE_TLS 3
 crypto_dh_t *crypto_dh_new(int dh_type);
+crypto_dh_t *crypto_dh_dup(const crypto_dh_t *dh);
 int crypto_dh_get_bytes(crypto_dh_t *dh);
 int crypto_dh_generate_public(crypto_dh_t *dh);
 int crypto_dh_get_public(crypto_dh_t *dh, char *pubkey_out,
@@ -238,12 +239,20 @@ ssize_t crypto_dh_compute_secret(int severity, crypto_dh_t *dh,
                              const char *pubkey, size_t pubkey_len,
                              char *secret_out, size_t secret_out_len);
 void crypto_dh_free(crypto_dh_t *dh);
-int crypto_expand_key_material(const char *key_in, size_t in_len,
-                               char *key_out, size_t key_out_len);
+
+int crypto_expand_key_material_TAP(const uint8_t *key_in,
+                                   size_t key_in_len,
+                                   uint8_t *key_out, size_t key_out_len);
+int crypto_expand_key_material_rfc5869_sha256(
+                                    const uint8_t *key_in, size_t key_in_len,
+                                    const uint8_t *salt_in, size_t salt_in_len,
+                                    const uint8_t *info_in, size_t info_in_len,
+                                    uint8_t *key_out, size_t key_out_len);
 
 /* random numbers */
 int crypto_seed_rng(int startup);
 int crypto_rand(char *to, size_t n);
+int crypto_strongest_rand(uint8_t *out, size_t out_len);
 int crypto_rand_int(unsigned int max);
 uint64_t crypto_rand_uint64(uint64_t max);
 double crypto_rand_double(void);

+ 180 - 0
src/common/crypto_curve25519.c

@@ -0,0 +1,180 @@
+/* Copyright (c) 2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/* Wrapper code for a curve25519 implementation. */
+
+#define CRYPTO_CURVE25519_PRIVATE
+#include "orconfig.h"
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#include "crypto.h"
+#include "crypto_curve25519.h"
+#include "util.h"
+#include "torlog.h"
+
+/* ==============================
+   Part 1: wrap a suitable curve25519 implementation as curve25519_impl
+   ============================== */
+
+#ifdef USE_CURVE25519_DONNA
+int curve25519_donna(uint8_t *mypublic,
+                     const uint8_t *secret, const uint8_t *basepoint);
+#endif
+#ifdef USE_CURVE25519_NACL
+#include <crypto_scalarmult_curve25519.h>
+#endif
+
+/** Run the curve25519 function on <b>secret</b> and <b>basepoint</b>, storing
+ * the result in <b>output</b>, using whichever backend was selected at
+ * configure time.  Return that backend's return value unchanged. */
+int
+curve25519_impl(uint8_t *output, const uint8_t *secret,
+                const uint8_t *basepoint)
+{
+  int r;
+#if defined(USE_CURVE25519_DONNA)
+  r = curve25519_donna(output, secret, basepoint);
+#elif defined(USE_CURVE25519_NACL)
+  r = crypto_scalarmult_curve25519(output, secret, basepoint);
+#else
+#error "No implementation of curve25519 is available."
+#endif
+  return r;
+}
+
+/* ==============================
+   Part 2: Wrap curve25519_impl with some convenience types and functions.
+   ============================== */
+
+/**
+ * Return true iff <b>key</b> looks usable as a curve25519 public key.
+ *
+ * We don't need to check that the point is on the curve, since the twist is
+ * also secure; the only thing we reject is the point at infinity, which we
+ * detect as an all-zero encoding (checked in constant time). */
+int
+curve25519_public_key_is_ok(const curve25519_public_key_t *key)
+{
+  const int all_zero = safe_mem_is_zero(key->public_key,
+                                        CURVE25519_PUBKEY_LEN);
+  return !all_zero;
+}
+
+/** Generate a new curve25519 secret key and store it in <b>key_out</b>.  If
+ * <b>extra_strong</b> is true, this key is possibly going to get used more
+ * than once, so mix in output from a better-than-usual RNG when one is
+ * available.  Return 0 on success, -1 on failure. */
+int
+curve25519_secret_key_generate(curve25519_secret_key_t *key_out,
+                               int extra_strong)
+{
+  uint8_t *const sk = key_out->secret_key;
+  uint8_t strong_seed[CURVE25519_SECKEY_LEN];
+
+  if (crypto_rand((char*)sk, CURVE25519_SECKEY_LEN) < 0)
+    return -1;
+
+  if (extra_strong) {
+    const int have_strong =
+      (crypto_strongest_rand(strong_seed, CURVE25519_SECKEY_LEN) == 0);
+    if (have_strong) {
+      /* Use the extra-strong entropy as an HMAC key over the ordinary
+       * entropy rather than using it directly, in case the extra-strong
+       * entropy turns out to be less amazing than we hoped. */
+      crypto_hmac_sha256((char *)sk,
+                         (const char *)strong_seed, sizeof(strong_seed),
+                         (const char *)sk, CURVE25519_SECKEY_LEN);
+    }
+  }
+  memwipe(strong_seed, 0, sizeof(strong_seed));
+
+  /* Clear the low three bits of the first byte; in the last byte, clear the
+   * top bit and set the one below it. */
+  sk[0] &= 248;
+  sk[31] = (sk[31] & 127) | 64;
+
+  return 0;
+}
+
+/** Compute the public key that corresponds to <b>seckey</b> and store it in
+ * <b>key_out</b>, by applying the curve25519 function to the secret key and
+ * the fixed base point (9 followed by zero bytes). */
+void
+curve25519_public_key_generate(curve25519_public_key_t *key_out,
+                               const curve25519_secret_key_t *seckey)
+{
+  static const uint8_t base_point_nine[32] = { 9 };
+  uint8_t *const pk = key_out->public_key;
+
+  curve25519_impl(pk, seckey->secret_key, base_point_nine);
+}
+
+/** Generate a new secret key into <b>keypair_out</b> and derive the matching
+ * public key from it.  <b>extra_strong</b> is passed through to
+ * curve25519_secret_key_generate().  Return 0 on success, -1 if the secret
+ * key could not be generated. */
+int
+curve25519_keypair_generate(curve25519_keypair_t *keypair_out,
+                            int extra_strong)
+{
+  curve25519_secret_key_t *sec = &keypair_out->seckey;
+
+  if (curve25519_secret_key_generate(sec, extra_strong) >= 0) {
+    curve25519_public_key_generate(&keypair_out->pubkey, sec);
+    return 0;
+  }
+  return -1;
+}
+
+/** Write <b>keypair</b> to the file <b>fname</b>.  The file begins with a
+ * 32-byte zero-padded header of the form "== c25519v1: <b>tag</b> ==",
+ * followed by the secret key and then the public key.  The formatted header
+ * must fit in 32 bytes, or an assertion fails.  Return whatever
+ * write_bytes_to_file() returns. */
+int
+curve25519_keypair_write_to_file(const curve25519_keypair_t *keypair,
+                                 const char *fname,
+                                 const char *tag)
+{
+  char contents[32 + CURVE25519_SECKEY_LEN + CURVE25519_PUBKEY_LEN];
+  char *const sec_pos = contents + 32;
+  char *const pub_pos = sec_pos + CURVE25519_SECKEY_LEN;
+  int result;
+
+  /* Zero first so that the unused tail of the header is NUL padding. */
+  memset(contents, 0, sizeof(contents));
+  tor_snprintf(contents, sizeof(contents), "== c25519v1: %s ==", tag);
+  tor_assert(strlen(contents) <= 32);
+
+  memcpy(sec_pos, keypair->seckey.secret_key, CURVE25519_SECKEY_LEN);
+  memcpy(pub_pos, keypair->pubkey.public_key, CURVE25519_PUBKEY_LEN);
+
+  result = write_bytes_to_file(fname, contents, sizeof(contents), 1);
+
+  /* The buffer held the secret key; don't leave it on the stack. */
+  memwipe(contents, 0, sizeof(contents));
+  return result;
+}
+
+/** Read a keypair from the file <b>fname</b>, in the layout written by
+ * curve25519_keypair_write_to_file(): a 32-byte header of the form
+ * "== c25519v1: TAG ==", then the secret key, then the public key.
+ *
+ * On success, store the keypair in <b>keypair_out</b>, set *<b>tag_out</b>
+ * to a newly allocated copy of TAG, and return 0.  Return -1 if the file is
+ * missing or unreadable, has the wrong size, has a malformed header, or
+ * holds a public key that does not match the one derived from its secret
+ * key.  On failure, <b>keypair_out</b> is wiped and any tag that was
+ * allocated is released with tor_free().
+ */
+int
+curve25519_keypair_read_from_file(curve25519_keypair_t *keypair_out,
+                                  char **tag_out,
+                                  const char *fname)
+{
+  static const char tag_start[] = "== c25519v1: ";
+  static const char tag_end[] = " ==";
+  char prefix[33];
+  char *content;
+  struct stat st;
+  size_t prefix_len;
+  int r = -1;
+
+  *tag_out = NULL;
+
+  st.st_size = 0;
+  content = read_file_to_str(fname, RFTS_BIN|RFTS_IGNORE_MISSING, &st);
+  if (! content)
+    goto end;
+  if (st.st_size != 32 + CURVE25519_SECKEY_LEN + CURVE25519_PUBKEY_LEN)
+    goto end;
+
+  memcpy(prefix, content, 32);
+  prefix[32] = '\0';
+  prefix_len = strlen(prefix);
+
+  /* The header must have room for both markers without overlap.
+   * Otherwise a header such as "== c25519v1: ==" passes both checks below
+   * (its single space is shared), and the tag length would underflow. */
+  if (prefix_len < strlen(tag_start) + strlen(tag_end))
+    goto end;
+  if (strcmpstart(prefix, tag_start) ||
+      strcmpend(prefix, tag_end))
+    goto end;
+
+  *tag_out = tor_strndup(prefix + strlen(tag_start),
+                         prefix_len - strlen(tag_start) - strlen(tag_end));
+
+  /* Don't trust the stored public key: derive it again from the secret key
+   * and require the two to agree. */
+  memcpy(keypair_out->seckey.secret_key, content+32, CURVE25519_SECKEY_LEN);
+  curve25519_public_key_generate(&keypair_out->pubkey, &keypair_out->seckey);
+  if (tor_memneq(keypair_out->pubkey.public_key,
+                 content + 32 + CURVE25519_SECKEY_LEN,
+                 CURVE25519_PUBKEY_LEN))
+    goto end;
+
+  r = 0;
+
+ end:
+  if (content) {
+    memwipe(content, 0, st.st_size);
+    tor_free(content);
+  }
+  if (r != 0) {
+    /* keypair_out may hold secret key bytes by now, so use memwipe rather
+     * than a memset that the compiler could drop. */
+    memwipe(keypair_out, 0, sizeof(*keypair_out));
+    tor_free(*tag_out);
+  }
+  return r;
+}
+
+/** Perform the curve25519 ECDH handshake between our secret key <b>skey</b>
+ * and the other party's public key <b>pkey</b>, writing
+ * CURVE25519_OUTPUT_LEN bytes of output into <b>output</b>. */
+void
+curve25519_handshake(uint8_t *output,
+                     const curve25519_secret_key_t *skey,
+                     const curve25519_public_key_t *pkey)
+{
+  const uint8_t *our_secret = skey->secret_key;
+  const uint8_t *their_point = pkey->public_key;
+
+  curve25519_impl(output, our_secret, their_point);
+}
+

+ 61 - 0
src/common/crypto_curve25519.h

@@ -0,0 +1,61 @@
+/* Copyright (c) 2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_CRYPTO_CURVE25519_H
+#define TOR_CRYPTO_CURVE25519_H
+
+#include "torint.h"
+
+/** Length of a curve25519 public key when encoded. */
+#define CURVE25519_PUBKEY_LEN 32
+/** Length of a curve25519 secret key when encoded. */
+#define CURVE25519_SECKEY_LEN 32
+/** Length of the result of a curve25519 handshake. */
+#define CURVE25519_OUTPUT_LEN 32
+
+/** Wrapper type for a curve25519 public key */
+typedef struct curve25519_public_key_t {
+  /** The encoded public key: CURVE25519_PUBKEY_LEN raw bytes. */
+  uint8_t public_key[CURVE25519_PUBKEY_LEN];
+} curve25519_public_key_t;
+
+/** Wrapper type for a curve25519 secret key */
+typedef struct curve25519_secret_key_t {
+  /** The encoded secret key: CURVE25519_SECKEY_LEN raw bytes. */
+  uint8_t secret_key[CURVE25519_SECKEY_LEN];
+} curve25519_secret_key_t;
+
+/** A paired public and private key for curve25519. **/
+typedef struct curve25519_keypair_t {
+  /** The public half of the pair. */
+  curve25519_public_key_t pubkey;
+  /** The secret half of the pair. */
+  curve25519_secret_key_t seckey;
+} curve25519_keypair_t;
+
+#ifdef CURVE25519_ENABLED
+int curve25519_public_key_is_ok(const curve25519_public_key_t *);
+
+int curve25519_secret_key_generate(curve25519_secret_key_t *key_out,
+                                   int extra_strong);
+void curve25519_public_key_generate(curve25519_public_key_t *key_out,
+                                    const curve25519_secret_key_t *seckey);
+int curve25519_keypair_generate(curve25519_keypair_t *keypair_out,
+                                int extra_strong);
+
+void curve25519_handshake(uint8_t *output,
+                          const curve25519_secret_key_t *,
+                          const curve25519_public_key_t *);
+
+int curve25519_keypair_write_to_file(const curve25519_keypair_t *keypair,
+                                     const char *fname,
+                                     const char *tag);
+
+int curve25519_keypair_read_from_file(curve25519_keypair_t *keypair_out,
+                                      char **tag_out,
+                                      const char *fname);
+
+#ifdef CRYPTO_CURVE25519_PRIVATE
+int curve25519_impl(uint8_t *output, const uint8_t *secret,
+                    const uint8_t *basepoint);
+#endif
+#endif
+
+#endif
+

+ 89 - 0
src/common/di_ops.c

@@ -8,6 +8,8 @@
 
 #include "orconfig.h"
 #include "di_ops.h"
+#include "torlog.h"
+#include "util.h"
 
 /**
  * Timing-safe version of memcmp.  As memcmp, compare the <b>sz</b> bytes at
@@ -131,3 +133,90 @@ tor_memeq(const void *a, const void *b, size_t sz)
   return 1 & ((any_difference - 1) >> 8);
 }
 
+/* Implement di_digest256_map_t as a linked list of entries. */
+struct di_digest256_map_t {
+  /* Next entry in the list, or NULL at the end. */
+  struct di_digest256_map_t *next;
+  /* The 32-byte key for this entry. */
+  uint8_t key[32];
+  /* The value stored under <b>key</b>; dimap_add_entry() requires it to be
+   * non-NULL. */
+  void *val;
+};
+
+/** Free every entry of <b>map</b>.  If <b>free_fn</b> is non-NULL, call it
+ * on each entry's value before freeing that entry. */
+void
+dimap_free(di_digest256_map_t *map, dimap_free_fn free_fn)
+{
+  di_digest256_map_t *ent, *next_ent;
+
+  for (ent = map; ent; ent = next_ent) {
+    /* Grab the successor before this entry goes away. */
+    next_ent = ent->next;
+    if (free_fn)
+      free_fn(ent->val);
+    tor_free(ent);
+  }
+}
+
+/** Add a mapping from <b>key</b> (a DIGEST256_LEN-byte key) to <b>val</b>
+ * at the front of the map at *<b>map</b>, updating *<b>map</b> to point at
+ * the new head.
+ *
+ * The caller MUST NOT add a key that already appears in the map, and
+ * <b>val</b> must be non-NULL; both conditions are asserted.
+ */
+void
+dimap_add_entry(di_digest256_map_t **map,
+                const uint8_t *key, void *val)
+{
+  di_digest256_map_t *head;
+  void *old_val = dimap_search(*map, key, NULL);
+
+  tor_assert(! old_val);
+  tor_assert(val);
+
+  head = tor_malloc_zero(sizeof(*head));
+  memcpy(head->key, key, sizeof(head->key));
+  head->val = val;
+  head->next = *map;
+  *map = head;
+}
+
+/** Search the map at <b>map</b> for an entry whose key is <b>key</b> (a
+ * DIGEST256_LEN-byte key) returning the corresponding value if we found one,
+ * and returning <b>dflt_val</b> if the key wasn't found.
+ *
+ * This operation takes an amount of time dependent only on the length of
+ * <b>map</b>, not on the position or presence of <b>key</b> within <b>map</b>.
+ *
+ * Every entry is visited and the result is chosen with bit masks rather than
+ * branches.  If several entries matched, the one latest in the list would be
+ * returned; dimap_add_entry() forbids duplicate keys.
+ */
+void *
+dimap_search(const di_digest256_map_t *map, const uint8_t *key,
+             void *dflt_val)
+{
+  uintptr_t result = (uintptr_t)dflt_val;
+
+  while (map) {
+    uintptr_t r = (uintptr_t) tor_memeq(map->key, key, 32);
+    r -= 1; /* Now r is (uintptr_t)-1 if memeq returned false, and
+             * 0 if memeq returned true. */
+
+    /* On a match (r == 0), this clears result and the next line replaces it
+     * with this entry's value.  On a mismatch (r all ones), result is kept
+     * and the next line ORs in zero. */
+    result &= r;
+    result |= ((uintptr_t)(map->val)) & ~r;
+
+    map = map->next;
+  }
+
+  return (void *)result;
+}
+
+/**
+ * Return true iff the <b>sz</b> bytes at <b>mem</b> are all zero. Runs in
+ * time independent of the contents of <b>mem</b>.
+ */
+int
+safe_mem_is_zero(const void *mem, size_t sz)
+{
+  uint32_t total = 0;
+  const uint8_t *ptr = mem;
+
+  /* OR every byte together without exiting early; total stays in 0..255. */
+  while (sz--) {
+    total |= *ptr++;
+  }
+
+  /* total - 1 wraps around to 0xffffffff only when total is 0, so bit 8 of
+   * the difference is set exactly when every byte was zero. */
+  return 1 & ((total - 1) >> 8);
+}
+

+ 16 - 0
src/common/di_ops.h

@@ -27,5 +27,21 @@ int tor_memeq(const void *a, const void *b, size_t sz);
 #define fast_memeq(a,b,c)  (0==memcmp((a),(b),(c)))
 #define fast_memneq(a,b,c) (0!=memcmp((a),(b),(c)))
 
+int safe_mem_is_zero(const void *mem, size_t sz);
+
+/** A type for a map from DIGEST256_LEN-byte blobs to void*, such that
+ * data lookups take an amount of time proportional only to the size
+ * of the map, and not to the position or presence of the item in the map.
+ *
+ * Not efficient for large maps! */
+typedef struct di_digest256_map_t di_digest256_map_t;
+typedef void (*dimap_free_fn)(void *);
+
+void dimap_free(di_digest256_map_t *map, dimap_free_fn free_fn);
+void dimap_add_entry(di_digest256_map_t **map,
+                     const uint8_t *key, void *val);
+void *dimap_search(const di_digest256_map_t *map, const uint8_t *key,
+                   void *dflt_val);
+
 #endif
 

+ 25 - 1
src/common/include.am

@@ -14,6 +14,28 @@ else
 libor_extra_source=
 endif
 
+if BUILD_CURVE25519_DONNA
+src_common_libcurve25519_donna_a_SOURCES=\
+	src/ext/curve25519_donna/curve25519-donna.c
+noinst_LIBRARIES+=src/common/libcurve25519_donna.a
+LIBDONNA=src/common/libcurve25519_donna.a
+else
+if BUILD_CURVE25519_DONNA_C64
+src_common_libcurve25519_donna_a_SOURCES=\
+	src/ext/curve25519_donna/curve25519-donna-c64.c
+noinst_LIBRARIES+=src/common/libcurve25519_donna.a
+LIBDONNA=src/common/libcurve25519_donna.a
+else
+LIBDONNA=
+endif
+endif
+
+src_common_libcurve25519_donna_a_CFLAGS =
+
+if CURVE25519_ENABLED
+libcrypto_extra_source=src/common/crypto_curve25519.c
+endif
+
 src_common_libor_a_SOURCES = \
   src/common/address.c					\
   src/common/compat.c					\
@@ -31,7 +53,8 @@ src_common_libor_crypto_a_SOURCES = \
   src/common/aes.c		\
   src/common/crypto.c		\
   src/common/torgzip.c		\
-  src/common/tortls.c
+  src/common/tortls.c		\
+  $(libcrypto_extra_source)
 
 src_common_libor_event_a_SOURCES = src/common/compat_libevent.c
 
@@ -43,6 +66,7 @@ COMMONHEADERS = \
   src/common/compat_libevent.h			\
   src/common/container.h			\
   src/common/crypto.h				\
+  src/common/crypto_curve25519.h		\
   src/common/di_ops.h				\
   src/common/memarea.h				\
   src/common/mempool.h				\

+ 4 - 1
src/ext/README

@@ -36,4 +36,7 @@ tor_queue.h
     sys/queue.h, and the ones that do have diverged in incompatible
     ways.  (CIRCLEQ or no CIRCLEQ? SIMPLQ or STAILQ?)
 
-    
+curve25519_donna/*.c
+
+    A copy of Adam Langley's curve25519-donna mostly-portable
+    implementations of curve25519.

+ 44 - 0
src/ext/curve25519_donna/README

@@ -0,0 +1,44 @@
+See http://code.google.com/p/curve25519-donna/ for details.
+
+BUILDING:
+
+If you run `make`, two .a archives will be built, similar to djb's curve25519
+code. Alternatively, read on:
+
+The C implementation is contained within curve25519-donna.c. It has no external
+dependencies and is BSD licensed. You can copy/include/link it directly in with
+your program. Recommended C flags: -O2
+
+The x86-64 bit implementation is contained within curve25519-donna-x86-64.c and
+curve25519-donna-x86-64.s. Build like this:
+
+% cpp curve25519-donna-x86-64.s > curve25519-donna-x86-64.s.pp
+% as -o curve25519-donna-x86-64.s.o curve25519-donna-x86-64.s.pp
+% gcc -O2 -c curve25519-donna-x86-64.c
+
+Then the two .o files can be linked in
+
+USAGE:
+
+The usage is exactly the same as djb's code (as described at
+http://cr.yp.to/ecdh.html) except that the function is called curve25519_donna.
+
+In short,
+
+To generate a private key, generate 32 random bytes and:
+
+  mysecret[0] &= 248;
+  mysecret[31] &= 127;
+  mysecret[31] |= 64;
+
+To generate the public key, just do
+
+  static const uint8_t basepoint[32] = {9};
+  curve25519_donna(mypublic, mysecret, basepoint);
+
+To generate an agreed key do:
+  uint8_t shared_key[32];
+  curve25519_donna(shared_key, mysecret, theirpublic);
+
+And hash the shared_key with a cryptographic hash function before using.
+

+ 449 - 0
src/ext/curve25519_donna/curve25519-donna-c64.c

@@ -0,0 +1,449 @@
+/* Copyright 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Code released into the public domain.
+ *
+ * curve25519-donna: Curve25519 elliptic curve, public key function
+ *
+ * http://code.google.com/p/curve25519-donna/
+ *
+ * Adam Langley <agl@imperialviolet.org>
+ *
+ * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * More information about curve25519 can be found here
+ *   http://cr.yp.to/ecdh.html
+ *
+ * djb's sample implementation of curve25519 is written in a special assembly
+ * language called qhasm and uses the floating point registers.
+ *
+ * This is, almost, a clean room reimplementation from the curve25519 paper. It
+ * uses many of the tricks described therein. Only the crecip function is taken
+ * from the sample implementation.
+ */
+
+#include <string.h>
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef uint64_t limb; /* One digit of a field element.  The carry code in
+                        * this file masks limbs to 51 bits
+                        * (0x7ffffffffffff). */
+typedef limb felem[5]; /* Five limbs, least significant first. */
+// This is a special gcc mode for 128-bit integers. It's implemented on 64-bit
+// platforms only as far as I know.
+typedef unsigned uint128_t __attribute__((mode(TI)));
+
+#undef force_inline
+#define force_inline __attribute__((always_inline))
+
+/* Sum two numbers: output += in
+ *
+ * The five limbs are added pairwise.  No carry is propagated between limbs
+ * and nothing is reduced here, so each output limb simply grows by in[i].
+ */
+static inline void force_inline
+fsum(limb *output, const limb *in) {
+  output[0] += in[0];
+  output[1] += in[1];
+  output[2] += in[2];
+  output[3] += in[3];
+  output[4] += in[4];
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!)
+ *
+ * Assumes that out[i] < 2**52
+ * On return, out[i] < 2**55
+ *
+ * Limbs are unsigned, so a bias is added before subtracting.  The bias is
+ * 8*(2^51 - 19) for limb 0 and 8*(2^51 - 1) for limbs 1..4; taken together
+ * those are the limbs of 8*(2^255 - 19).  Adding a multiple of the modulus
+ * does not change the value mod 2^255 - 19, and with out[i] < 2**52 no limb
+ * can wrap below zero.
+ */
+static inline void force_inline
+fdifference_backwards(felem out, const felem in) {
+  /* 152 is 19 << 3 */
+  static const limb two54m152 = (((limb)1) << 54) - 152;
+  static const limb two54m8 = (((limb)1) << 54) - 8;
+
+  out[0] = in[0] + two54m152 - out[0];
+  out[1] = in[1] + two54m8 - out[1];
+  out[2] = in[2] + two54m8 - out[2];
+  out[3] = in[3] + two54m8 - out[3];
+  out[4] = in[4] + two54m8 - out[4];
+}
+
+/* Multiply a number by a scalar: output = in * scalar
+ *
+ * Each limb product is formed in 128 bits.  The low 51 bits become the
+ * output limb and the rest is carried into the next limb's product.  The
+ * carry out of limb 4 is multiplied by 19 and added to output[0] without a
+ * further carry pass, so output[0] can end up wider than 51 bits.
+ */
+static inline void force_inline
+fscalar_product(felem output, const felem in, const limb scalar) {
+  uint128_t a;
+
+  a = ((uint128_t) in[0]) * scalar;
+  output[0] = ((limb)a) & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[1]) * scalar + ((limb) (a >> 51));
+  output[1] = ((limb)a) & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[2]) * scalar + ((limb) (a >> 51));
+  output[2] = ((limb)a) & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[3]) * scalar + ((limb) (a >> 51));
+  output[3] = ((limb)a) & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[4]) * scalar + ((limb) (a >> 51));
+  output[4] = ((limb)a) & 0x7ffffffffffff;
+
+  output[0] += (a >> 51) * 19; /* The top carry wraps around to limb 0
+                                * with a factor of 19. */
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ *
+ * Assumes that in[i] < 2**55 and likewise for in2.
+ * On return, output[i] < 2**52
+ *
+ * The ten input limbs are copied into locals before anything is stored to
+ * output, and output is written only by the five stores at the very end.
+ * (crecip() in this file passes the same array as output and as an input.)
+ *
+ * t[0..4] hold the five 128-bit column sums.  Partial products that would
+ * land at limb index 5 or above are folded back with a factor of 19, which
+ * is why r1..r4 are pre-multiplied by 19 for the second group of sums.
+ */
+static inline void force_inline
+fmul(felem output, const felem in2, const felem in) {
+  uint128_t t[5];
+  limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
+
+  r0 = in[0];
+  r1 = in[1];
+  r2 = in[2];
+  r3 = in[3];
+  r4 = in[4];
+
+  s0 = in2[0];
+  s1 = in2[1];
+  s2 = in2[2];
+  s3 = in2[3];
+  s4 = in2[4];
+
+  t[0]  =  ((uint128_t) r0) * s0;
+  t[1]  =  ((uint128_t) r0) * s1 + ((uint128_t) r1) * s0;
+  t[2]  =  ((uint128_t) r0) * s2 + ((uint128_t) r2) * s0 + ((uint128_t) r1) * s1;
+  t[3]  =  ((uint128_t) r0) * s3 + ((uint128_t) r3) * s0 + ((uint128_t) r1) * s2 + ((uint128_t) r2) * s1;
+  t[4]  =  ((uint128_t) r0) * s4 + ((uint128_t) r4) * s0 + ((uint128_t) r3) * s1 + ((uint128_t) r1) * s3 + ((uint128_t) r2) * s2;
+
+  r4 *= 19; /* From here on r1..r4 carry the wrap-around factor. */
+  r1 *= 19;
+  r2 *= 19;
+  r3 *= 19;
+
+  t[0] += ((uint128_t) r4) * s1 + ((uint128_t) r1) * s4 + ((uint128_t) r2) * s3 + ((uint128_t) r3) * s2;
+  t[1] += ((uint128_t) r4) * s2 + ((uint128_t) r2) * s4 + ((uint128_t) r3) * s3;
+  t[2] += ((uint128_t) r4) * s3 + ((uint128_t) r3) * s4;
+  t[3] += ((uint128_t) r4) * s4;
+
+                  r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
+  t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
+  t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
+  t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
+  t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
+  r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
+  r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
+  r2 +=   c; /* The carry chain stops here: r2 is not masked again. */
+
+  output[0] = r0;
+  output[1] = r1;
+  output[2] = r2;
+  output[3] = r3;
+  output[4] = r4;
+}
+
+static inline void force_inline
+fsquare_times(felem output, const felem in, limb count) { /*
+   * Repeated squaring: output = in^(2^count), i.e. <in> squared <count>
+   * times in a row.  The carry steps after each squaring are the same as
+   * the ones at the end of fmul().
+   *
+   * <count> must be at least 1: the loop body runs before <count> is
+   * tested, and an unsigned 0 would wrap around on the pre-decrement.
+   *
+   * The five input limbs are copied to locals before the loop and <output>
+   * is stored only after it.
+   */
+  uint128_t t[5];
+  limb r0,r1,r2,r3,r4,c;
+  limb d0,d1,d2,d4,d419; /* Doubled and/or 19-scaled copies of the limbs,
+                          * recomputed each iteration for the cross terms. */
+
+  r0 = in[0];
+  r1 = in[1];
+  r2 = in[2];
+  r3 = in[3];
+  r4 = in[4];
+
+  do {
+    d0 = r0 * 2;
+    d1 = r1 * 2;
+    d2 = r2 * 2 * 19;
+    d419 = r4 * 19;
+    d4 = d419 * 2;
+
+    t[0] = ((uint128_t) r0) * r0 + ((uint128_t) d4) * r1 + (((uint128_t) d2) * (r3     ));
+    t[1] = ((uint128_t) d0) * r1 + ((uint128_t) d4) * r2 + (((uint128_t) r3) * (r3 * 19));
+    t[2] = ((uint128_t) d0) * r2 + ((uint128_t) r1) * r1 + (((uint128_t) d4) * (r3     ));
+    t[3] = ((uint128_t) d0) * r3 + ((uint128_t) d1) * r2 + (((uint128_t) r4) * (d419   ));
+    t[4] = ((uint128_t) d0) * r4 + ((uint128_t) d1) * r3 + (((uint128_t) r2) * (r2     ));
+
+                    r0 = (limb)t[0] & 0x7ffffffffffff; c = (limb)(t[0] >> 51);
+    t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffff; c = (limb)(t[1] >> 51);
+    t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffff; c = (limb)(t[2] >> 51);
+    t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffff; c = (limb)(t[3] >> 51);
+    t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffff; c = (limb)(t[4] >> 51);
+    r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffff;
+    r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffff;
+    r2 +=   c;
+  } while(--count);
+
+  output[0] = r0;
+  output[1] = r1;
+  output[2] = r2;
+  output[3] = r3;
+  output[4] = r4;
+}
+
+/* Load a little-endian 64-bit number from the eight bytes in[0..7].
+ *
+ * The value is assembled byte by byte with shifts, so the result does not
+ * depend on the byte order or alignment rules of the host.
+ */
+static limb
+load_limb(const u8 *in) {
+  return
+    ((limb)in[0]) |
+    (((limb)in[1]) << 8) |
+    (((limb)in[2]) << 16) |
+    (((limb)in[3]) << 24) |
+    (((limb)in[4]) << 32) |
+    (((limb)in[5]) << 40) |
+    (((limb)in[6]) << 48) |
+    (((limb)in[7]) << 56);
+}
+
+static void
+store_limb(u8 *out, limb in) { /* Write in to out[0..7], little-endian. */
+  out[0] = in & 0xff;
+  out[1] = (in >> 8) & 0xff;
+  out[2] = (in >> 16) & 0xff;
+  out[3] = (in >> 24) & 0xff;
+  out[4] = (in >> 32) & 0xff;
+  out[5] = (in >> 40) & 0xff;
+  out[6] = (in >> 48) & 0xff;
+  out[7] = (in >> 56) & 0xff;
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form
+ *
+ * Limb i receives bits 51*i .. 51*i+50 of the input.  Each limb is cut out
+ * of an 8-byte window: the windows start at bytes 0, 6, 12, 19 and 24 and
+ * are shifted right by 0, 3, 6, 1 and 12 bits so that bit 51*i lands at
+ * bit 0.  The windows overlap, and the last one ends at byte 31.
+ *
+ * Bit 255 of the input (the top bit of in[31]) is dropped by the final mask.
+ */
+static void
+fexpand(limb *output, const u8 *in) {
+  output[0] = load_limb(in) & 0x7ffffffffffff;
+  output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;
+  output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;
+  output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;
+  output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array
+ *
+ * The limbs are first copied into 128-bit temporaries and carried twice.
+ * The remaining steps map the value to its canonical residue without a
+ * branch: add 19, carry, then add 2^255 - 19 spread across the limbs and
+ * drop the carry out of the top limb.  Finally the five 51-bit limbs are
+ * packed into four 64-bit words.
+ */
+static void
+fcontract(u8 *output, const felem input) {
+  uint128_t t[5];
+
+  t[0] = input[0];
+  t[1] = input[1];
+  t[2] = input[2];
+  t[3] = input[3];
+  t[4] = input[4];
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+  /* now t is between 0 and 2^255-1, properly carried. */
+  /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
+
+  t[0] += 19;
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+  /* now between 19 and 2^255-1 in both cases, and offset by 19. */
+
+  t[0] += 0x8000000000000 - 19;
+  t[1] += 0x8000000000000 - 1;
+  t[2] += 0x8000000000000 - 1;
+  t[3] += 0x8000000000000 - 1;
+  t[4] += 0x8000000000000 - 1;
+
+  /* now between 2^255 and 2^256-20, and offset by 2^255. */
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[4] &= 0x7ffffffffffff; /* Discards the carry out of the top limb. */
+
+  store_limb(output,    t[0] | (t[1] << 51)); /* store_limb() takes a
+             * 64-bit limb, so only the low 64 bits of each 128-bit
+             * expression are written. */
+  store_limb(output+8,  (t[1] >> 13) | (t[2] << 38));
+  store_limb(output+16, (t[2] >> 26) | (t[3] << 25));
+  store_limb(output+24, (t[3] >> 39) | (t[4] << 12));
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z2: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ *
+ * (In this file every one of these is a five-limb array; the long/short
+ * wording is presumably inherited from the 32-bit implementation.)
+ *
+ * Only the x coordinate of Q-Q' is passed in, as qmqp.
+ */
+static void
+fmonty(limb *x2, limb *z2, /* output 2Q */
+       limb *x3, limb *z3, /* output Q + Q' */
+       limb *x, limb *z,   /* input Q */
+       limb *xprime, limb *zprime, /* input Q' */
+       const limb *qmqp /* input Q - Q' */) {
+  limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
+        zzprime[5], zzzprime[5];
+
+  memcpy(origx, x, 5 * sizeof(limb));
+  fsum(x, z); /* x = x + z */
+  fdifference_backwards(z, origx);  // does x - z
+
+  memcpy(origxprime, xprime, sizeof(limb) * 5);
+  fsum(xprime, zprime); /* xprime = x' + z' */
+  fdifference_backwards(zprime, origxprime); /* zprime = x' - z' */
+  fmul(xxprime, xprime, z); /* (x' + z') * (x - z) */
+  fmul(zzprime, x, zprime); /* (x + z) * (x' - z') */
+  memcpy(origxprime, xxprime, sizeof(limb) * 5);
+  fsum(xxprime, zzprime); /* sum of the two products */
+  fdifference_backwards(zzprime, origxprime); /* their difference */
+  fsquare_times(x3, xxprime, 1); /* x3 = sum^2 */
+  fsquare_times(zzzprime, zzprime, 1);
+  fmul(z3, zzzprime, qmqp); /* z3 = difference^2 * qmqp */
+
+  fsquare_times(xx, x, 1); /* xx = (x + z)^2 */
+  fsquare_times(zz, z, 1); /* zz = (x - z)^2 */
+  fmul(x2, xx, zz); /* x2 = xx * zz */
+  fdifference_backwards(zz, xx);  // does zz = xx - zz
+  fscalar_product(zzz, zz, 121665);
+  fsum(zzz, xx); /* zzz = 121665 * zz + xx */
+  fmul(z2, zz, zzz); /* z2 = zz * zzz */
+}
+
+// -----------------------------------------------------------------------------
+// Maybe swap the contents of two limb arrays (@a and @b), each 5 elements
+// long. Perform the swap iff @iswap is 1; @iswap must be either 0 or 1.
+//
+// This function performs the swap without leaking any side-channel
+// information.
+// -----------------------------------------------------------------------------
+static void
+swap_conditional(limb a[5], limb b[5], limb iswap) {
+  unsigned i;
+  const limb swap = -iswap; /* 0 stays all-zero bits; 1 becomes all-one
+                             * bits.  Any other value would give a partial
+                             * mask. */
+
+  for (i = 0; i < 5; ++i) {
+    const limb x = swap & (a[i] ^ b[i]); /* 0, or the bits that differ. */
+    a[i] ^= x;
+    b[i] ^= x;
+  }
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ *
+ * All 256 bits of n are processed, most significant first, and every bit
+ * does the same work: two conditional swaps, one fmonty() step, two more
+ * conditional swaps.  The loop does not branch on the bit's value.
+ *
+ * Two sets of four arrays are used.  fmonty() reads one set and writes the
+ * other, and the pointers are exchanged after each step instead of copying
+ * the limbs.
+ */
+static void
+cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
+  limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
+  limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+  limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
+  limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+  unsigned i, j;
+
+  memcpy(nqpqx, q, sizeof(limb) * 5); /* Start: (nqx, nqz) = (1, 0) and
+                                       * (nqpqx, nqpqz) = (q, 1). */
+
+  for (i = 0; i < 32; ++i) {
+    u8 byte = n[31 - i]; /* Highest-order byte of n first. */
+    for (j = 0; j < 8; ++j) {
+      const limb bit = byte >> 7; /* Current top bit of byte: 0 or 1. */
+
+      swap_conditional(nqx, nqpqx, bit);
+      swap_conditional(nqz, nqpqz, bit);
+      fmonty(nqx2, nqz2,
+             nqpqx2, nqpqz2,
+             nqx, nqz,
+             nqpqx, nqpqz,
+             q);
+      swap_conditional(nqx2, nqpqx2, bit);
+      swap_conditional(nqz2, nqpqz2, bit);
+
+      t = nqx; /* Exchange the input and output sets for the next step. */
+      nqx = nqx2;
+      nqx2 = t;
+      t = nqz;
+      nqz = nqz2;
+      nqz2 = t;
+      t = nqpqx;
+      nqpqx = nqpqx2;
+      nqpqx2 = t;
+      t = nqpqz;
+      nqpqz = nqpqz2;
+      nqpqz2 = t;
+
+      byte <<= 1; /* Expose the next lower bit. */
+    }
+  }
+
+  memcpy(resultx, nqx, sizeof(limb) * 5);
+  memcpy(resultz, nqz, sizeof(limb) * 5);
+}
+
+
+// -----------------------------------------------------------------------------
+// Shamelessly copied from djb's code, tightened a little
+// -----------------------------------------------------------------------------
+static void
+crecip(felem out, const felem z) {
+  felem a,t0,b,c; /* Scratch values.  The comment that opens each line
+                   * below names the exponent e for which that line's
+                   * result equals z^e.  The last line gives
+                   * out = z^(2^255 - 21); 2^255 - 21 is (2^255 - 19) - 2,
+                   * the exponent that inverts a nonzero z modulo the prime
+                   * 2^255 - 19.  Several calls pass the same array as
+                   * output and as an input. */
+
+  /* 2 */ fsquare_times(a, z, 1); // a = 2
+  /* 8 */ fsquare_times(t0, a, 2);
+  /* 9 */ fmul(b, t0, z); // b = 9
+  /* 11 */ fmul(a, b, a); // a = 11
+  /* 22 */ fsquare_times(t0, a, 1);
+  /* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
+  /* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
+  /* 2^10 - 2^0 */ fmul(b, t0, b);
+  /* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
+  /* 2^20 - 2^0 */ fmul(c, t0, b);
+  /* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
+  /* 2^40 - 2^0 */ fmul(t0, t0, c);
+  /* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
+  /* 2^50 - 2^0 */ fmul(b, t0, b);
+  /* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
+  /* 2^100 - 2^0 */ fmul(c, t0, b);
+  /* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
+  /* 2^200 - 2^0 */ fmul(t0, t0, c);
+  /* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
+  /* 2^250 - 2^0 */ fmul(t0, t0, b);
+  /* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
+  /* 2^255 - 21 */ fmul(out, t0, a);
+}
+
+int curve25519_donna(u8 *, const u8 *, const u8 *); /* Declaration ahead of
+             * the non-static definition that follows. */
+
+int
+curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) { /*
+   * Multiply the point whose 32-byte little-endian x coordinate is
+   * <basepoint> by the 32-byte little-endian scalar <secret>, and write the
+   * 32-byte little-endian x coordinate of the result to <mypublic>.
+   *
+   * The scalar is clamped in a local copy, so the caller's <secret> buffer
+   * is never modified.  Always returns 0.
+   *
+   * NOTE(review): the clamped copy <e> is not cleared before returning.
+   */
+  limb bp[5], x[5], z[5], zmone[5];
+  uint8_t e[32];
+  int i;
+
+  for (i = 0;i < 32;++i) e[i] = secret[i];
+  e[0] &= 248; /* Clear the three lowest bits of the scalar. */
+  e[31] &= 127; /* Clear bit 255. */
+  e[31] |= 64; /* Set bit 254. */
+
+  fexpand(bp, basepoint);
+  cmult(x, z, e, bp); /* Result as a fraction: x over z. */
+  crecip(zmone, z); /* zmone = z^(2^255 - 21). */
+  fmul(z, x, zmone); /* z = x * zmone. */
+  fcontract(mypublic, z);
+  return 0;
+}

+ 730 - 0
src/ext/curve25519_donna/curve25519-donna.c

@@ -0,0 +1,730 @@
+/* Copyright 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * curve25519-donna: Curve25519 elliptic curve, public key function
+ *
+ * http://code.google.com/p/curve25519-donna/
+ *
+ * Adam Langley <agl@imperialviolet.org>
+ *
+ * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * More information about curve25519 can be found here
+ *   http://cr.yp.to/ecdh.html
+ *
+ * djb's sample implementation of curve25519 is written in a special assembly
+ * language called qhasm and uses the floating point registers.
+ *
+ * This is, almost, a clean room reimplementation from the curve25519 paper. It
+ * uses many of the tricks described therein. Only the crecip function is taken
+ * from the sample implementation.
+ */
+
+#include <string.h>
+#include <stdint.h>
+
+typedef uint8_t u8;
+typedef int32_t s32;
+typedef int64_t limb;
+
+/* Field element representation:
+ *
+ * Field elements are written as an array of signed, 64-bit limbs, least
+ * significant first. The value of the field element is:
+ *   x[0] + 2^26·x[1] + 2^51·x[2] + 2^77·x[3] + 2^102·x[4] + ...
+ *
+ * i.e. the limbs are 26, 25, 26, 25, ... bits wide.
+ */
+
+/* Sum two numbers: output += in
+ *
+ * Adds the ten limbs pairwise, two per loop iteration.  No carry is
+ * propagated between limbs and nothing is reduced here.
+ */
+static void fsum(limb *output, const limb *in) {
+  unsigned i;
+  for (i = 0; i < 10; i += 2) {
+    output[0+i] = (output[0+i] + in[0+i]);
+    output[1+i] = (output[1+i] + in[1+i]);
+  }
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!)
+ *
+ * Limbs are signed in this implementation, so the ten limbs are subtracted
+ * directly and an individual result limb may be negative.
+ */
+static void fdifference(limb *output, const limb *in) {
+  unsigned i;
+  for (i = 0; i < 10; ++i) {
+    output[i] = (in[i] - output[i]);
+  }
+}
+
+/* Multiply a number by a scalar: output = in * scalar
+ *
+ * Each of the ten limbs is multiplied on its own.  No carrying or reduction
+ * happens here, so the output limbs are not reduced coefficients.
+ */
+static void fscalar_product(limb *output, const limb *in, const limb scalar) {
+  unsigned i;
+  for (i = 0; i < 10; ++i) {
+    output[i] = in[i] * scalar;
+  }
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not.
+ *
+ * output receives 19 limbs (indices 0..18), one per sum of input indices;
+ * the upper ones are folded back in by freduce_degree().
+ *
+ * Every input limb is narrowed to s32 before it is multiplied, and one
+ * operand of each product is widened back to limb so that the product is
+ * formed in 64 bits.  Inputs therefore have to fit in 32 bits.
+ *
+ * A product of two odd-indexed limbs is doubled.  With limbs alternately 26
+ * and 25 bits wide, an odd-indexed limb sits one bit lower than a uniform
+ * 26-bit spacing would put it, so such a product lands one bit above the
+ * position of limb i+j.
+ */
+static void fproduct(limb *output, const limb *in2, const limb *in) {
+  output[0] =       ((limb) ((s32) in2[0])) * ((s32) in[0]);
+  output[1] =       ((limb) ((s32) in2[0])) * ((s32) in[1]) +
+                    ((limb) ((s32) in2[1])) * ((s32) in[0]);
+  output[2] =  2 *  ((limb) ((s32) in2[1])) * ((s32) in[1]) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[2]) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[0]);
+  output[3] =       ((limb) ((s32) in2[1])) * ((s32) in[2]) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[1]) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[3])) * ((s32) in[0]);
+  output[4] =       ((limb) ((s32) in2[2])) * ((s32) in[2]) +
+               2 * (((limb) ((s32) in2[1])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[3])) * ((s32) in[1])) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[4]) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[0]);
+  output[5] =       ((limb) ((s32) in2[2])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[3])) * ((s32) in[2]) +
+                    ((limb) ((s32) in2[1])) * ((s32) in[4]) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[1]) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[5])) * ((s32) in[0]);
+  output[6] =  2 * (((limb) ((s32) in2[3])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[1])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[5])) * ((s32) in[1])) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[4]) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[2]) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[6]) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[0]);
+  output[7] =       ((limb) ((s32) in2[3])) * ((s32) in[4]) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[5])) * ((s32) in[2]) +
+                    ((limb) ((s32) in2[1])) * ((s32) in[6]) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[1]) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[7])) * ((s32) in[0]);
+  output[8] =       ((limb) ((s32) in2[4])) * ((s32) in[4]) +
+               2 * (((limb) ((s32) in2[3])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[5])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[1])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[7])) * ((s32) in[1])) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[6]) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[2]) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[0]);
+  output[9] =       ((limb) ((s32) in2[4])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[5])) * ((s32) in[4]) +
+                    ((limb) ((s32) in2[3])) * ((s32) in[6]) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[7])) * ((s32) in[2]) +
+                    ((limb) ((s32) in2[1])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[1]) +
+                    ((limb) ((s32) in2[0])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[0]);
+  output[10] = 2 * (((limb) ((s32) in2[5])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[3])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[7])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[1])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[1])) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[6]) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[4]) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[2]);
+  output[11] =      ((limb) ((s32) in2[5])) * ((s32) in[6]) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[7])) * ((s32) in[4]) +
+                    ((limb) ((s32) in2[3])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[3]) +
+                    ((limb) ((s32) in2[2])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[2]);
+  output[12] =      ((limb) ((s32) in2[6])) * ((s32) in[6]) +
+               2 * (((limb) ((s32) in2[5])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[7])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[3])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[3])) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[4]);
+  output[13] =      ((limb) ((s32) in2[6])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[7])) * ((s32) in[6]) +
+                    ((limb) ((s32) in2[5])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[5]) +
+                    ((limb) ((s32) in2[4])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[4]);
+  output[14] = 2 * (((limb) ((s32) in2[7])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[5])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[5])) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[6]);
+  output[15] =      ((limb) ((s32) in2[7])) * ((s32) in[8]) +
+                    ((limb) ((s32) in2[8])) * ((s32) in[7]) +
+                    ((limb) ((s32) in2[6])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[6]);
+  output[16] =      ((limb) ((s32) in2[8])) * ((s32) in[8]) +
+               2 * (((limb) ((s32) in2[7])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[7]));
+  output[17] =      ((limb) ((s32) in2[8])) * ((s32) in[9]) +
+                    ((limb) ((s32) in2[9])) * ((s32) in[8]);
+  output[18] = 2 *  ((limb) ((s32) in2[9])) * ((s32) in[9]);
+}
+
+/* Reduce a long form to a short form by taking the input mod 2^255 - 19.
+ *
+ * output must hold 19 limbs on entry.  Limb 10+k is folded into limb k for
+ * k = 8 down to 0, multiplied by 19 (written as x<<4 + x<<1 + x).
+ * output[9] has no partner and is left as it is; limbs 10..18 are read but
+ * not cleared.  No carrying is done here.
+ */
+static void freduce_degree(limb *output) {
+  /* Each of these shifts and adds ends up multiplying the value by 19. */
+  output[8] += output[18] << 4;
+  output[8] += output[18] << 1;
+  output[8] += output[18];
+  output[7] += output[17] << 4;
+  output[7] += output[17] << 1;
+  output[7] += output[17];
+  output[6] += output[16] << 4;
+  output[6] += output[16] << 1;
+  output[6] += output[16];
+  output[5] += output[15] << 4;
+  output[5] += output[15] << 1;
+  output[5] += output[15];
+  output[4] += output[14] << 4;
+  output[4] += output[14] << 1;
+  output[4] += output[14];
+  output[3] += output[13] << 4;
+  output[3] += output[13] << 1;
+  output[3] += output[13];
+  output[2] += output[12] << 4;
+  output[2] += output[12] << 1;
+  output[2] += output[12];
+  output[1] += output[11] << 4;
+  output[1] += output[11] << 1;
+  output[1] += output[11];
+  output[0] += output[10] << 4;
+  output[0] += output[10] << 1;
+  output[0] += output[10];
+}
+
+#if (-1 & 3) != 3
+#error "This code only works on a two's complement system"
+#endif
+
+/* return v / 2^26, using only shifts and adds.
+ *
+ * The quotient is rounded toward zero, as the C '/' operator does: for a
+ * negative v, 2^26 - 1 is added first so that the right shift lands on the
+ * truncated quotient.  No branch on the sign of v is taken.
+ *
+ * This relies on '>>' of a negative signed value being an arithmetic shift.
+ */
+static inline limb
+div_by_2_26(const limb v)
+{
+  /* High word of v; no shift needed*/
+  const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32);
+  /* Set to all 1s if v was negative; else set to 0s. */
+  const int32_t sign = ((int32_t) highword) >> 31;
+  /* Set to 0x3ffffff if v was negative; else set to 0. */
+  const int32_t roundoff = ((uint32_t) sign) >> 6;
+  /* Should return v / (1<<26) */
+  return (v + roundoff) >> 26;
+}
+
+/* return v / (2^25), using only shifts and adds.
+ *
+ * Same construction as div_by_2_26(), one bit narrower: for a negative v,
+ * 2^25 - 1 is added before the right shift so that the quotient is rounded
+ * toward zero.
+ *
+ * This relies on '>>' of a negative signed value being an arithmetic shift.
+ */
+static inline limb
+div_by_2_25(const limb v)
+{
+  /* High word of v; no shift needed*/
+  const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32);
+  /* Set to all 1s if v was negative; else set to 0s. */
+  const int32_t sign = ((int32_t) highword) >> 31;
+  /* Set to 0x1ffffff if v was negative; else set to 0. */
+  const int32_t roundoff = ((uint32_t) sign) >> 7;
+  /* Should return v / (1<<25) */
+  return (v + roundoff) >> 25;
+}
+
+static inline s32
+div_s32_by_2_25(const s32 v)
+{ /* 32-bit counterpart of div_by_2_25(): v / 2^25, rounded toward zero. */
+   const s32 roundoff = ((uint32_t)(v >> 31)) >> 7; /* 0x1ffffff if v is
+             * negative, else 0 (given an arithmetic '>>' on v). */
+   return (v + roundoff) >> 25;
+}
+
+/* Reduce all coefficients of the short form input so that |x| < 2^26.
+ *
+ * On entry: |output[i]| < 2^62
+ *
+ * output must have room for at least 11 limbs: output[10] is used as a
+ * scratch slot for the carry out of limb 9 and is zero again on return.
+ * Even-indexed limbs are reduced to 26 bits and odd-indexed limbs to 25.
+ */
+static void freduce_coefficients(limb *output) {
+  unsigned i;
+
+  output[10] = 0;
+
+  for (i = 0; i < 10; i += 2) {
+    limb over = div_by_2_26(output[i]);
+    output[i] -= over << 26;
+    output[i+1] += over;
+
+    over = div_by_2_25(output[i+1]);
+    output[i+1] -= over << 25;
+    output[i+2] += over; /* On the last pass this writes output[10]. */
+  }
+  /* Now |output[10]| < 2 ^ 38 and all other coefficients are reduced. */
+  output[0] += output[10] << 4; /* Fold limb 10 into limb 0, times 19. */
+  output[0] += output[10] << 1;
+  output[0] += output[10];
+
+  output[10] = 0;
+
+  /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19 * 2^38
+   * So |over| will be no more than 77825  */
+  {
+    limb over = div_by_2_26(output[0]);
+    output[0] -= over << 26;
+    output[1] += over;
+  }
+
+  /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 77825
+   * So |over| will be no more than 1. */
+  {
+    /* output[1] fits in 32 bits, so we can use div_s32_by_2_25 here. */
+    s32 over32 = div_s32_by_2_25((s32) output[1]);
+    output[1] -= over32 << 25;
+    output[2] += over32;
+  }
+
+  /* Finally, output[0,1,3..9] are reduced, and output[2] is "nearly reduced":
+   * we have |output[2]| <= 2^26.  This is good enough for all of our math,
+   * but it will require an extra freduce_coefficients before fcontract. */
+}
+
+/* A helpful wrapper around fproduct: output = in * in2.
+ *
+ * output must be distinct to both inputs. The output is reduced degree and
+ * reduced coefficient.
+ *
+ * The product is built and reduced in the 19-limb local t, and only the
+ * first ten limbs are copied to output at the end; output itself is not
+ * touched before that copy.
+ */
+static void
+fmul(limb *output, const limb *in, const limb *in2) {
+  limb t[19]; /* Sized for fproduct(), which writes indices 0..18. */
+  fproduct(t, in, in2);
+  freduce_degree(t);
+  freduce_coefficients(t);
+  memcpy(output, t, sizeof(limb) * 10);
+}
+
+static void fsquare_inner(limb *output, const limb *in) {  /* Unreduced square:
+   * fills output[0..18] from in[0..9].  Every in[i] is first truncated to
+   * s32, and one operand of each product is widened to limb before the
+   * multiplication.  No degree or coefficient reduction happens here.
+   * NOTE(review): the extra factor of 2 on products of two odd-indexed limbs
+   * presumably compensates for the mixed 26/25-bit limb widths -- confirm
+   * against fproduct.
+   */
+  output[0] =       ((limb) ((s32) in[0])) * ((s32) in[0]);
+  output[1] =  2 *  ((limb) ((s32) in[0])) * ((s32) in[1]);
+  output[2] =  2 * (((limb) ((s32) in[1])) * ((s32) in[1]) +
+                    ((limb) ((s32) in[0])) * ((s32) in[2]));
+  output[3] =  2 * (((limb) ((s32) in[1])) * ((s32) in[2]) +
+                    ((limb) ((s32) in[0])) * ((s32) in[3]));
+  output[4] =       ((limb) ((s32) in[2])) * ((s32) in[2]) +
+               4 *  ((limb) ((s32) in[1])) * ((s32) in[3]) +
+               2 *  ((limb) ((s32) in[0])) * ((s32) in[4]);
+  output[5] =  2 * (((limb) ((s32) in[2])) * ((s32) in[3]) +
+                    ((limb) ((s32) in[1])) * ((s32) in[4]) +
+                    ((limb) ((s32) in[0])) * ((s32) in[5]));
+  output[6] =  2 * (((limb) ((s32) in[3])) * ((s32) in[3]) +
+                    ((limb) ((s32) in[2])) * ((s32) in[4]) +
+                    ((limb) ((s32) in[0])) * ((s32) in[6]) +
+               2 *  ((limb) ((s32) in[1])) * ((s32) in[5]));
+  output[7] =  2 * (((limb) ((s32) in[3])) * ((s32) in[4]) +
+                    ((limb) ((s32) in[2])) * ((s32) in[5]) +
+                    ((limb) ((s32) in[1])) * ((s32) in[6]) +
+                    ((limb) ((s32) in[0])) * ((s32) in[7]));
+  output[8] =       ((limb) ((s32) in[4])) * ((s32) in[4]) +
+               2 * (((limb) ((s32) in[2])) * ((s32) in[6]) +
+                    ((limb) ((s32) in[0])) * ((s32) in[8]) +
+               2 * (((limb) ((s32) in[1])) * ((s32) in[7]) +
+                    ((limb) ((s32) in[3])) * ((s32) in[5])));
+  output[9] =  2 * (((limb) ((s32) in[4])) * ((s32) in[5]) +
+                    ((limb) ((s32) in[3])) * ((s32) in[6]) +
+                    ((limb) ((s32) in[2])) * ((s32) in[7]) +
+                    ((limb) ((s32) in[1])) * ((s32) in[8]) +
+                    ((limb) ((s32) in[0])) * ((s32) in[9]));
+  output[10] = 2 * (((limb) ((s32) in[5])) * ((s32) in[5]) +
+                    ((limb) ((s32) in[4])) * ((s32) in[6]) +
+                    ((limb) ((s32) in[2])) * ((s32) in[8]) +
+               2 * (((limb) ((s32) in[3])) * ((s32) in[7]) +
+                    ((limb) ((s32) in[1])) * ((s32) in[9])));
+  output[11] = 2 * (((limb) ((s32) in[5])) * ((s32) in[6]) +
+                    ((limb) ((s32) in[4])) * ((s32) in[7]) +
+                    ((limb) ((s32) in[3])) * ((s32) in[8]) +
+                    ((limb) ((s32) in[2])) * ((s32) in[9]));
+  output[12] =      ((limb) ((s32) in[6])) * ((s32) in[6]) +
+               2 * (((limb) ((s32) in[4])) * ((s32) in[8]) +
+               2 * (((limb) ((s32) in[5])) * ((s32) in[7]) +
+                    ((limb) ((s32) in[3])) * ((s32) in[9])));
+  output[13] = 2 * (((limb) ((s32) in[6])) * ((s32) in[7]) +
+                    ((limb) ((s32) in[5])) * ((s32) in[8]) +
+                    ((limb) ((s32) in[4])) * ((s32) in[9]));
+  output[14] = 2 * (((limb) ((s32) in[7])) * ((s32) in[7]) +
+                    ((limb) ((s32) in[6])) * ((s32) in[8]) +
+               2 *  ((limb) ((s32) in[5])) * ((s32) in[9]));
+  output[15] = 2 * (((limb) ((s32) in[7])) * ((s32) in[8]) +
+                    ((limb) ((s32) in[6])) * ((s32) in[9]));
+  output[16] =      ((limb) ((s32) in[8])) * ((s32) in[8]) +
+               4 *  ((limb) ((s32) in[7])) * ((s32) in[9]);
+  output[17] = 2 *  ((limb) ((s32) in[8])) * ((s32) in[9]);
+  output[18] = 2 *  ((limb) ((s32) in[9])) * ((s32) in[9]);
+}
+
+static void
+fsquare(limb *output, const limb *in) {  /* output = in * in, reduced */
+  limb t[19];                            /* scratch for the 19-limb square */
+  fsquare_inner(t, in);
+  freduce_degree(t);
+  freduce_coefficients(t);
+  memcpy(output, t, sizeof(limb) * 10);  /* only the first 10 limbs leave */
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form.
+ *
+ * F(n,start,shift,mask) loads the four bytes input[start..start+3] as a
+ * little-endian word, shifts it right by 'shift' bits and masks the result
+ * into output[n].  The masks alternate between 26 bits (0x3ffffff) for even
+ * n and 25 bits (0x1ffffff) for odd n.  All ten limbs output[0..9] are
+ * written and bytes input[0..31] are read; the mask on the last limb drops
+ * the top bit (bit 255) of the input.
+ */
+static void
+fexpand(limb *output, const u8 *input) {
+#define F(n,start,shift,mask) \
+  output[n] = ((((limb) input[start + 0]) | \
+                ((limb) input[start + 1]) << 8 | \
+                ((limb) input[start + 2]) << 16 | \
+                ((limb) input[start + 3]) << 24) >> shift) & mask;
+  F(0, 0, 0, 0x3ffffff);
+  F(1, 3, 2, 0x1ffffff);
+  F(2, 6, 3, 0x3ffffff);
+  F(3, 9, 5, 0x1ffffff);
+  F(4, 12, 6, 0x3ffffff);
+  F(5, 16, 0, 0x1ffffff);
+  F(6, 19, 1, 0x3ffffff);
+  F(7, 22, 3, 0x1ffffff);
+  F(8, 25, 4, 0x3ffffff);
+  F(9, 28, 6, 0x1ffffff);
+#undef F
+}
+
+#if (-32 >> 1) != -16
+#error "This code only works when >> does sign-extension on negative numbers"
+#endif
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array.
+ *
+ * input is modified in place: negative limbs are made non-negative by
+ * borrowing from the next limb (a borrow out of input[9] is folded back
+ * into input[0] multiplied by 19), and the limbs are then shifted so that
+ * they can be written out bytewise.  output[0..31] is fully overwritten.
+ *
+ * NOTE(review): no final conditional subtraction of the field prime is
+ * visible in this function -- confirm whether callers depend on the output
+ * being the canonical (smallest non-negative) representative.
+ */
+static void
+fcontract(u8 *output, limb *input) {
+  int i;
+  int j;
+
+  for (j = 0; j < 2; ++j) {
+    for (i = 0; i < 9; ++i) {
+      if ((i & 1) == 1) {
+        /* This calculation is a time-invariant way to make input[i] positive
+           by borrowing from the next-larger limb.
+        */
+        const s32 mask = (s32)(input[i]) >> 31;
+        const s32 carry = -(((s32)(input[i]) & mask) >> 25);
+        input[i] = (s32)(input[i]) + (carry << 25);
+        input[i+1] = (s32)(input[i+1]) - carry;
+      } else {
+        const s32 mask = (s32)(input[i]) >> 31;
+        const s32 carry = -(((s32)(input[i]) & mask) >> 26);
+        input[i] = (s32)(input[i]) + (carry << 26);
+        input[i+1] = (s32)(input[i+1]) - carry;
+      }
+    }
+    {
+      const s32 mask = (s32)(input[9]) >> 31;
+      const s32 carry = -(((s32)(input[9]) & mask) >> 25);
+      input[9] = (s32)(input[9]) + (carry << 25);
+      input[0] = (s32)(input[0]) - (carry * 19);
+    }
+  }
+
+  /* The first borrow-propagation pass above ended with every limb
+     except (possibly) input[0] non-negative.
+
+     Since each input limb except input[0] is decreased by at most 1
+     by a borrow-propagation pass, the second borrow-propagation pass
+     could only have wrapped around to decrease input[0] again if the
+     first pass left input[0] negative *and* input[1] through input[9]
+     were all zero.  In that case, input[1] is now 2^25 - 1, and this
+     last borrow-propagation step will leave input[1] non-negative.
+  */
+  {
+    const s32 mask = (s32)(input[0]) >> 31;
+    const s32 carry = -(((s32)(input[0]) & mask) >> 26);
+    input[0] = (s32)(input[0]) + (carry << 26);
+    input[1] = (s32)(input[1]) - carry;
+  }
+
+  /* Both passes through the above loop, plus the last 0-to-1 step, are
+     necessary: if input[9] is -1 and input[0] through input[8] are 0,
+     negative values will remain in the array until the end.
+   */
+
+  input[1] <<= 2;
+  input[2] <<= 3;
+  input[3] <<= 5;
+  input[4] <<= 6;
+  input[6] <<= 1;
+  input[7] <<= 3;
+  input[8] <<= 4;
+  input[9] <<= 6;
+#define F(i, s) \
+  output[s+0] |=  input[i] & 0xff; \
+  output[s+1]  = (input[i] >> 8) & 0xff; \
+  output[s+2]  = (input[i] >> 16) & 0xff; \
+  output[s+3]  = (input[i] >> 24) & 0xff;
+  output[0] = 0;   /* F ORs into output[s+0]; byte 0 has no earlier writer */
+  output[16] = 0;  /* likewise byte 16: F(4,12) stops at output[15] */
+  F(0,0);
+  F(1,3);
+  F(2,6);
+  F(3,9);
+  F(4,12);
+  F(5,16);
+  F(6,19);
+  F(7,22);
+  F(8,25);
+  F(9,28);
+#undef F
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z2: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ *
+ * x2 and z2 are written directly by fproduct and then reduced in place;
+ * x3 and z3 receive 10 limbs each via memcpy from local temporaries.
+ */
+static void fmonty(limb *x2, limb *z2,  /* output 2Q */
+                   limb *x3, limb *z3,  /* output Q + Q' */
+                   limb *x, limb *z,    /* input Q */
+                   limb *xprime, limb *zprime,  /* input Q' */
+                   const limb *qmqp /* input Q - Q' */) {
+  limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
+        zzprime[19], zzzprime[19], xxxprime[19];
+
+  memcpy(origx, x, 10 * sizeof(limb));
+  fsum(x, z);
+  fdifference(z, origx);  // does x - z
+
+  memcpy(origxprime, xprime, sizeof(limb) * 10);
+  fsum(xprime, zprime);
+  fdifference(zprime, origxprime);
+  fproduct(xxprime, xprime, z);
+  fproduct(zzprime, x, zprime);
+  freduce_degree(xxprime);
+  freduce_coefficients(xxprime);
+  freduce_degree(zzprime);
+  freduce_coefficients(zzprime);
+  memcpy(origxprime, xxprime, sizeof(limb) * 10);
+  fsum(xxprime, zzprime);
+  fdifference(zzprime, origxprime);
+  fsquare(xxxprime, xxprime);
+  fsquare(zzzprime, zzprime);
+  fproduct(zzprime, zzzprime, qmqp);
+  freduce_degree(zzprime);
+  freduce_coefficients(zzprime);
+  memcpy(x3, xxxprime, sizeof(limb) * 10);
+  memcpy(z3, zzprime, sizeof(limb) * 10);
+
+  fsquare(xx, x);
+  fsquare(zz, z);
+  fproduct(x2, xx, zz);
+  freduce_degree(x2);
+  freduce_coefficients(x2);
+  fdifference(zz, xx);  // does zz = xx - zz
+  memset(zzz + 10, 0, sizeof(limb) * 9);
+  fscalar_product(zzz, zz, 121665);
+  /* No need to call freduce_degree here:
+     fscalar_product doesn't increase the degree of its input. */
+  freduce_coefficients(zzz);
+  fsum(zzz, xx);
+  fproduct(z2, zz, zzz);
+  freduce_degree(z2);
+  freduce_coefficients(z2);
+}
+
+/* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave
+ * them unchanged if 'iswap' is 0.  Runs in data-invariant time to avoid
+ * side-channel attacks.
+ *
+ * NOTE that this function requires that 'iswap' be 1 or 0; other values give
+ * wrong results.  Also, the two limb arrays must be in reduced-coefficient,
+ * reduced-degree form: the values in a[10..18] or b[10..18] aren't swapped,
+ * and all values in a[0..9],b[0..9] must have magnitude less than
+ * INT32_MAX.
+ */
+static void
+swap_conditional(limb a[19], limb b[19], limb iswap) {
+  unsigned i;
+  const s32 swap = (s32) -iswap;  /* 0 if iswap is 0, all-ones if it is 1 */
+
+  for (i = 0; i < 10; ++i) {
+    const s32 x = swap & ( ((s32)a[i]) ^ ((s32)b[i]) );  /* a^b or 0 */
+    a[i] = ((s32)a[i]) ^ x;
+    b[i] = ((s32)b[i]) ^ x;
+  }
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ *
+ * The scalar is consumed from n[31] down to n[0], most significant bit of
+ * each byte first (256 iterations in total).  Each iteration conditionally
+ * swaps the two working points on the current bit, runs one fmonty step,
+ * swaps the results back on the same bit, and then exchanges the working
+ * and output buffer pointers so the next iteration writes into the buffers
+ * just consumed.  The working values start as nq = (1 : 0) and
+ * nqpq = (q : 1).
+ */
+static void
+cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q) {
+  limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
+  limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+  limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
+  limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+  unsigned i, j;
+
+  memcpy(nqpqx, q, sizeof(limb) * 10);
+
+  for (i = 0; i < 32; ++i) {
+    u8 byte = n[31 - i];
+    for (j = 0; j < 8; ++j) {
+      const limb bit = byte >> 7;  /* current top bit of byte: 0 or 1 */
+
+      swap_conditional(nqx, nqpqx, bit);
+      swap_conditional(nqz, nqpqz, bit);
+      fmonty(nqx2, nqz2,
+             nqpqx2, nqpqz2,
+             nqx, nqz,
+             nqpqx, nqpqz,
+             q);
+      swap_conditional(nqx2, nqpqx2, bit);
+      swap_conditional(nqz2, nqpqz2, bit);
+
+      t = nqx;
+      nqx = nqx2;
+      nqx2 = t;
+      t = nqz;
+      nqz = nqz2;
+      nqz2 = t;
+      t = nqpqx;
+      nqpqx = nqpqx2;
+      nqpqx2 = t;
+      t = nqpqz;
+      nqpqz = nqpqz2;
+      nqpqz2 = t;
+
+      byte <<= 1;  /* move the next lower bit into the top position */
+    }
+  }
+
+  memcpy(resultx, nqx, sizeof(limb) * 10);
+  memcpy(resultz, nqz, sizeof(limb) * 10);
+}
+
+// -----------------------------------------------------------------------------
+// Shamelessly copied from djb's code
+// -----------------------------------------------------------------------------
+static void
+crecip(limb *out, const limb *z) {  /* out = z^(2^255 - 21).
+   * The comment at the start of each statement below gives the exponent of
+   * z held by that statement's destination once it has run.
+   * NOTE(review): this is presumably the inverse of z modulo 2^255 - 19
+   * (exponent p - 2, by Fermat's little theorem) -- confirm.
+   */
+  limb z2[10];
+  limb z9[10];
+  limb z11[10];
+  limb z2_5_0[10];
+  limb z2_10_0[10];
+  limb z2_20_0[10];
+  limb z2_50_0[10];
+  limb z2_100_0[10];
+  limb t0[10];
+  limb t1[10];
+  int i;
+
+  /* 2 */ fsquare(z2,z);
+  /* 4 */ fsquare(t1,z2);
+  /* 8 */ fsquare(t0,t1);
+  /* 9 */ fmul(z9,t0,z);
+  /* 11 */ fmul(z11,z9,z2);
+  /* 22 */ fsquare(t0,z11);
+  /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
+
+  /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
+  /* 2^7 - 2^2 */ fsquare(t1,t0);
+  /* 2^8 - 2^3 */ fsquare(t0,t1);
+  /* 2^9 - 2^4 */ fsquare(t1,t0);
+  /* 2^10 - 2^5 */ fsquare(t0,t1);
+  /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
+
+  /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
+  /* 2^12 - 2^2 */ fsquare(t1,t0);
+  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
+
+  /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
+  /* 2^22 - 2^2 */ fsquare(t1,t0);
+  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
+
+  /* 2^41 - 2^1 */ fsquare(t1,t0);
+  /* 2^42 - 2^2 */ fsquare(t0,t1);
+  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
+
+  /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
+  /* 2^52 - 2^2 */ fsquare(t1,t0);
+  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
+
+  /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
+  /* 2^102 - 2^2 */ fsquare(t0,t1);
+  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
+
+  /* 2^201 - 2^1 */ fsquare(t0,t1);
+  /* 2^202 - 2^2 */ fsquare(t1,t0);
+  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
+
+  /* 2^251 - 2^1 */ fsquare(t1,t0);
+  /* 2^252 - 2^2 */ fsquare(t0,t1);
+  /* 2^253 - 2^3 */ fsquare(t1,t0);
+  /* 2^254 - 2^4 */ fsquare(t0,t1);
+  /* 2^255 - 2^5 */ fsquare(t1,t0);
+  /* 2^255 - 21 */ fmul(out,t1,z11);
+}
+
+int curve25519_donna(u8 *, const u8 *, const u8 *);
+
+int
+curve25519_donna(u8 *mypublic, const u8 *secret, const u8 *basepoint) {  /*
+   * Writes 32 bytes to mypublic: the contracted x/z quotient of the point
+   * obtained by multiplying basepoint by a clamped copy of secret.
+   * secret and basepoint are each read as 32 bytes.  Always returns 0.
+   */
+  limb bp[10], x[10], z[11], zmone[10];  /* NOTE(review): z has 11 limbs, presumably as a carry slot for freduce_coefficients -- confirm */
+  uint8_t e[32];  /* NOTE(review): copy of the secret scalar; not cleared before return */
+  int i;
+
+  for (i = 0; i < 32; ++i) e[i] = secret[i];
+  e[0] &= 248;   /* clear the three lowest bits of the scalar */
+  e[31] &= 127;  /* clear bit 255 */
+  e[31] |= 64;   /* set bit 254 */
+
+  fexpand(bp, basepoint);
+  cmult(x, z, e, bp);
+  crecip(zmone, z);     /* zmone = z^(2^255 - 21), see crecip */
+  fmul(z, x, zmone);    /* z now holds x * zmone */
+  freduce_coefficients(z);
+  fcontract(mypublic, z);
+  return 0;
+}

+ 2 - 0
src/or/channeltls.c

@@ -914,6 +914,8 @@ channel_tls_handle_cell(cell_t *cell, or_connection_t *conn)
     case CELL_RELAY:
     case CELL_RELAY_EARLY:
     case CELL_DESTROY:
+    case CELL_CREATE2:
+    case CELL_CREATED2:
       /*
        * These are all transport independent and we pass them up through the
        * channel_t mechanism.  They are ultimately handled in command.c.

+ 211 - 119
src/or/circuitbuild.c

@@ -28,6 +28,8 @@
 #include "networkstatus.h"
 #include "nodelist.h"
 #include "onion.h"
+#include "onion_tap.h"
+#include "onion_fast.h"
 #include "policies.h"
 #include "transports.h"
 #include "relay.h"
@@ -53,7 +55,8 @@ static channel_t * channel_connect_for_circuit(const tor_addr_t *addr,
                                                uint16_t port,
                                                const char *id_digest);
 static int circuit_deliver_create_cell(circuit_t *circ,
-                                       uint8_t cell_type, const char *payload);
+                                       const create_cell_t *create_cell,
+                                       int relayed);
 static int onion_pick_cpath_exit(origin_circuit_t *circ, extend_info_t *exit);
 static crypt_path_t *onion_next_hop_in_cpath(crypt_path_t *cpath);
 static int onion_extend_cpath(origin_circuit_t *circ);
@@ -473,14 +476,13 @@ circuit_n_chan_done(channel_t *chan, int status)
            *     died? */
         }
       } else {
-        /* pull the create cell out of circ->onionskin, and send it */
-        tor_assert(circ->n_chan_onionskin);
-        if (circuit_deliver_create_cell(circ,CELL_CREATE,
-                                        circ->n_chan_onionskin)<0) {
+        /* pull the create cell out of circ->n_chan_create_cell, and send it */
+        tor_assert(circ->n_chan_create_cell);
+        if (circuit_deliver_create_cell(circ, circ->n_chan_create_cell, 1)<0) {
           circuit_mark_for_close(circ, END_CIRC_REASON_RESOURCELIMIT);
           continue;
         }
-        tor_free(circ->n_chan_onionskin);
+        tor_free(circ->n_chan_create_cell);
         circuit_set_state(circ, CIRCUIT_STATE_OPEN);
       }
     }
@@ -491,22 +493,25 @@ circuit_n_chan_done(channel_t *chan, int status)
 
 /** Find a new circid that isn't currently in use on the circ->n_chan
  * for the outgoing
- * circuit <b>circ</b>, and deliver a cell of type <b>cell_type</b>
- * (either CELL_CREATE or CELL_CREATE_FAST) with payload <b>payload</b>
- * to this circuit.
- * Return -1 if we failed to find a suitable circid, else return 0.
+ * circuit <b>circ</b>, and deliver the cell <b>create_cell</b> to this
+ * circuit.  If <b>relayed</b> is true, this is a create cell somebody
+ * gave us via an EXTEND cell, so we shouldn't worry if we don't understand
+ * it. Return -1 if we failed to find a suitable circid, else return 0.
  */
 static int
-circuit_deliver_create_cell(circuit_t *circ, uint8_t cell_type,
-                            const char *payload)
+circuit_deliver_create_cell(circuit_t *circ, const create_cell_t *create_cell,
+                            int relayed)
 {
   cell_t cell;
   circid_t id;
+  int r;
 
   tor_assert(circ);
   tor_assert(circ->n_chan);
-  tor_assert(payload);
-  tor_assert(cell_type == CELL_CREATE || cell_type == CELL_CREATE_FAST);
+  tor_assert(create_cell);
+  tor_assert(create_cell->cell_type == CELL_CREATE ||
+             create_cell->cell_type == CELL_CREATE_FAST ||
+             create_cell->cell_type == CELL_CREATE2);
 
   id = get_unique_circ_id_by_chan(circ->n_chan);
   if (!id) {
@@ -517,10 +522,14 @@ circuit_deliver_create_cell(circuit_t *circ, uint8_t cell_type,
   circuit_set_n_circid_chan(circ, id, circ->n_chan);
 
   memset(&cell, 0, sizeof(cell_t));
-  cell.command = cell_type;
+  r = relayed ? create_cell_format_relayed(&cell, create_cell)
+              : create_cell_format(&cell, create_cell);
+  if (r < 0) {
+    log_warn(LD_CIRC,"Couldn't format create cell");
+    return -1;
+  }
   cell.circ_id = circ->n_circ_id;
 
-  memcpy(cell.payload, payload, ONIONSKIN_CHALLENGE_LEN);
   append_cell_to_circuit_queue(circ, circ->n_chan, &cell,
                                CELL_DIRECTION_OUT, 0);
 
@@ -610,6 +619,73 @@ circuit_timeout_want_to_count_circ(origin_circuit_t *circ)
           && circ->build_state->desired_path_len == DEFAULT_ROUTE_LEN;
 }
 
+#ifdef CURVE25519_ENABLED
+/** Return true if the ntor handshake is enabled in the configuration, or if
+ * it's been set to "auto" in the configuration and it's enabled in the
+ * consensus.
+ *
+ * Any UseNTorHandshake option value other than -1 is returned as-is; only
+ * when it is -1 is the "UseNTorHandshake" parameter looked up through
+ * networkstatus_get_param().  (The trailing 0, 0, 1 arguments are presumably
+ * the default, minimum and maximum -- confirm against networkstatus.h.)
+ * Only compiled when CURVE25519_ENABLED is defined. */
+static int
+circuits_can_use_ntor(void)
+{
+  const or_options_t *options = get_options();
+  if (options->UseNTorHandshake != -1)
+    return options->UseNTorHandshake;
+  return networkstatus_get_param(NULL, "UseNTorHandshake", 0, 0, 1);
+}
+#endif
+
+/** Decide whether to use a TAP or ntor handshake for connecting to <b>ei</b>
+ * directly, and set *<b>cell_type_out</b> and *<b>handshake_type_out</b>
+ * accordingly.
+ *
+ * When CURVE25519_ENABLED is defined, CELL_CREATE2 with
+ * ONION_HANDSHAKE_TYPE_NTOR is chosen if <b>ei</b> carries a non-zero
+ * curve25519 onion key and circuits_can_use_ntor() returns true.  In every
+ * other case (including builds without CURVE25519_ENABLED, where <b>ei</b>
+ * is unused) the result is CELL_CREATE with ONION_HANDSHAKE_TYPE_TAP. */
+static void
+circuit_pick_create_handshake(uint8_t *cell_type_out,
+                              uint16_t *handshake_type_out,
+                              const extend_info_t *ei)
+{
+#ifdef CURVE25519_ENABLED
+  if (!tor_mem_is_zero((const char*)ei->curve25519_onion_key.public_key,
+                       CURVE25519_PUBKEY_LEN) &&
+      circuits_can_use_ntor()) {
+    *cell_type_out = CELL_CREATE2;
+    *handshake_type_out = ONION_HANDSHAKE_TYPE_NTOR;
+    return;
+  }
+#else
+  (void) ei;
+#endif
+
+  *cell_type_out = CELL_CREATE;
+  *handshake_type_out = ONION_HANDSHAKE_TYPE_TAP;
+}
+
+/** Decide whether to use a TAP or ntor handshake for connecting to <b>ei</b>
+ * directly, and set *<b>handshake_type_out</b> accordingly. Also decide
+ * whether, when extending through <b>node_prev</b>, we should use an EXTEND2
+ * or an EXTEND cell, and set *<b>cell_type_out</b> and
+ * *<b>create_cell_type_out</b> accordingly.
+ *
+ * RELAY_COMMAND_EXTEND2 with CELL_CREATE2 is chosen only when
+ * <b>node_prev</b> is non-NULL, the chosen handshake is not TAP, and
+ * <b>node_prev</b> either has a curve25519 onion key or its routerstatus
+ * says it supports extend2 cells.  Otherwise RELAY_COMMAND_EXTEND with
+ * CELL_CREATE is used.  In both cases *<b>handshake_type_out</b> keeps the
+ * value picked by circuit_pick_create_handshake(). */
+static void
+circuit_pick_extend_handshake(uint8_t *cell_type_out,
+                              uint8_t *create_cell_type_out,
+                              uint16_t *handshake_type_out,
+                              const node_t *node_prev,
+                              const extend_info_t *ei)
+{
+  uint8_t t;  /* create-cell type from the helper; discarded */
+  circuit_pick_create_handshake(&t, handshake_type_out, ei);
+  /* XXXX024 The check for whether the node has a curve25519 key is a bad
+   * proxy for whether it can do extend2 cells; once a version that
+   * handles extend2 cells is out, remove it. */
+  if (node_prev &&
+      *handshake_type_out != ONION_HANDSHAKE_TYPE_TAP &&
+      (node_has_curve25519_onion_key(node_prev) ||
+       (node_prev->rs && node_prev->rs->version_supports_extend2_cells))) {
+    *cell_type_out = RELAY_COMMAND_EXTEND2;
+    *create_cell_type_out = CELL_CREATE2;
+  } else {
+    *cell_type_out = RELAY_COMMAND_EXTEND;
+    *create_cell_type_out = CELL_CREATE;
+  }
+}
+
 /** This is the backbone function for building circuits.
  *
  * If circ's first hop is closed, then we need to build a create
@@ -625,16 +701,16 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
 {
   crypt_path_t *hop;
   const node_t *node;
-  char payload[2+4+DIGEST_LEN+ONIONSKIN_CHALLENGE_LEN];
-  char *onionskin;
-  size_t payload_len;
 
   tor_assert(circ);
 
   if (circ->cpath->state == CPATH_STATE_CLOSED) {
+    /* This is the first hop. */
+    create_cell_t cc;
     int fast;
-    uint8_t cell_type;
+    int len;
     log_debug(LD_CIRC,"First skin; sending create cell.");
+    memset(&cc, 0, sizeof(cc));
     if (circ->build_state->onehop_tunnel)
       control_event_bootstrap(BOOTSTRAP_STATUS_ONEHOP_CREATE, 0);
     else
@@ -644,30 +720,31 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
     fast = should_use_create_fast_for_circuit(circ);
     if (!fast) {
       /* We are an OR and we know the right onion key: we should
-       * send an old slow create cell.
+       * send a create cell.
        */
-      cell_type = CELL_CREATE;
-      if (onion_skin_create(circ->cpath->extend_info->onion_key,
-                            &(circ->cpath->dh_handshake_state),
-                            payload) < 0) {
-        log_warn(LD_CIRC,"onion_skin_create (first hop) failed.");
-        return - END_CIRC_REASON_INTERNAL;
-      }
+      circuit_pick_create_handshake(&cc.cell_type, &cc.handshake_type,
+                                    circ->cpath->extend_info);
       note_request("cell: create", 1);
     } else {
       /* We are not an OR, and we're building the first hop of a circuit to a
        * new OR: we can be speedy and use CREATE_FAST to save an RSA operation
        * and a DH operation. */
-      cell_type = CELL_CREATE_FAST;
-      memset(payload, 0, sizeof(payload));
-      crypto_rand((char*) circ->cpath->fast_handshake_state,
-                  sizeof(circ->cpath->fast_handshake_state));
-      memcpy(payload, circ->cpath->fast_handshake_state,
-             sizeof(circ->cpath->fast_handshake_state));
+      cc.cell_type = CELL_CREATE_FAST;
+      cc.handshake_type = ONION_HANDSHAKE_TYPE_FAST;
       note_request("cell: create fast", 1);
     }
 
-    if (circuit_deliver_create_cell(TO_CIRCUIT(circ), cell_type, payload) < 0)
+    len = onion_skin_create(cc.handshake_type,
+                            circ->cpath->extend_info,
+                            &circ->cpath->handshake_state,
+                            cc.onionskin);
+    if (len < 0) {
+      log_warn(LD_CIRC,"onion_skin_create (first hop) failed.");
+      return - END_CIRC_REASON_INTERNAL;
+    }
+    cc.handshake_len = len;
+
+    if (circuit_deliver_create_cell(TO_CIRCUIT(circ), &cc, 0) < 0)
       return - END_CIRC_REASON_RESOURCELIMIT;
 
     circ->cpath->state = CPATH_STATE_AWAITING_KEYS;
@@ -676,10 +753,13 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
              fast ? "CREATE_FAST" : "CREATE",
              node ? node_describe(node) : "<unnamed>");
   } else {
+    extend_cell_t ec;
+    int len;
     tor_assert(circ->cpath->state == CPATH_STATE_OPEN);
     tor_assert(circ->base_.state == CIRCUIT_STATE_BUILDING);
     log_debug(LD_CIRC,"starting to send subsequent skin.");
     hop = onion_next_hop_in_cpath(circ->cpath);
+    memset(&ec, 0, sizeof(ec));
     if (!hop) {
       /* done building the circuit. whew. */
       circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN);
@@ -753,29 +833,50 @@ circuit_send_next_onion_skin(origin_circuit_t *circ)
       return - END_CIRC_REASON_INTERNAL;
     }
 
-    set_uint32(payload, tor_addr_to_ipv4n(&hop->extend_info->addr));
-    set_uint16(payload+4, htons(hop->extend_info->port));
+    {
+      const node_t *prev_node;
+      prev_node = node_get_by_id(hop->prev->extend_info->identity_digest);
+      circuit_pick_extend_handshake(&ec.cell_type,
+                                    &ec.create_cell.cell_type,
+                                    &ec.create_cell.handshake_type,
+                                    prev_node,
+                                    hop->extend_info);
+    }
 
-    onionskin = payload+2+4;
-    memcpy(payload+2+4+ONIONSKIN_CHALLENGE_LEN,
-           hop->extend_info->identity_digest, DIGEST_LEN);
-    payload_len = 2+4+ONIONSKIN_CHALLENGE_LEN+DIGEST_LEN;
+    tor_addr_copy(&ec.orport_ipv4.addr, &hop->extend_info->addr);
+    ec.orport_ipv4.port = hop->extend_info->port;
+    tor_addr_make_unspec(&ec.orport_ipv6.addr);
+    memcpy(ec.node_id, hop->extend_info->identity_digest, DIGEST_LEN);
 
-    if (onion_skin_create(hop->extend_info->onion_key,
-                          &(hop->dh_handshake_state), onionskin) < 0) {
+    len = onion_skin_create(ec.create_cell.handshake_type,
+                            hop->extend_info,
+                            &hop->handshake_state,
+                            ec.create_cell.onionskin);
+    if (len < 0) {
       log_warn(LD_CIRC,"onion_skin_create failed.");
       return - END_CIRC_REASON_INTERNAL;
     }
+    ec.create_cell.handshake_len = len;
 
     log_info(LD_CIRC,"Sending extend relay cell.");
     note_request("cell: extend", 1);
-    /* send it to hop->prev, because it will transfer
-     * it to a create cell and then send to hop */
-    if (relay_send_command_from_edge(0, TO_CIRCUIT(circ),
-                                     RELAY_COMMAND_EXTEND,
-                                     payload, payload_len, hop->prev) < 0)
-      return 0; /* circuit is closed */
+    {
+      uint8_t command = 0;
+      uint16_t payload_len=0;
+      uint8_t payload[RELAY_PAYLOAD_SIZE];
+      if (extend_cell_format(&command, &payload_len, payload, &ec)<0) {
+        log_warn(LD_CIRC,"Couldn't format extend cell");
+        return -END_CIRC_REASON_INTERNAL;
+      }
 
+      /* send it to hop->prev, because it will transfer
+       * it to a create cell and then send to hop */
+      if (relay_send_command_from_edge(0, TO_CIRCUIT(circ),
+                                       command,
+                                       (char*)payload, payload_len,
+                                       hop->prev) < 0)
+        return 0; /* circuit is closed */
+    }
     hop->state = CPATH_STATE_AWAITING_KEYS;
   }
   return 0;
@@ -814,11 +915,7 @@ circuit_extend(cell_t *cell, circuit_t *circ)
 {
   channel_t *n_chan;
   relay_header_t rh;
-  char *onionskin;
-  char *id_digest=NULL;
-  uint32_t n_addr32;
-  uint16_t n_port;
-  tor_addr_t n_addr;
+  extend_cell_t ec;
   const char *msg = NULL;
   int should_launch = 0;
 
@@ -841,27 +938,21 @@ circuit_extend(cell_t *cell, circuit_t *circ)
 
   relay_header_unpack(&rh, cell->payload);
 
-  if (rh.length < 4+2+ONIONSKIN_CHALLENGE_LEN+DIGEST_LEN) {
+  if (extend_cell_parse(&ec, rh.command,
+                        cell->payload+RELAY_HEADER_SIZE,
+                        rh.length) < 0) {
     log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
-           "Wrong length %d on extend cell. Closing circuit.",
-           rh.length);
+           "Can't parse extend cell. Closing circuit.");
     return -1;
   }
 
-  n_addr32 = ntohl(get_uint32(cell->payload+RELAY_HEADER_SIZE));
-  n_port = ntohs(get_uint16(cell->payload+RELAY_HEADER_SIZE+4));
-  onionskin = (char*) cell->payload+RELAY_HEADER_SIZE+4+2;
-  id_digest = (char*) cell->payload+RELAY_HEADER_SIZE+4+2+
-    ONIONSKIN_CHALLENGE_LEN;
-  tor_addr_from_ipv4h(&n_addr, n_addr32);
-
-  if (!n_port || !n_addr32) {
+  if (!ec.orport_ipv4.port || tor_addr_is_null(&ec.orport_ipv4.addr)) {
     log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
            "Client asked me to extend to zero destination port or addr.");
     return -1;
   }
 
-  if (tor_addr_is_internal(&n_addr, 0) &&
+  if (tor_addr_is_internal(&ec.orport_ipv4.addr, 0) &&
       !get_options()->ExtendAllowPrivateAddresses) {
     log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
            "Client asked me to extend to a private address");
@@ -874,7 +965,7 @@ circuit_extend(cell_t *cell, circuit_t *circ)
    * fingerprints -- a) because it opens the user up to a mitm attack,
    * and b) because it lets an attacker force the relay to hold open a
    * new TLS connection for each extend request. */
-  if (tor_digest_is_zero(id_digest)) {
+  if (tor_digest_is_zero((const char*)ec.node_id)) {
     log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
            "Client asked me to extend without specifying an id_digest.");
     return -1;
@@ -883,7 +974,7 @@ circuit_extend(cell_t *cell, circuit_t *circ)
   /* Next, check if we're being asked to connect to the hop that the
    * extend cell came from. There isn't any reason for that, and it can
    * assist circular-path attacks. */
-  if (tor_memeq(id_digest,
+  if (tor_memeq(ec.node_id,
                 TO_OR_CIRCUIT(circ)->p_chan->identity_digest,
                 DIGEST_LEN)) {
     log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
@@ -891,27 +982,33 @@ circuit_extend(cell_t *cell, circuit_t *circ)
     return -1;
   }
 
-  n_chan = channel_get_for_extend(id_digest,
-                                  &n_addr,
+  n_chan = channel_get_for_extend((const char*)ec.node_id,
+                                  &ec.orport_ipv4.addr,
                                   &msg,
                                   &should_launch);
 
   if (!n_chan) {
     log_debug(LD_CIRC|LD_OR,"Next router (%s): %s",
-              fmt_addrport(&n_addr, n_port), msg?msg:"????");
+              fmt_addrport(&ec.orport_ipv4.addr,ec.orport_ipv4.port),
+              msg?msg:"????");
 
     circ->n_hop = extend_info_new(NULL /*nickname*/,
-                                    id_digest,
-                                    NULL /*onion_key*/,
-                                    &n_addr, n_port);
+                                  (const char*)ec.node_id,
+                                  NULL /*onion_key*/,
+                                  NULL /*curve25519_key*/,
+                                  &ec.orport_ipv4.addr,
+                                  ec.orport_ipv4.port);
+
+    circ->n_chan_create_cell = tor_memdup(&ec.create_cell,
+                                          sizeof(ec.create_cell));
 
-    circ->n_chan_onionskin = tor_malloc(ONIONSKIN_CHALLENGE_LEN);
-    memcpy(circ->n_chan_onionskin, onionskin, ONIONSKIN_CHALLENGE_LEN);
     circuit_set_state(circ, CIRCUIT_STATE_CHAN_WAIT);
 
     if (should_launch) {
       /* we should try to open a connection */
-      n_chan = channel_connect_for_circuit(&n_addr, n_port, id_digest);
+      n_chan = channel_connect_for_circuit(&ec.orport_ipv4.addr,
+                                           ec.orport_ipv4.port,
+                                           (const char*)ec.node_id);
       if (!n_chan) {
         log_info(LD_CIRC,"Launching n_chan failed. Closing circuit.");
         circuit_mark_for_close(circ, END_CIRC_REASON_CONNECTFAILED);
@@ -932,8 +1029,9 @@ circuit_extend(cell_t *cell, circuit_t *circ)
             "n_chan is %s",
             channel_get_canonical_remote_descr(n_chan));
 
-  if (circuit_deliver_create_cell(circ, CELL_CREATE, onionskin) < 0)
+  if (circuit_deliver_create_cell(circ, &ec.create_cell, 1) < 0)
     return -1;
+
   return 0;
 }
 
@@ -1785,7 +1883,7 @@ entry_guard_inc_circ_attempt_count(entry_guard_t *guard)
 }
 
 /** A created or extended cell came back to us on the circuit, and it included
- * <b>reply</b> as its body.  (If <b>reply_type</b> is CELL_CREATED, the body
+ * <b>reply</b> as its body.  (If <b>reply_type</b> is CELL_CREATED, the body
  * contains (the second DH key, plus KH).  If <b>reply_type</b> is
  * CELL_CREATED_FAST, the body contains a secret y and a hash H(x|y).)
  *
@@ -1795,8 +1893,8 @@ entry_guard_inc_circ_attempt_count(entry_guard_t *guard)
  * Return - reason if we want to mark circ for close, else return 0.
  */
 int
-circuit_finish_handshake(origin_circuit_t *circ, uint8_t reply_type,
-                         const uint8_t *reply)
+circuit_finish_handshake(origin_circuit_t *circ,
+                         const created_cell_t *reply)
 {
   char keys[CPATH_KEY_MATERIAL_LEN];
   crypt_path_t *hop;
@@ -1816,39 +1914,25 @@ circuit_finish_handshake(origin_circuit_t *circ, uint8_t reply_type,
   }
   tor_assert(hop->state == CPATH_STATE_AWAITING_KEYS);
 
-  if (reply_type == CELL_CREATED && hop->dh_handshake_state) {
-    if (onion_skin_client_handshake(hop->dh_handshake_state, (char*)reply,keys,
-                                    DIGEST_LEN*2+CIPHER_KEY_LEN*2) < 0) {
+  {
+    if (onion_skin_client_handshake(hop->handshake_state.tag,
+                                    &hop->handshake_state,
+                                    reply->reply, reply->handshake_len,
+                                    (uint8_t*)keys, sizeof(keys),
+                                    (uint8_t*)hop->rend_circ_nonce) < 0) {
       log_warn(LD_CIRC,"onion_skin_client_handshake failed.");
       return -END_CIRC_REASON_TORPROTOCOL;
     }
-    /* Remember hash of g^xy */
-    memcpy(hop->handshake_digest, reply+DH_KEY_LEN, DIGEST_LEN);
-  } else if (reply_type == CELL_CREATED_FAST && !hop->dh_handshake_state) {
-    if (fast_client_handshake(hop->fast_handshake_state, reply,
-                              (uint8_t*)keys,
-                              DIGEST_LEN*2+CIPHER_KEY_LEN*2) < 0) {
-      log_warn(LD_CIRC,"fast_client_handshake failed.");
-      return -END_CIRC_REASON_TORPROTOCOL;
-    }
-    memcpy(hop->handshake_digest, reply+DIGEST_LEN, DIGEST_LEN);
-  } else {
-    log_warn(LD_PROTOCOL,"CREATED cell type did not match CREATE cell type.");
-    return -END_CIRC_REASON_TORPROTOCOL;
   }
 
-  crypto_dh_free(hop->dh_handshake_state); /* don't need it anymore */
-  hop->dh_handshake_state = NULL;
-
-  memset(hop->fast_handshake_state, 0, sizeof(hop->fast_handshake_state));
+  onion_handshake_state_release(&hop->handshake_state);
 
   if (circuit_init_cpath_crypto(hop, keys, 0)<0) {
     return -END_CIRC_REASON_TORPROTOCOL;
   }
 
   hop->state = CPATH_STATE_OPEN;
-  log_info(LD_CIRC,"Finished building %scircuit hop:",
-           (reply_type == CELL_CREATED_FAST) ? "fast " : "");
+  log_info(LD_CIRC,"Finished building circuit hop:");
   circuit_log_path(LOG_INFO,LD_CIRC,circ);
   control_event_circuit_status(circ, CIRC_EVENT_EXTENDED, 0);
 
@@ -1908,24 +1992,25 @@ circuit_truncated(origin_circuit_t *circ, crypt_path_t *layer, int reason)
  * cell back.
  */
 int
-onionskin_answer(or_circuit_t *circ, uint8_t cell_type, const char *payload,
-                 const char *keys)
+onionskin_answer(or_circuit_t *circ,
+                 const created_cell_t *created_cell,
+                 const char *keys,
+                 const uint8_t *rend_circ_nonce)
 {
   cell_t cell;
   crypt_path_t *tmp_cpath;
 
+  if (created_cell_format(&cell, created_cell) < 0) {
+    log_warn(LD_BUG,"couldn't format created cell");
+    return -1;
+  }
+  cell.circ_id = circ->p_circ_id;
+
   tmp_cpath = tor_malloc_zero(sizeof(crypt_path_t));
   tmp_cpath->magic = CRYPT_PATH_MAGIC;
 
-  memset(&cell, 0, sizeof(cell_t));
-  cell.command = cell_type;
-  cell.circ_id = circ->p_circ_id;
-
   circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_OPEN);
 
-  memcpy(cell.payload, payload,
-         cell_type == CELL_CREATED ? ONIONSKIN_REPLY_LEN : DIGEST_LEN*2);
-
   log_debug(LD_CIRC,"init digest forward 0x%.8x, backward 0x%.8x.",
             (unsigned int)get_uint32(keys),
             (unsigned int)get_uint32(keys+20));
@@ -1941,12 +2026,9 @@ onionskin_answer(or_circuit_t *circ, uint8_t cell_type, const char *payload,
   tmp_cpath->magic = 0;
   tor_free(tmp_cpath);
 
-  if (cell_type == CELL_CREATED)
-    memcpy(circ->handshake_digest, cell.payload+DH_KEY_LEN, DIGEST_LEN);
-  else
-    memcpy(circ->handshake_digest, cell.payload+DIGEST_LEN, DIGEST_LEN);
+  memcpy(circ->rend_circ_nonce, rend_circ_nonce, DIGEST_LEN);
 
-  circ->is_first_hop = (cell_type == CELL_CREATED_FAST);
+  circ->is_first_hop = (created_cell->cell_type == CELL_CREATED_FAST);
 
   append_cell_to_circuit_queue(TO_CIRCUIT(circ),
                                circ->p_chan, &cell, CELL_DIRECTION_IN, 0);
@@ -2751,8 +2833,9 @@ onion_append_hop(crypt_path_t **head_ptr, extend_info_t *choice)
 /** Allocate a new extend_info object based on the various arguments. */
 extend_info_t *
 extend_info_new(const char *nickname, const char *digest,
-                  crypto_pk_t *onion_key,
-                  const tor_addr_t *addr, uint16_t port)
+                crypto_pk_t *onion_key,
+                const curve25519_public_key_t *curve25519_key,
+                const tor_addr_t *addr, uint16_t port)
 {
   extend_info_t *info = tor_malloc_zero(sizeof(extend_info_t));
   memcpy(info->identity_digest, digest, DIGEST_LEN);
@@ -2760,6 +2843,13 @@ extend_info_new(const char *nickname, const char *digest,
     strlcpy(info->nickname, nickname, sizeof(info->nickname));
   if (onion_key)
     info->onion_key = crypto_pk_dup_key(onion_key);
+#ifdef CURVE25519_ENABLED
+  if (curve25519_key)
+    memcpy(&info->curve25519_onion_key, curve25519_key,
+           sizeof(curve25519_public_key_t));
+#else
+  (void)curve25519_key;
+#endif
   tor_addr_copy(&info->addr, addr);
   info->port = port;
   return info;
@@ -2794,12 +2884,14 @@ extend_info_from_node(const node_t *node, int for_direct_connect)
     return extend_info_new(node->ri->nickname,
                              node->identity,
                              node->ri->onion_pkey,
+                             node->ri->onion_curve25519_pkey,
                              &ap.addr,
                              ap.port);
   else if (node->rs && node->md)
     return extend_info_new(node->rs->nickname,
                              node->identity,
                              node->md->onion_pkey,
+                             node->md->onion_curve25519_pkey,
                              &ap.addr,
                              ap.port);
   else

+ 10 - 6
src/or/circuitbuild.h

@@ -30,12 +30,15 @@ void circuit_note_clock_jumped(int seconds_elapsed);
 int circuit_extend(cell_t *cell, circuit_t *circ);
 int circuit_init_cpath_crypto(crypt_path_t *cpath, const char *key_data,
                               int reverse);
-int circuit_finish_handshake(origin_circuit_t *circ, uint8_t cell_type,
-                             const uint8_t *reply);
+struct created_cell_t;
+int circuit_finish_handshake(origin_circuit_t *circ,
+                             const struct created_cell_t *created_cell);
 int circuit_truncated(origin_circuit_t *circ, crypt_path_t *layer,
                       int reason);
-int onionskin_answer(or_circuit_t *circ, uint8_t cell_type,
-                     const char *payload, const char *keys);
+int onionskin_answer(or_circuit_t *circ,
+                     const struct created_cell_t *created_cell,
+                     const char *keys,
+                     const uint8_t *rend_circ_nonce);
 int circuit_all_predicted_ports_handled(time_t now, int *need_uptime,
                                         int *need_capacity);
 
@@ -43,8 +46,9 @@ int circuit_append_new_exit(origin_circuit_t *circ, extend_info_t *info);
 int circuit_extend_to_new_exit(origin_circuit_t *circ, extend_info_t *info);
 void onion_append_to_cpath(crypt_path_t **head_ptr, crypt_path_t *new_hop);
 extend_info_t *extend_info_new(const char *nickname, const char *digest,
-                                 crypto_pk_t *onion_key,
-                                 const tor_addr_t *addr, uint16_t port);
+                               crypto_pk_t *onion_key,
+                               const curve25519_public_key_t *curve25519_key,
+                               const tor_addr_t *addr, uint16_t port);
 extend_info_t *extend_info_from_node(const node_t *r, int for_direct_connect);
 extend_info_t *extend_info_dup(extend_info_t *info);
 void extend_info_free(extend_info_t *info);

+ 8 - 5
src/or/circuitlist.c

@@ -23,6 +23,7 @@
 #include "networkstatus.h"
 #include "nodelist.h"
 #include "onion.h"
+#include "onion_fast.h"
 #include "relay.h"
 #include "rendclient.h"
 #include "rendcommon.h"
@@ -251,7 +252,7 @@ circuit_set_state(circuit_t *circ, uint8_t state)
     smartlist_add(circuits_pending_chans, circ);
   }
   if (state == CIRCUIT_STATE_OPEN)
-    tor_assert(!circ->n_chan_onionskin);
+    tor_assert(!circ->n_chan_create_cell);
   circ->state = state;
 }
 
@@ -678,7 +679,7 @@ circuit_free(circuit_t *circ)
   }
 
   extend_info_free(circ->n_hop);
-  tor_free(circ->n_chan_onionskin);
+  tor_free(circ->n_chan_create_cell);
 
   /* Remove from map. */
   circuit_set_n_circid_chan(circ, 0, NULL);
@@ -748,7 +749,8 @@ circuit_free_cpath_node(crypt_path_t *victim)
   crypto_cipher_free(victim->b_crypto);
   crypto_digest_free(victim->f_digest);
   crypto_digest_free(victim->b_digest);
-  crypto_dh_free(victim->dh_handshake_state);
+  onion_handshake_state_release(&victim->handshake_state);
+  crypto_dh_free(victim->rend_dh_handshake_state);
   extend_info_free(victim->extend_info);
 
   memwipe(victim, 0xBB, sizeof(crypt_path_t)); /* poison memory */
@@ -1505,7 +1507,8 @@ assert_cpath_layer_ok(const crypt_path_t *cp)
       tor_assert(cp->b_crypto);
       /* fall through */
     case CPATH_STATE_CLOSED:
-      tor_assert(!cp->dh_handshake_state);
+      /*XXXX Assert that there's no handshake_state either. */
+      tor_assert(!cp->rend_dh_handshake_state);
       break;
     case CPATH_STATE_AWAITING_KEYS:
       /* tor_assert(cp->dh_handshake_state); */
@@ -1592,7 +1595,7 @@ assert_circuit_ok(const circuit_t *c)
   tor_assert(c->deliver_window >= 0);
   tor_assert(c->package_window >= 0);
   if (c->state == CIRCUIT_STATE_OPEN) {
-    tor_assert(!c->n_chan_onionskin);
+    tor_assert(!c->n_chan_create_cell);
     if (or_circ) {
       tor_assert(or_circ->n_crypto);
       tor_assert(or_circ->p_crypto);

+ 2 - 2
src/or/circuituse.c

@@ -1697,8 +1697,8 @@ circuit_get_open_circ_or_launch(entry_connection_t *conn,
               return -1;
             }
             extend_info = extend_info_new(conn->chosen_exit_name+1,
-                                            digest, NULL, &addr,
-                                            conn->socks_request->port);
+                                          digest, NULL, NULL, &addr,
+                                          conn->socks_request->port);
           } else {
             /* We will need an onion key for the router, and we
              * don't have one. Refuse or relax requirements. */

+ 59 - 16
src/or/command.c

@@ -133,11 +133,13 @@ command_process_cell(channel_t *chan, cell_t *cell)
   switch (cell->command) {
     case CELL_CREATE:
     case CELL_CREATE_FAST:
+    case CELL_CREATE2:
       ++stats_n_create_cells_processed;
       PROCESS_CELL(create, cell, chan);
       break;
     case CELL_CREATED:
     case CELL_CREATED_FAST:
+    case CELL_CREATED2:
       ++stats_n_created_cells_processed;
       PROCESS_CELL(created, cell, chan);
       break;
@@ -187,6 +189,7 @@ command_process_create_cell(cell_t *cell, channel_t *chan)
   or_circuit_t *circ;
   const or_options_t *options = get_options();
   int id_is_high;
+  create_cell_t *create_cell;
 
   tor_assert(cell);
   tor_assert(chan);
@@ -252,12 +255,18 @@ command_process_create_cell(cell_t *cell, channel_t *chan)
   circ = or_circuit_new(cell->circ_id, chan);
   circ->base_.purpose = CIRCUIT_PURPOSE_OR;
   circuit_set_state(TO_CIRCUIT(circ), CIRCUIT_STATE_ONIONSKIN_PENDING);
-  if (cell->command == CELL_CREATE) {
-    char *onionskin = tor_malloc(ONIONSKIN_CHALLENGE_LEN);
-    memcpy(onionskin, cell->payload, ONIONSKIN_CHALLENGE_LEN);
+  create_cell = tor_malloc_zero(sizeof(create_cell_t));
+  if (create_cell_parse(create_cell, cell) < 0) {
+    tor_free(create_cell);
+    log_fn(LOG_PROTOCOL_WARN, LD_OR,
+           "Bogus/unrecognized create cell; closing.");
+    circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_TORPROTOCOL);
+    return;
+  }
 
+  if (create_cell->handshake_type != ONION_HANDSHAKE_TYPE_FAST) {
     /* hand it off to the cpuworkers, and then return. */
-    if (assign_onionskin_to_cpuworker(NULL, circ, onionskin) < 0) {
+    if (assign_onionskin_to_cpuworker(NULL, circ, create_cell) < 0) {
       log_debug(LD_GENERAL,"Failed to hand off onionskin. Closing.");
       circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_RESOURCELIMIT);
       return;
@@ -266,26 +275,40 @@ command_process_create_cell(cell_t *cell, channel_t *chan)
   } else {
     /* This is a CREATE_FAST cell; we can handle it immediately without using
      * a CPU worker. */
-    char keys[CPATH_KEY_MATERIAL_LEN];
-    char reply[DIGEST_LEN*2];
-
-    tor_assert(cell->command == CELL_CREATE_FAST);
+    uint8_t keys[CPATH_KEY_MATERIAL_LEN];
+    uint8_t rend_circ_nonce[DIGEST_LEN];
+    int len;
+    created_cell_t created_cell;
 
     /* Make sure we never try to use the OR connection on which we
      * received this cell to satisfy an EXTEND request,  */
     channel_mark_client(chan);
 
-    if (fast_server_handshake(cell->payload, (uint8_t*)reply,
-                              (uint8_t*)keys, sizeof(keys))<0) {
+    memset(&created_cell, 0, sizeof(created_cell));
+    len = onion_skin_server_handshake(ONION_HANDSHAKE_TYPE_FAST,
+                                       create_cell->onionskin,
+                                       create_cell->handshake_len,
+                                       NULL,
+                                       created_cell.reply,
+                                       keys, CPATH_KEY_MATERIAL_LEN,
+                                       rend_circ_nonce);
+    tor_free(create_cell);
+    if (len < 0) {
       log_warn(LD_OR,"Failed to generate key material. Closing.");
       circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_INTERNAL);
+      tor_free(create_cell);
       return;
     }
-    if (onionskin_answer(circ, CELL_CREATED_FAST, reply, keys)<0) {
+    created_cell.cell_type = CELL_CREATED_FAST;
+    created_cell.handshake_len = len;
+
+    if (onionskin_answer(circ, &created_cell,
+                         (const char *)keys, rend_circ_nonce)<0) {
       log_warn(LD_OR,"Failed to reply to CREATE_FAST cell. Closing.");
       circuit_mark_for_close(TO_CIRCUIT(circ), END_CIRC_REASON_INTERNAL);
       return;
     }
+    memwipe(keys, 0, sizeof(keys));
   }
 }
 
@@ -301,6 +324,7 @@ static void
 command_process_created_cell(cell_t *cell, channel_t *chan)
 {
   circuit_t *circ;
+  extended_cell_t extended_cell;
 
   circ = circuit_get_by_circid_channel(cell->circ_id, chan);
 
@@ -318,12 +342,18 @@ command_process_created_cell(cell_t *cell, channel_t *chan)
     return;
   }
 
+  if (created_cell_parse(&extended_cell.created_cell, cell) < 0) {
+    log_fn(LOG_PROTOCOL_WARN, LD_OR, "Unparseable created cell.");
+    circuit_mark_for_close(circ, END_CIRC_REASON_TORPROTOCOL);
+    return;
+  }
+
   if (CIRCUIT_IS_ORIGIN(circ)) { /* we're the OP. Handshake this. */
     origin_circuit_t *origin_circ = TO_ORIGIN_CIRCUIT(circ);
     int err_reason = 0;
     log_debug(LD_OR,"at OP. Finishing handshake.");
-    if ((err_reason = circuit_finish_handshake(origin_circ, cell->command,
-                                               cell->payload)) < 0) {
+    if ((err_reason = circuit_finish_handshake(origin_circ,
+                                        &extended_cell.created_cell)) < 0) {
       log_warn(LD_OR,"circuit_finish_handshake failed.");
       circuit_mark_for_close(circ, -err_reason);
       return;
@@ -336,11 +366,24 @@ command_process_created_cell(cell_t *cell, channel_t *chan)
       return;
     }
   } else { /* pack it into an extended relay cell, and send it. */
+    uint8_t command=0;
+    uint16_t len=0;
+    uint8_t payload[RELAY_PAYLOAD_SIZE];
     log_debug(LD_OR,
               "Converting created cell to extended relay cell, sending.");
-    relay_send_command_from_edge(0, circ, RELAY_COMMAND_EXTENDED,
-                                 (char*)cell->payload, ONIONSKIN_REPLY_LEN,
-                                 NULL);
+    memset(payload, 0, sizeof(payload));
+    if (extended_cell.created_cell.cell_type == CELL_CREATED2)
+      extended_cell.cell_type = RELAY_COMMAND_EXTENDED2;
+    else
+      extended_cell.cell_type = RELAY_COMMAND_EXTENDED;
+    if (extended_cell_format(&command, &len, payload, &extended_cell) < 0) {
+      log_fn(LOG_PROTOCOL_WARN, LD_OR, "Can't format extended cell.");
+      circuit_mark_for_close(circ, END_CIRC_REASON_TORPROTOCOL);
+      return;
+    }
+
+    relay_send_command_from_edge(0, circ, command,
+                                 (const char*)payload, len, NULL);
   }
 }
 

+ 1 - 0
src/or/config.c

@@ -390,6 +390,7 @@ static config_var_t option_vars_[] = {
   V(UseEntryGuards,              BOOL,     "1"),
   V(UseEntryGuardsAsDirGuards,   BOOL,     "1"),
   V(UseMicrodescriptors,         AUTOBOOL, "auto"),
+  V(UseNTorHandshake,            AUTOBOOL, "auto"),
   V(User,                        STRING,   NULL),
   V(UserspaceIOCPBuffers,        BOOL,     "0"),
   VAR("V1AuthoritativeDirectory",BOOL, V1AuthoritativeDir,   "0"),

+ 119 - 86
src/or/cpuworker.c

@@ -32,9 +32,6 @@
 
 /** The tag specifies which circuit this onionskin was from. */
 #define TAG_LEN 10
-/** How many bytes are sent from the cpuworker back to tor? */
-#define LEN_ONION_RESPONSE \
-  (1+TAG_LEN+ONIONSKIN_REPLY_LEN+CPATH_KEY_MATERIAL_LEN)
 
 /** How many cpuworkers we have running right now. */
 static int num_cpuworkers=0;
@@ -70,7 +67,7 @@ connection_cpu_finished_flushing(connection_t *conn)
 /** Pack global_id and circ_id; set *tag to the result. (See note on
  * cpuworker_main for wire format.) */
 static void
-tag_pack(char *tag, uint64_t chan_id, circid_t circ_id)
+tag_pack(uint8_t *tag, uint64_t chan_id, circid_t circ_id)
 {
   /*XXXX RETHINK THIS WHOLE MESS !!!! !NM NM NM NM*/
   /*XXXX DOUBLEPLUSTHIS!!!! AS AS AS AS*/
@@ -81,12 +78,53 @@ tag_pack(char *tag, uint64_t chan_id, circid_t circ_id)
 /** Unpack <b>tag</b> into addr, port, and circ_id.
  */
 static void
-tag_unpack(const char *tag, uint64_t *chan_id, circid_t *circ_id)
+tag_unpack(const uint8_t *tag, uint64_t *chan_id, circid_t *circ_id)
 {
   *chan_id = get_uint64(tag);
   *circ_id = get_uint16(tag+8);
 }
 
+/** Magic numbers to make sure our cpuworker_requests don't grow any
+ * mis-framing bugs. */
+#define CPUWORKER_REQUEST_MAGIC 0xda4afeed
+#define CPUWORKER_REPLY_MAGIC 0x5eedf00d
+
+/** A request sent to a cpuworker. */
+typedef struct cpuworker_request_t {
+  /** Magic number; must be CPUWORKER_REQUEST_MAGIC. */
+  uint32_t magic;
+  /** Opaque tag to identify the job */
+  uint8_t tag[TAG_LEN];
+  /** Task code. Must be one of CPUWORKER_TASK_* */
+  uint8_t task;
+
+  /** A create cell for the cpuworker to process. */
+  create_cell_t create_cell;
+
+  /* Turn the above into a tagged union if needed. */
+} cpuworker_request_t;
+
+/** A reply sent by a cpuworker. */
+typedef struct cpuworker_reply_t {
+  /** Magic number; must be CPUWORKER_REPLY_MAGIC. */
+  uint32_t magic;
+  /** Opaque tag to identify the job; matches the request's tag.*/
+  uint8_t tag[TAG_LEN];
+  /** True iff we got a successful request. */
+  uint8_t success;
+
+  /** Output of processing a create cell
+   *
+   * @{
+   */
+  /** The created cell to send back. */
+  created_cell_t created_cell;
+  /** The keys to use on this circuit. */
+  uint8_t keys[CPATH_KEY_MATERIAL_LEN];
+  /** Input to use for authenticating introduce1 cells. */
+  uint8_t rend_auth_material[DIGEST_LEN];
+} cpuworker_reply_t;
+
 /** Called when the onion key has changed and we need to spawn new
  * cpuworkers.  Close all currently idle cpuworkers, and mark the last
  * rotation time as now.
@@ -132,8 +170,6 @@ connection_cpu_reached_eof(connection_t *conn)
 int
 connection_cpu_process_inbuf(connection_t *conn)
 {
-  char success;
-  char buf[LEN_ONION_RESPONSE];
   uint64_t chan_id;
   circid_t circ_id;
   channel_t *p_chan = NULL;
@@ -146,15 +182,16 @@ connection_cpu_process_inbuf(connection_t *conn)
     return 0;
 
   if (conn->state == CPUWORKER_STATE_BUSY_ONION) {
-    if (connection_get_inbuf_len(conn) < LEN_ONION_RESPONSE)
+    cpuworker_reply_t rpl;
+    if (connection_get_inbuf_len(conn) < sizeof(cpuworker_reply_t))
       return 0; /* not yet */
-    tor_assert(connection_get_inbuf_len(conn) == LEN_ONION_RESPONSE);
+    tor_assert(connection_get_inbuf_len(conn) == sizeof(cpuworker_reply_t));
 
-    connection_fetch_from_buf(&success,1,conn);
-    connection_fetch_from_buf(buf,LEN_ONION_RESPONSE-1,conn);
+    connection_fetch_from_buf((void*)&rpl,sizeof(cpuworker_reply_t),conn);
 
+    tor_assert(rpl.magic == CPUWORKER_REPLY_MAGIC);
     /* parse out the circ it was talking about */
-    tag_unpack(buf, &chan_id, &circ_id);
+    tag_unpack(rpl.tag, &chan_id, &circ_id);
     circ = NULL;
     log_debug(LD_OR,
               "Unpacking cpuworker reply, chan_id is " U64_FORMAT
@@ -165,7 +202,7 @@ connection_cpu_process_inbuf(connection_t *conn)
     if (p_chan)
       circ = circuit_get_by_circid_channel(circ_id, p_chan);
 
-    if (success == 0) {
+    if (rpl.success == 0) {
       log_debug(LD_OR,
                 "decoding onionskin failed. "
                 "(Old key or bad software.) Closing.");
@@ -183,8 +220,10 @@ connection_cpu_process_inbuf(connection_t *conn)
       goto done_processing;
     }
     tor_assert(! CIRCUIT_IS_ORIGIN(circ));
-    if (onionskin_answer(TO_OR_CIRCUIT(circ), CELL_CREATED, buf+TAG_LEN,
-                         buf+TAG_LEN+ONIONSKIN_REPLY_LEN) < 0) {
+    if (onionskin_answer(TO_OR_CIRCUIT(circ),
+                         &rpl.created_cell,
+                         (const char*)rpl.keys,
+                         rpl.rend_auth_material) < 0) {
       log_warn(LD_OR,"onionskin_answer failed. Closing.");
       circuit_mark_for_close(circ, END_CIRC_REASON_INTERNAL);
       goto done_processing;
@@ -211,32 +250,21 @@ connection_cpu_process_inbuf(connection_t *conn)
  * Read and writes from fdarray[1].  Reads requests, writes answers.
  *
  *   Request format:
- *          Task type           [1 byte, always CPUWORKER_TASK_ONION]
- *          Opaque tag          TAG_LEN
- *          Onionskin challenge ONIONSKIN_CHALLENGE_LEN
+ *          cpuworker_request_t.
  *   Response format:
- *          Success/failure     [1 byte, boolean.]
- *          Opaque tag          TAG_LEN
- *          Onionskin challenge ONIONSKIN_REPLY_LEN
- *          Negotiated keys     KEY_LEN*2+DIGEST_LEN*2
- *
- *  (Note: this _should_ be by addr/port, since we're concerned with specific
- * connections, not with routers (where we'd use identity).)
+ *          cpuworker_reply_t
  */
 static void
 cpuworker_main(void *data)
 {
-  char question[ONIONSKIN_CHALLENGE_LEN];
-  uint8_t question_type;
+  /* For talking to the parent thread/process */
   tor_socket_t *fdarray = data;
   tor_socket_t fd;
 
   /* variables for onion processing */
-  char keys[CPATH_KEY_MATERIAL_LEN];
-  char reply_to_proxy[ONIONSKIN_REPLY_LEN];
-  char buf[LEN_ONION_RESPONSE];
-  char tag[TAG_LEN];
-  crypto_pk_t *onion_key = NULL, *last_onion_key = NULL;
+  server_onion_keys_t onion_keys;
+  cpuworker_request_t req;
+  cpuworker_reply_t rpl;
 
   fd = fdarray[1]; /* this side is ours */
 #ifndef TOR_IS_MULTITHREADED
@@ -247,68 +275,68 @@ cpuworker_main(void *data)
 #endif
   tor_free(data);
 
-  dup_onion_keys(&onion_key, &last_onion_key);
+  setup_server_onion_keys(&onion_keys);
 
   for (;;) {
-    ssize_t r;
-
-    if ((r = recv(fd, (void *)&question_type, 1, 0)) != 1) {
-//      log_fn(LOG_ERR,"read type failed. Exiting.");
-      if (r == 0) {
-        log_info(LD_OR,
-                 "CPU worker exiting because Tor process closed connection "
-                 "(either rotated keys or died).");
-      } else {
-        log_info(LD_OR,
-                 "CPU worker exiting because of error on connection to Tor "
-                 "process.");
-        log_info(LD_OR,"(Error on "TOR_SOCKET_T_FORMAT" was %s)",
-                 fd, tor_socket_strerror(tor_socket_errno(fd)));
-      }
-      goto end;
-    }
-    tor_assert(question_type == CPUWORKER_TASK_ONION);
-
-    if (read_all(fd, tag, TAG_LEN, 1) != TAG_LEN) {
-      log_err(LD_BUG,"read tag failed. Exiting.");
+    if (read_all(fd, (void *)&req, sizeof(req), 1) != sizeof(req)) {
+      log_info(LD_OR, "read request failed. Exiting.");
       goto end;
     }
-
-    if (read_all(fd, question, ONIONSKIN_CHALLENGE_LEN, 1) !=
-        ONIONSKIN_CHALLENGE_LEN) {
-      log_err(LD_BUG,"read question failed. Exiting.");
-      goto end;
-    }
-
-    if (question_type == CPUWORKER_TASK_ONION) {
-      if (onion_skin_server_handshake(question, onion_key, last_onion_key,
-          reply_to_proxy, keys, CPATH_KEY_MATERIAL_LEN) < 0) {
+    tor_assert(req.magic == CPUWORKER_REQUEST_MAGIC);
+
+    memset(&rpl, 0, sizeof(rpl));
+
+    if (req.task == CPUWORKER_TASK_ONION) {
+      const create_cell_t *cc = &req.create_cell;
+      created_cell_t *cell_out = &rpl.created_cell;
+      int n;
+      n = onion_skin_server_handshake(cc->handshake_type,
+                                      cc->onionskin, cc->handshake_len,
+                                      &onion_keys,
+                                      cell_out->reply,
+                                      rpl.keys, CPATH_KEY_MATERIAL_LEN,
+                                      rpl.rend_auth_material);
+      if (n < 0) {
         /* failure */
         log_debug(LD_OR,"onion_skin_server_handshake failed.");
-        *buf = 0; /* indicate failure in first byte */
-        memcpy(buf+1,tag,TAG_LEN);
-        /* send all zeros as answer */
-        memset(buf+1+TAG_LEN, 0, LEN_ONION_RESPONSE-(1+TAG_LEN));
+        memset(&rpl, 0, sizeof(rpl));
+        memcpy(rpl.tag, req.tag, TAG_LEN);
+        rpl.success = 0;
       } else {
         /* success */
         log_debug(LD_OR,"onion_skin_server_handshake succeeded.");
-        buf[0] = 1; /* 1 means success */
-        memcpy(buf+1,tag,TAG_LEN);
-        memcpy(buf+1+TAG_LEN,reply_to_proxy,ONIONSKIN_REPLY_LEN);
-        memcpy(buf+1+TAG_LEN+ONIONSKIN_REPLY_LEN,keys,CPATH_KEY_MATERIAL_LEN);
+        memcpy(rpl.tag, req.tag, TAG_LEN);
+        cell_out->handshake_len = n;
+        switch (cc->cell_type) {
+        case CELL_CREATE:
+          cell_out->cell_type = CELL_CREATED; break;
+        case CELL_CREATE2:
+          cell_out->cell_type = CELL_CREATED2; break;
+        case CELL_CREATE_FAST:
+          cell_out->cell_type = CELL_CREATED_FAST; break;
+        default:
+          tor_assert(0);
+          goto end;
+        }
+        rpl.success = 1;
       }
-      if (write_all(fd, buf, LEN_ONION_RESPONSE, 1) != LEN_ONION_RESPONSE) {
+      rpl.magic = CPUWORKER_REPLY_MAGIC;
+      if (write_all(fd, (void*)&rpl, sizeof(rpl), 1) != sizeof(rpl)) {
         log_err(LD_BUG,"writing response buf failed. Exiting.");
         goto end;
       }
       log_debug(LD_OR,"finished writing response.");
+    } else if (req.task == CPUWORKER_TASK_SHUTDOWN) {
+      log_info(LD_OR,"Clean shutdown: exiting");
+      goto end;
     }
+    memwipe(&req, 0, sizeof(req));
+    memwipe(&rpl, 0, sizeof(rpl));
   }
  end:
-  if (onion_key)
-    crypto_pk_free(onion_key);
-  if (last_onion_key)
-    crypto_pk_free(last_onion_key);
+  memwipe(&req, 0, sizeof(req));
+  memwipe(&rpl, 0, sizeof(rpl));
+  release_server_onion_keys(&onion_keys);
   tor_close_socket(fd);
   crypto_thread_cleanup();
   spawn_exit();
@@ -391,7 +419,7 @@ static void
 process_pending_task(connection_t *cpuworker)
 {
   or_circuit_t *circ;
-  char *onionskin = NULL;
+  create_cell_t *onionskin = NULL;
 
   tor_assert(cpuworker);
 
@@ -444,10 +472,10 @@ cull_wedged_cpuworkers(void)
  */
 int
 assign_onionskin_to_cpuworker(connection_t *cpuworker,
-                              or_circuit_t *circ, char *onionskin)
+                              or_circuit_t *circ,
+                              create_cell_t *onionskin)
 {
-  char qbuf[1];
-  char tag[TAG_LEN];
+  cpuworker_request_t req;
   time_t now = approx_time();
   static time_t last_culled_cpuworkers = 0;
 
@@ -483,7 +511,10 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
       tor_free(onionskin);
       return -1;
     }
-    tag_pack(tag, circ->p_chan->global_identifier,
+
+    memset(&req, 0, sizeof(req));
+    req.magic = CPUWORKER_REQUEST_MAGIC;
+    tag_pack(req.tag, circ->p_chan->global_identifier,
              circ->p_circ_id);
 
     cpuworker->state = CPUWORKER_STATE_BUSY_ONION;
@@ -493,11 +524,13 @@ assign_onionskin_to_cpuworker(connection_t *cpuworker,
     cpuworker->timestamp_lastwritten = time(NULL);
     num_cpuworkers_busy++;
 
-    qbuf[0] = CPUWORKER_TASK_ONION;
-    connection_write_to_buf(qbuf, 1, cpuworker);
-    connection_write_to_buf(tag, sizeof(tag), cpuworker);
-    connection_write_to_buf(onionskin, ONIONSKIN_CHALLENGE_LEN, cpuworker);
+    req.task = CPUWORKER_TASK_ONION;
+    memcpy(&req.create_cell, onionskin, sizeof(create_cell_t));
+
     tor_free(onionskin);
+
+    connection_write_to_buf((void*)&req, sizeof(req), cpuworker);
+    memwipe(&req, 0, sizeof(req));
   }
   return 0;
 }

+ 2 - 1
src/or/cpuworker.h

@@ -17,9 +17,10 @@ void cpuworkers_rotate(void);
 int connection_cpu_finished_flushing(connection_t *conn);
 int connection_cpu_reached_eof(connection_t *conn);
 int connection_cpu_process_inbuf(connection_t *conn);
+struct create_cell_t;
 int assign_onionskin_to_cpuworker(connection_t *cpuworker,
                                   or_circuit_t *circ,
-                                  char *onionskin);
+                                  struct create_cell_t *onionskin);
 
 #endif
 

+ 2 - 1
src/or/dirserv.c

@@ -74,7 +74,8 @@ static const struct consensus_method_range_t {
 } microdesc_consensus_methods[] = {
   {MIN_METHOD_FOR_MICRODESC, MIN_METHOD_FOR_A_LINES - 1},
   {MIN_METHOD_FOR_A_LINES, MIN_METHOD_FOR_P6_LINES - 1},
-  {MIN_METHOD_FOR_P6_LINES, MAX_SUPPORTED_CONSENSUS_METHOD},
+  {MIN_METHOD_FOR_P6_LINES, MIN_METHOD_FOR_NTOR_KEY - 1},
+  {MIN_METHOD_FOR_NTOR_KEY, MAX_SUPPORTED_CONSENSUS_METHOD},
   {-1, -1}
 };
 

+ 9 - 0
src/or/dirvote.c

@@ -3554,6 +3554,15 @@ dirvote_create_microdescriptor(const routerinfo_t *ri, int consensus_method)
 
   smartlist_add_asprintf(chunks, "onion-key\n%s", key);
 
+  if (consensus_method >= MIN_METHOD_FOR_NTOR_KEY &&
+      ri->onion_curve25519_pkey) {
+    char kbuf[128];
+    base64_encode(kbuf, sizeof(kbuf),
+                  (const char*)ri->onion_curve25519_pkey->public_key,
+                  CURVE25519_PUBKEY_LEN);
+    smartlist_add_asprintf(chunks, "ntor-onion-key %s", kbuf);
+  }
+
   if (consensus_method >= MIN_METHOD_FOR_A_LINES &&
       !tor_addr_is_null(&ri->ipv6_addr) && ri->ipv6_orport)
     smartlist_add_asprintf(chunks, "a %s\n",

+ 5 - 1
src/or/dirvote.h

@@ -20,7 +20,7 @@
 #define MIN_VOTE_INTERVAL 300
 
 /** The highest consensus method that we currently support. */
-#define MAX_SUPPORTED_CONSENSUS_METHOD 15
+#define MAX_SUPPORTED_CONSENSUS_METHOD 16
 
 /** Lowest consensus method that contains a 'directory-footer' marker */
 #define MIN_METHOD_FOR_FOOTER 9
@@ -48,6 +48,10 @@
 /** Lowest consensus method where microdescs may include a "p6" line. */
 #define MIN_METHOD_FOR_P6_LINES 15
 
+/** Lowest consensus method where microdescs may include an "ntor-onion-key"
+ * line */
+#define MIN_METHOD_FOR_NTOR_KEY 16
+
 void dirvote_free_all(void);
 
 /* vote manipulation */

+ 1 - 1
src/or/entrynodes.c

@@ -1611,7 +1611,7 @@ routerset_contains_bridge(const routerset_t *routerset,
     return 0;
 
   extinfo = extend_info_new(
-         NULL, bridge->identity, NULL, &bridge->addr, bridge->port);
+         NULL, bridge->identity, NULL, NULL, &bridge->addr, bridge->port);
   result = routerset_contains_extendinfo(routerset, extinfo);
   extend_info_free(extinfo);
   return result;

+ 14 - 1
src/or/include.am

@@ -15,6 +15,12 @@ else
 evdns_source=src/ext/eventdns.c
 endif
 
+if CURVE25519_ENABLED
+onion_ntor_source=src/or/onion_ntor.c
+else
+onion_ntor_source=
+endif
+
 src_or_libtor_a_SOURCES = \
 	src/or/addressmap.c				\
 	src/or/buffers.c				\
@@ -47,6 +53,8 @@ src_or_libtor_a_SOURCES = \
 	src/or/networkstatus.c				\
 	src/or/nodelist.c				\
 	src/or/onion.c					\
+	src/or/onion_fast.c				\
+	src/or/onion_tap.c				\
 	src/or/transports.c				\
 	src/or/policies.c				\
 	src/or/reasons.c				\
@@ -65,6 +73,7 @@ src_or_libtor_a_SOURCES = \
 	src/or/status.c					\
 	$(evdns_source)					\
 	$(tor_platform_source)				\
+	$(onion_ntor_source)				\
 	src/or/config_codedigest.c
 
 #libtor_a_LIBADD = ../common/libor.a ../common/libor-crypto.a \
@@ -86,7 +95,8 @@ AM_CPPFLAGS += -DSHARE_DATADIR="\"$(datadir)\"" \
 
 
 src_or_tor_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@ @TOR_LDFLAGS_libevent@
-src_or_tor_LDADD = src/or/libtor.a src/common/libor.a src/common/libor-crypto.a \
+src_or_tor_LDADD = src/or/libtor.a src/common/libor.a \
+	src/common/libor-crypto.a $(LIBDONNA) \
 	src/common/libor-event.a \
 	@TOR_ZLIB_LIBS@ @TOR_LIB_MATH@ @TOR_LIBEVENT_LIBS@ @TOR_OPENSSL_LIBS@ \
 	@TOR_LIB_WS32@ @TOR_LIB_GDI@
@@ -125,6 +135,9 @@ ORHEADERS = \
 	src/or/nodelist.h				\
 	src/or/ntmain.h					\
 	src/or/onion.h					\
+	src/or/onion_fast.h				\
+	src/or/onion_ntor.h				\
+	src/or/onion_tap.h				\
 	src/or/or.h					\
 	src/or/transports.h				\
 	src/or/policies.h				\

+ 1 - 0
src/or/microdesc.c

@@ -575,6 +575,7 @@ microdesc_free(microdesc_t *md)
 
   if (md->onion_pkey)
     crypto_pk_free(md->onion_pkey);
+  tor_free(md->onion_curve25519_pkey);
   if (md->body && md->saved_location != SAVED_IN_CACHE)
     tor_free(md->body);
 

+ 12 - 0
src/or/nodelist.c

@@ -916,6 +916,18 @@ node_get_pref_ipv6_orport(const node_t *node, tor_addr_port_t *ap_out)
   }
 }
 
+/** Report whether <b>node</b> has a curve25519 onion key.  The routerinfo is
+ * consulted when one is present; otherwise the microdescriptor is.  Return 1
+ * if the descriptor consulted carries a key, and 0 if it does not or if
+ * <b>node</b> has neither descriptor. */
+int
+node_has_curve25519_onion_key(const node_t *node)
+{
+  if (node->ri)
+    return node->ri->onion_curve25519_pkey != NULL;
+
+  if (node->md)
+    return node->md->onion_curve25519_pkey != NULL;
+
+  return 0;
+}
+
 /** Refresh the country code of <b>ri</b>.  This function MUST be called on
  * each router when the GeoIP database is reloaded, and on all new routers. */
 void

+ 1 - 0
src/or/nodelist.h

@@ -54,6 +54,7 @@ int node_ipv6_preferred(const node_t *node);
 int node_get_prim_orport(const node_t *node, tor_addr_port_t *ap_out);
 void node_get_pref_orport(const node_t *node, tor_addr_port_t *ap_out);
 void node_get_pref_ipv6_orport(const node_t *node, tor_addr_port_t *ap_out);
+int node_has_curve25519_onion_key(const node_t *node);
 
 smartlist_t *nodelist_get_list(void);
 

+ 795 - 237
src/or/onion.c

@@ -6,21 +6,26 @@
 
 /**
  * \file onion.c
- * \brief Functions to queue create cells, and handle onionskin
- * parsing and creation.
+ * \brief Functions to queue create cells, wrap the various onionskin types,
+ * and parse and create the CREATE cell and its allies.
  **/
 
 #include "or.h"
 #include "circuitlist.h"
 #include "config.h"
 #include "onion.h"
+#include "onion_fast.h"
+#include "onion_ntor.h"
+#include "onion_tap.h"
+#include "relay.h"
 #include "rephist.h"
+#include "router.h"
 
 /** Type for a linked list of circuits that are waiting for a free CPU worker
  * to process a waiting onion handshake. */
 typedef struct onion_queue_t {
   or_circuit_t *circ;
-  char *onionskin;
+  create_cell_t *onionskin;
   time_t when_added;
   struct onion_queue_t *next;
 } onion_queue_t;
@@ -37,11 +42,13 @@ static onion_queue_t *ol_tail=NULL;
 /** Length of ol_list */
 static int ol_length=0;
 
+/* XXXX Check lengths vs MAX_ONIONSKIN_{CHALLENGE,REPLY}_LEN */
+
 /** Add <b>circ</b> to the end of ol_list and return 0, except
  * if ol_list is too long, in which case do nothing and return -1.
  */
 int
-onion_pending_add(or_circuit_t *circ, char *onionskin)
+onion_pending_add(or_circuit_t *circ, create_cell_t *onionskin)
 {
   onion_queue_t *tmp;
   time_t now = time(NULL);
@@ -98,7 +105,7 @@ onion_pending_add(or_circuit_t *circ, char *onionskin)
  * NULL if the list is empty.
  */
 or_circuit_t *
-onion_next_task(char **onionskin_out)
+onion_next_task(create_cell_t **onionskin_out)
 {
   or_circuit_t *circ;
 
@@ -157,292 +164,843 @@ onion_pending_remove(or_circuit_t *circ)
   tor_free(victim);
 }
 
-/*----------------------------------------------------------------------*/
+/** Empty the pending-onion queue: free every queued onionskin together with
+ * its queue entry, then reset the list head, tail and length.  Called from
+ * tor_free_all. */
+void
+clear_pending_onions(void)
+{
+  onion_queue_t *entry = ol_list;
+
+  while (entry != NULL) {
+    /* Remember the successor first: tor_free() clears the pointer it frees. */
+    onion_queue_t *following = entry->next;
+    tor_free(entry->onionskin);
+    tor_free(entry);
+    entry = following;
+  }
+
+  ol_list = NULL;
+  ol_tail = NULL;
+  ol_length = 0;
+}
+
+/* ============================================================ */
 
-/** Given a router's 128 byte public key,
- * stores the following in onion_skin_out:
- *   - [42 bytes] OAEP padding
- *   - [16 bytes] Symmetric key for encrypting blob past RSA
- *   - [70 bytes] g^x part 1 (inside the RSA)
- *   - [58 bytes] g^x part 2 (symmetrically encrypted)
- *
- * Stores the DH private key into handshake_state_out for later completion
- * of the handshake.
- *
- * The meeting point/cookies and auth are zeroed out for now.
+/** Populate the server_onion_keys_t at <b>keys</b> with everything needed to
+ * answer onion handshakes: our identity digest, copies of the current and
+ * previous onion keys and, when curve25519 support is compiled in, the ntor
+ * key map plus a freshly generated junk keypair.  (Each cpuworker gets its
+ * own copy so that it neither races with the main thread nor needs locks.) */
+void
+setup_server_onion_keys(server_onion_keys_t *keys)
+{
+  memset(keys, 0, sizeof(*keys));
+
+  memcpy(keys->my_identity, router_get_my_id_digest(), DIGEST_LEN);
+  dup_onion_keys(&keys->onion_key, &keys->last_onion_key);
+
+#ifdef CURVE25519_ENABLED
+  keys->curve25519_key_map = construct_ntor_key_map();
+  {
+    curve25519_keypair_t *junk = tor_malloc_zero(sizeof(*junk));
+    keys->junk_keypair = junk;
+    curve25519_keypair_generate(keys->junk_keypair, 0);
+  }
+#endif
+}
+
+/** Free everything owned by <b>keys</b> and zero the structure.  The
+ * structure itself is left alone (it is likely to be stack-allocated).  A
+ * NULL <b>keys</b> is ignored. */
+void
+release_server_onion_keys(server_onion_keys_t *keys)
+{
+  if (keys != NULL) {
+    crypto_pk_free(keys->onion_key);
+    crypto_pk_free(keys->last_onion_key);
+#ifdef CURVE25519_ENABLED
+    ntor_key_map_free(keys->curve25519_key_map);
+    tor_free(keys->junk_keypair);
+#endif
+    memset(keys, 0, sizeof(*keys));
+  }
+}
+
+/** Free the handshake-specific storage referenced by <b>state</b>, chosen
+ * according to its tag, and set the corresponding pointer to NULL.  An
+ * unrecognized tag is logged as a bug and nothing is freed. */
+void
+onion_handshake_state_release(onion_handshake_state_t *state)
+{
+  if (state->tag == ONION_HANDSHAKE_TYPE_TAP) {
+    crypto_dh_free(state->u.tap);
+    state->u.tap = NULL;
+    return;
+  }
+
+  if (state->tag == ONION_HANDSHAKE_TYPE_FAST) {
+    fast_handshake_state_free(state->u.fast);
+    state->u.fast = NULL;
+    return;
+  }
+
+#ifdef CURVE25519_ENABLED
+  if (state->tag == ONION_HANDSHAKE_TYPE_NTOR) {
+    ntor_handshake_state_free(state->u.ntor);
+    state->u.ntor = NULL;
+    return;
+  }
+#endif
+
+  log_warn(LD_BUG, "called with unknown handshake state type %d",
+           (int)state->tag);
+  tor_fragile_assert();
+}
+
+/** Perform the first step of a circuit-creation handshake of type <b>type</b>
+ * (one of ONION_HANDSHAKE_TYPE_*): generate the initial "onion skin" in
+ * <b>onion_skin_out</b>, and store any state information in <b>state_out</b>.
+ * Return -1 on failure, and the length of the onionskin on acceptance.
+ *
+ * TAP fails unless <b>node</b> has an onion_key; ntor fails if the
+ * curve25519 onion key in <b>node</b> is all zeros, or if the build lacks
+ * CURVE25519_ENABLED.  The tag field of <b>state_out</b> is set to
+ * <b>type</b> only when a positive length is returned.
  */
 int
-onion_skin_create(crypto_pk_t *dest_router_key,
-                  crypto_dh_t **handshake_state_out,
-                  char *onion_skin_out) /* ONIONSKIN_CHALLENGE_LEN bytes */
+onion_skin_create(int type,
+                  const extend_info_t *node,
+                  onion_handshake_state_t *state_out,
+                  uint8_t *onion_skin_out)
 {
-  char challenge[DH_KEY_LEN];
-  crypto_dh_t *dh = NULL;
-  int dhbytes, pkbytes;
+  int r = -1;
+
+  switch (type) {
+  case ONION_HANDSHAKE_TYPE_TAP:
+    if (!node->onion_key)
+      return -1;
+
+    if (onion_skin_TAP_create(node->onion_key,
+                              &state_out->u.tap,
+                              (char*)onion_skin_out) < 0)
+      return -1;
+
+    r = TAP_ONIONSKIN_CHALLENGE_LEN;
+    break;
+  case ONION_HANDSHAKE_TYPE_FAST:
+    if (fast_onionskin_create(&state_out->u.fast, onion_skin_out) < 0)
+      return -1;
+
+    r = CREATE_FAST_LEN;
+    break;
+  case ONION_HANDSHAKE_TYPE_NTOR:
+#ifdef CURVE25519_ENABLED
+    /* An all-zero public key is rejected rather than used. */
+    if (tor_mem_is_zero((const char*)node->curve25519_onion_key.public_key,
+                        CURVE25519_PUBKEY_LEN))
+      return -1;
+    if (onion_skin_ntor_create((const uint8_t*)node->identity_digest,
+                               &node->curve25519_onion_key,
+                               &state_out->u.ntor,
+                               onion_skin_out) < 0)
+      return -1;
+
+    r = NTOR_ONIONSKIN_LEN;
+#else
+    return -1;
+#endif
+    break;
+  default:
+    log_warn(LD_BUG, "called with unknown handshake state type %d", type);
+    tor_fragile_assert();
+    r = -1;
+  }
+
+  if (r > 0)
+    state_out->tag = (uint16_t) type;
+
+  return r;
+}
+
+/** Perform the second (server-side) step of a circuit-creation handshake of
+ * type <b>type</b>, responding to the client request in <b>onion_skin</b>
+ * (of <b>onionskin_len</b> bytes) using the keys in <b>keys</b>.  On success,
+ * write our response into <b>reply_out</b>, generate <b>keys_out_len</b>
+ * bytes worth of key material in <b>keys_out</b>, write a DIGEST_LEN-byte
+ * hidden service nonce to <b>rend_nonce_out</b>, and return the length of
+ * the reply. On failure, return -1.
+ *
+ * For TAP and CREATE_FAST the nonce is copied out of the reply; for ntor it
+ * is the DIGEST_LEN bytes of key material that follow the first
+ * <b>keys_out_len</b> bytes.  TAP and CREATE_FAST require an exact
+ * <b>onionskin_len</b>; ntor accepts anything of at least
+ * NTOR_ONIONSKIN_LEN bytes.
+ */
+int
+onion_skin_server_handshake(int type,
+                      const uint8_t *onion_skin, size_t onionskin_len,
+                      const server_onion_keys_t *keys,
+                      uint8_t *reply_out,
+                      uint8_t *keys_out, size_t keys_out_len,
+                      uint8_t *rend_nonce_out)
+{
+  int r = -1;
 
-  tor_assert(dest_router_key);
-  tor_assert(handshake_state_out);
-  tor_assert(onion_skin_out);
-  *handshake_state_out = NULL;
-  memset(onion_skin_out, 0, ONIONSKIN_CHALLENGE_LEN);
+  switch (type) {
+  case ONION_HANDSHAKE_TYPE_TAP:
+    if (onionskin_len != TAP_ONIONSKIN_CHALLENGE_LEN)
+      return -1;
+    if (onion_skin_TAP_server_handshake((const char*)onion_skin,
+                                        keys->onion_key, keys->last_onion_key,
+                                        (char*)reply_out,
+                                        (char*)keys_out, keys_out_len)<0)
+      return -1;
+    r = TAP_ONIONSKIN_REPLY_LEN;
+    memcpy(rend_nonce_out, reply_out+DH_KEY_LEN, DIGEST_LEN);
+    break;
+  case ONION_HANDSHAKE_TYPE_FAST:
+    if (onionskin_len != CREATE_FAST_LEN)
+      return -1;
+    if (fast_server_handshake(onion_skin, reply_out, keys_out, keys_out_len)<0)
+      return -1;
+    r = CREATED_FAST_LEN;
+    memcpy(rend_nonce_out, reply_out+DIGEST_LEN, DIGEST_LEN);
+    break;
+  case ONION_HANDSHAKE_TYPE_NTOR:
+#ifdef CURVE25519_ENABLED
+    if (onionskin_len < NTOR_ONIONSKIN_LEN)
+      return -1;
+    {
+      /* Ask for DIGEST_LEN extra bytes: the tail becomes the rend nonce. */
+      size_t keys_tmp_len = keys_out_len + DIGEST_LEN;
+      uint8_t *keys_tmp = tor_malloc(keys_out_len + DIGEST_LEN);
+
+      if (onion_skin_ntor_server_handshake(
+                                   onion_skin, keys->curve25519_key_map,
+                                   keys->junk_keypair,
+                                   keys->my_identity,
+                                   reply_out, keys_tmp, keys_tmp_len)<0) {
+        tor_free(keys_tmp);
+        return -1;
+      }
+      memcpy(keys_out, keys_tmp, keys_out_len);
+      memcpy(rend_nonce_out, keys_tmp+keys_out_len, DIGEST_LEN);
+      memwipe(keys_tmp, 0, keys_tmp_len);
+      tor_free(keys_tmp);
+      r = NTOR_REPLY_LEN;
+    }
+#else
+    return -1;
+#endif
+    break;
+  default:
+    log_warn(LD_BUG, "called with unknown handshake state type %d", type);
+    tor_fragile_assert();
+    return -1;
+  }
 
-  if (!(dh = crypto_dh_new(DH_TYPE_CIRCUIT)))
-    goto err;
+  return r;
+}
 
-  dhbytes = crypto_dh_get_bytes(dh);
-  pkbytes = (int) crypto_pk_keysize(dest_router_key);
-  tor_assert(dhbytes == 128);
-  tor_assert(pkbytes == 128);
+/** Perform the final (client-side) step of a circuit-creation handshake of
+ * type <b>type</b>, using our state in <b>handshake_state</b> and the
+ * server's response in <b>reply</b>.  On success, generate
+ * <b>keys_out_len</b> bytes worth of key material in <b>keys_out</b>, set
+ * <b>rend_authenticator_out</b> to the "KH" field that can be used to
+ * establish introduction points at this hop, and return 0.  On failure,
+ * return -1.  A <b>handshake_state</b> whose tag differs from <b>type</b> is
+ * treated as a failure. */
+int
+onion_skin_client_handshake(int type,
+                      const onion_handshake_state_t *handshake_state,
+                      const uint8_t *reply, size_t reply_len,
+                      uint8_t *keys_out, size_t keys_out_len,
+                      uint8_t *rend_authenticator_out)
+{
+  if (handshake_state->tag != type)
+    return -1;
 
-  if (crypto_dh_get_public(dh, challenge, dhbytes))
-    goto err;
+  switch (type) {
+  case ONION_HANDSHAKE_TYPE_TAP:
+    if (reply_len != TAP_ONIONSKIN_REPLY_LEN)
+      return -1;
+    if (onion_skin_TAP_client_handshake(handshake_state->u.tap,
+                                        (const char*)reply,
+                                        (char *)keys_out, keys_out_len) < 0)
+      return -1;
 
-  note_crypto_pk_op(ENC_ONIONSKIN);
+    memcpy(rend_authenticator_out, reply+DH_KEY_LEN, DIGEST_LEN);
+
+    return 0;
+  case ONION_HANDSHAKE_TYPE_FAST:
+    if (reply_len != CREATED_FAST_LEN)
+      return -1;
+    if (fast_client_handshake(handshake_state->u.fast, reply,
+                              keys_out, keys_out_len) < 0)
+      return -1;
+
+    memcpy(rend_authenticator_out, reply+DIGEST_LEN, DIGEST_LEN);
+    return 0;
+#ifdef CURVE25519_ENABLED
+  case ONION_HANDSHAKE_TYPE_NTOR:
+    if (reply_len < NTOR_REPLY_LEN)
+      return -1;
+    {
+      /* Ask for DIGEST_LEN extra bytes: the tail becomes the authenticator. */
+      size_t keys_tmp_len = keys_out_len + DIGEST_LEN;
+      uint8_t *keys_tmp = tor_malloc(keys_tmp_len);
+      if (onion_skin_ntor_client_handshake(handshake_state->u.ntor,
+                                           reply,
+                                           keys_tmp, keys_tmp_len) < 0) {
+        tor_free(keys_tmp);
+        return -1;
+      }
+      memcpy(keys_out, keys_tmp, keys_out_len);
+      memcpy(rend_authenticator_out, keys_tmp + keys_out_len, DIGEST_LEN);
+      memwipe(keys_tmp, 0, keys_tmp_len);
+      tor_free(keys_tmp);
+    }
+    return 0;
+#endif
+  default:
+    log_warn(LD_BUG, "called with unknown handshake state type %d", type);
+    tor_fragile_assert();
+    return -1;
+  }
+}
 
-  /* set meeting point, meeting cookie, etc here. Leave zero for now. */
-  if (crypto_pk_public_hybrid_encrypt(dest_router_key, onion_skin_out,
-                                      ONIONSKIN_CHALLENGE_LEN,
-                                      challenge, DH_KEY_LEN,
-                                      PK_PKCS1_OAEP_PADDING, 1)<0)
-    goto err;
+/** Helper: return 0 if <b>cell</b> appears valid, -1 otherwise. If
+ * <b>unknown_ok</b> is true, allow cells with handshake types we don't
+ * recognize.
+ *
+ * Two things are checked: that the handshake type is one the cell type may
+ * carry (CREATE: TAP or ntor; CREATE_FAST: only FAST; CREATE2: anything),
+ * and that the handshake length matches the fixed length of a recognized
+ * handshake type.  Without CURVE25519_ENABLED, ntor counts as an
+ * unrecognized type. */
+static int
+check_create_cell(const create_cell_t *cell, int unknown_ok)
+{
+  switch (cell->cell_type) {
+  case CELL_CREATE:
+    if (cell->handshake_type != ONION_HANDSHAKE_TYPE_TAP &&
+        cell->handshake_type != ONION_HANDSHAKE_TYPE_NTOR)
+      return -1;
+    break;
+  case CELL_CREATE_FAST:
+    if (cell->handshake_type != ONION_HANDSHAKE_TYPE_FAST)
+      return -1;
+    break;
+  case CELL_CREATE2:
+    break;
+  default:
+    return -1;
+  }
 
-  memwipe(challenge, 0, sizeof(challenge));
-  *handshake_state_out = dh;
+  switch (cell->handshake_type) {
+  case ONION_HANDSHAKE_TYPE_TAP:
+    if (cell->handshake_len != TAP_ONIONSKIN_CHALLENGE_LEN)
+      return -1;
+    break;
+  case ONION_HANDSHAKE_TYPE_FAST:
+    if (cell->handshake_len != CREATE_FAST_LEN)
+      return -1;
+    break;
+#ifdef CURVE25519_ENABLED
+  case ONION_HANDSHAKE_TYPE_NTOR:
+    if (cell->handshake_len != NTOR_ONIONSKIN_LEN)
+      return -1;
+    break;
+#endif
+  default:
+    if (! unknown_ok)
+      return -1;
+  }
 
   return 0;
- err:
-  memwipe(challenge, 0, sizeof(challenge));
-  if (dh) crypto_dh_free(dh);
-  return -1;
 }
 
-/** Given an encrypted DH public key as generated by onion_skin_create,
- * and the private key for this onion router, generate the reply (128-byte
- * DH plus the first 20 bytes of shared key material), and store the
- * next key_out_len bytes of key material in key_out.
+/** Helper: decode a CREATE2 payload of at most <b>p_len</b> bytes starting at
+ * <b>p</b> into <b>cell_out</b>.  The payload is a two-byte handshake type, a
+ * two-byte handshake length, and then that many bytes of handshake data.
+ * Return 0 on success; return -1 if the header is truncated, the declared
+ * length does not fit, or the handshake type is CREATE_FAST's.
+ *
+ * The body of an EXTEND2 cell ends with a CREATE2 payload, so this helper
+ * parses that as well.
  */
+static int
+parse_create2_payload(create_cell_t *cell_out, const uint8_t *p, size_t p_len)
+{
+  size_t body_room;
+
+  if (p_len < 4)
+    return -1;
+  body_room = p_len - 4;
+
+  cell_out->cell_type = CELL_CREATE2;
+  cell_out->handshake_type = ntohs(get_uint16(p));
+  cell_out->handshake_len = ntohs(get_uint16(p + 2));
+
+  if (cell_out->handshake_len > CELL_PAYLOAD_SIZE - 4)
+    return -1;
+  if (cell_out->handshake_len > body_room)
+    return -1;
+  if (cell_out->handshake_type == ONION_HANDSHAKE_TYPE_FAST)
+    return -1;
+
+  memcpy(cell_out->onionskin, p + 4, cell_out->handshake_len);
+  return 0;
+}
+
+/** Magic string which, in a CREATE or EXTEND cell, indicates that a seeming
+ * TAP payload is really an ntor payload.  We'd do away with this if every
+ * relay supported EXTEND2, but we want to be able to extend from A to B with
+ * ntor even when A doesn't understand EXTEND2 and so can't generate a
+ * CREATE2 cell.
+ **/
+#define NTOR_CREATE_MAGIC "ntorNTORntorNTOR"
+
+/** Parse a CREATE, CREATE_FAST, or CREATE2 cell from <b>cell_in</b> into
+ * <b>cell_out</b>. Return 0 on success, -1 on failure. (We reject some
+ * syntactically valid CREATE2 cells that we can't generate or react to.)
+ * A CREATE cell whose payload begins with NTOR_CREATE_MAGIC is read as an
+ * ntor onionskin following the 16-byte magic; otherwise it is read as TAP. */
 int
-onion_skin_server_handshake(const char *onion_skin, /*ONIONSKIN_CHALLENGE_LEN*/
-                            crypto_pk_t *private_key,
-                            crypto_pk_t *prev_private_key,
-                            char *handshake_reply_out, /*ONIONSKIN_REPLY_LEN*/
-                            char *key_out,
-                            size_t key_out_len)
+create_cell_parse(create_cell_t *cell_out, const cell_t *cell_in)
 {
-  char challenge[ONIONSKIN_CHALLENGE_LEN];
-  crypto_dh_t *dh = NULL;
-  ssize_t len;
-  char *key_material=NULL;
-  size_t key_material_len=0;
-  int i;
-  crypto_pk_t *k;
-
-  len = -1;
-  for (i=0;i<2;++i) {
-    k = i==0?private_key:prev_private_key;
-    if (!k)
-      break;
-    note_crypto_pk_op(DEC_ONIONSKIN);
-    len = crypto_pk_private_hybrid_decrypt(k, challenge,
-                                           ONIONSKIN_CHALLENGE_LEN,
-                                           onion_skin, ONIONSKIN_CHALLENGE_LEN,
-                                           PK_PKCS1_OAEP_PADDING,0);
-    if (len>0)
-      break;
-  }
-  if (len<0) {
-    log_info(LD_PROTOCOL,
-             "Couldn't decrypt onionskin: client may be using old onion key");
-    goto err;
-  } else if (len != DH_KEY_LEN) {
-    log_warn(LD_PROTOCOL, "Unexpected onionskin length after decryption: %ld",
-             (long)len);
-    goto err;
+  memset(cell_out, 0, sizeof(*cell_out));
+
+  switch (cell_in->command) {
+  case CELL_CREATE:
+    cell_out->cell_type = CELL_CREATE;
+    if (tor_memeq(cell_in->payload, NTOR_CREATE_MAGIC, 16)) {
+      cell_out->handshake_type = ONION_HANDSHAKE_TYPE_NTOR;
+      cell_out->handshake_len = NTOR_ONIONSKIN_LEN;
+      memcpy(cell_out->onionskin, cell_in->payload+16, NTOR_ONIONSKIN_LEN);
+    } else {
+      cell_out->handshake_type = ONION_HANDSHAKE_TYPE_TAP;
+      cell_out->handshake_len = TAP_ONIONSKIN_CHALLENGE_LEN;
+      memcpy(cell_out->onionskin, cell_in->payload,
+             TAP_ONIONSKIN_CHALLENGE_LEN);
+    }
+    break;
+  case CELL_CREATE_FAST:
+    cell_out->cell_type = CELL_CREATE_FAST;
+    cell_out->handshake_type = ONION_HANDSHAKE_TYPE_FAST;
+    cell_out->handshake_len = CREATE_FAST_LEN;
+    memcpy(cell_out->onionskin, cell_in->payload, CREATE_FAST_LEN);
+    break;
+  case CELL_CREATE2:
+    if (parse_create2_payload(cell_out, cell_in->payload,
+                              CELL_PAYLOAD_SIZE) < 0)
+      return -1;
+    break;
+  default:
+    return -1;
   }
 
-  dh = crypto_dh_new(DH_TYPE_CIRCUIT);
-  if (!dh) {
-    log_warn(LD_BUG, "Couldn't allocate DH key");
-    goto err;
+  return check_create_cell(cell_out, 0);
+}
+
+/** Helper: return 0 if <b>cell</b> appears valid, -1 otherwise.  CREATED and
+ * CREATED_FAST cells must carry exactly the TAP and CREATE_FAST reply
+ * lengths; a CREATED2 reply may be at most RELAY_PAYLOAD_SIZE-2 bytes.  Note
+ * that a cell_type other than these three is not rejected here. */
+static int
+check_created_cell(const created_cell_t *cell)
+{
+  switch (cell->cell_type) {
+  case CELL_CREATED:
+    if (cell->handshake_len != TAP_ONIONSKIN_REPLY_LEN)
+      return -1;
+    break;
+  case CELL_CREATED_FAST:
+    if (cell->handshake_len != CREATED_FAST_LEN)
+      return -1;
+    break;
+  case CELL_CREATED2:
+    if (cell->handshake_len > RELAY_PAYLOAD_SIZE-2)
+      return -1;
+    break;
   }
-  if (crypto_dh_get_public(dh, handshake_reply_out, DH_KEY_LEN)) {
-    log_info(LD_GENERAL, "crypto_dh_get_public failed.");
-    goto err;
+
+  return 0;
+}
+
+/** Parse a CREATED, CREATED_FAST, or CREATED2 cell from <b>cell_in</b> into
+ * <b>cell_out</b>. Return 0 on success, -1 on failure.  A cell with any
+ * other command is rejected, as create_cell_parse() does for its input. */
+int
+created_cell_parse(created_cell_t *cell_out, const cell_t *cell_in)
+{
+  memset(cell_out, 0, sizeof(*cell_out));
+
+  switch (cell_in->command) {
+  case CELL_CREATED:
+    cell_out->cell_type = CELL_CREATED;
+    cell_out->handshake_len = TAP_ONIONSKIN_REPLY_LEN;
+    memcpy(cell_out->reply, cell_in->payload, TAP_ONIONSKIN_REPLY_LEN);
+    break;
+  case CELL_CREATED_FAST:
+    cell_out->cell_type = CELL_CREATED_FAST;
+    cell_out->handshake_len = CREATED_FAST_LEN;
+    memcpy(cell_out->reply, cell_in->payload, CREATED_FAST_LEN);
+    break;
+  case CELL_CREATED2:
+    {
+      /* CREATED2 payload: two-byte reply length, then the reply itself. */
+      const uint8_t *p = cell_in->payload;
+      cell_out->cell_type = CELL_CREATED2;
+      cell_out->handshake_len = ntohs(get_uint16(p));
+      if (cell_out->handshake_len > CELL_PAYLOAD_SIZE - 2)
+        return -1;
+      memcpy(cell_out->reply, p+2, cell_out->handshake_len);
+      break;
+    }
+  default:
+    /* Not a CREATED-family cell: don't report a zeroed cell as a success. */
+    return -1;
   }
 
-  key_material_len = DIGEST_LEN+key_out_len;
-  key_material = tor_malloc(key_material_len);
-  len = crypto_dh_compute_secret(LOG_PROTOCOL_WARN, dh, challenge,
-                                 DH_KEY_LEN, key_material,
-                                 key_material_len);
-  if (len < 0) {
-    log_info(LD_GENERAL, "crypto_dh_compute_secret failed.");
-    goto err;
+  return check_created_cell(cell_out);
+}
+
+/** Helper: return 0 if <b>cell</b> appears valid, -1 otherwise.  A valid
+ * extend cell has a nonzero node ID and an IPv4 address, and wraps either a
+ * CREATE cell (EXTEND only) or a CREATE2 cell (EXTEND or EXTEND2) whose
+ * handshake type is not CREATE_FAST's.  The embedded create cell is then
+ * checked with unrecognized handshake types allowed. */
+static int
+check_extend_cell(const extend_cell_t *cell)
+{
+  if (tor_digest_is_zero((const char*)cell->node_id))
+    return -1;
+  /* We don't currently allow EXTEND2 cells without an IPv4 address */
+  if (tor_addr_family(&cell->orport_ipv4.addr) == AF_UNSPEC)
+    return -1;
+  if (cell->create_cell.cell_type == CELL_CREATE) {
+    if (cell->cell_type != RELAY_COMMAND_EXTEND)
+      return -1;
+  } else if (cell->create_cell.cell_type == CELL_CREATE2) {
+    if (cell->cell_type != RELAY_COMMAND_EXTEND2 &&
+        cell->cell_type != RELAY_COMMAND_EXTEND)
+      return -1;
+  } else {
+    /* In particular, no CREATE_FAST cells are allowed */
+    return -1;
   }
+  if (cell->create_cell.handshake_type == ONION_HANDSHAKE_TYPE_FAST)
+    return -1;
 
-  /* send back H(K|0) as proof that we learned K. */
-  memcpy(handshake_reply_out+DH_KEY_LEN, key_material, DIGEST_LEN);
+  return check_create_cell(&cell->create_cell, 1);
+}
 
-  /* use the rest of the key material for our shared keys, digests, etc */
-  memcpy(key_out, key_material+DIGEST_LEN, key_out_len);
+/** Protocol constants for specifier types in EXTEND2
+ * @{
+ */
+#define SPECTYPE_IPV4 0
+#define SPECTYPE_IPV6 1
+#define SPECTYPE_LEGACY_ID 2
+/** @} */
+
+/** Parse an EXTEND or EXTEND2 cell (according to <b>command</b>) from the
+ * <b>payload_length</b> bytes of <b>payload</b> into <b>cell_out</b>. Return
+ * 0 on success, -1 on failure.
+ *
+ * An EXTEND payload is a fixed-size IPv4 address, port, onionskin and node
+ * ID; an onionskin that begins with NTOR_CREATE_MAGIC is unpacked as an ntor
+ * handshake.  An EXTEND2 payload is a count of link specifiers, the
+ * specifiers themselves, and then a CREATE2 payload; an empty EXTEND2
+ * payload is rejected before anything is read from it. */
+int
+extend_cell_parse(extend_cell_t *cell_out, const uint8_t command,
+                  const uint8_t *payload, size_t payload_length)
+{
+  const uint8_t *eop;
 
-  memwipe(challenge, 0, sizeof(challenge));
-  memwipe(key_material, 0, key_material_len);
-  tor_free(key_material);
-  crypto_dh_free(dh);
-  return 0;
- err:
-  memwipe(challenge, 0, sizeof(challenge));
-  if (key_material) {
-    memwipe(key_material, 0, key_material_len);
-    tor_free(key_material);
+  memset(cell_out, 0, sizeof(*cell_out));
+  if (payload_length > RELAY_PAYLOAD_SIZE)
+    return -1;
+  eop = payload + payload_length;
+
+  switch (command) {
+  case RELAY_COMMAND_EXTEND:
+    {
+      if (payload_length != 6 + TAP_ONIONSKIN_CHALLENGE_LEN + DIGEST_LEN)
+        return -1;
+
+      cell_out->cell_type = RELAY_COMMAND_EXTEND;
+      tor_addr_from_ipv4n(&cell_out->orport_ipv4.addr, get_uint32(payload));
+      cell_out->orport_ipv4.port = ntohs(get_uint16(payload+4));
+      tor_addr_make_unspec(&cell_out->orport_ipv6.addr);
+      if (tor_memeq(payload + 6, NTOR_CREATE_MAGIC, 16)) {
+        cell_out->create_cell.cell_type = CELL_CREATE2;
+        cell_out->create_cell.handshake_type = ONION_HANDSHAKE_TYPE_NTOR;
+        cell_out->create_cell.handshake_len = NTOR_ONIONSKIN_LEN;
+        memcpy(cell_out->create_cell.onionskin, payload + 22,
+               NTOR_ONIONSKIN_LEN);
+      } else {
+        cell_out->create_cell.cell_type = CELL_CREATE;
+        cell_out->create_cell.handshake_type = ONION_HANDSHAKE_TYPE_TAP;
+        cell_out->create_cell.handshake_len = TAP_ONIONSKIN_CHALLENGE_LEN;
+        memcpy(cell_out->create_cell.onionskin, payload + 6,
+               TAP_ONIONSKIN_CHALLENGE_LEN);
+      }
+      memcpy(cell_out->node_id, payload + 6 + TAP_ONIONSKIN_CHALLENGE_LEN,
+             DIGEST_LEN);
+      break;
+    }
+  case RELAY_COMMAND_EXTEND2:
+    {
+      uint8_t n_specs, spectype, speclen;
+      int i;
+      int found_ipv4 = 0, found_ipv6 = 0, found_id = 0;
+
+      /* The specifier count occupies the first byte; make sure it exists
+       * before reading it. */
+      if (payload_length == 0)
+        return -1;
+      n_specs = *payload;
+
+      tor_addr_make_unspec(&cell_out->orport_ipv4.addr);
+      tor_addr_make_unspec(&cell_out->orport_ipv6.addr);
+
+      cell_out->cell_type = RELAY_COMMAND_EXTEND2;
+      ++payload;
+      /* Parse the specifiers. We'll only take the first IPv4 and first IPv6
+       * address, and the node ID, and ignore everything else */
+      for (i = 0; i < n_specs; ++i) {
+        if (eop - payload < 2)
+          return -1;
+        spectype = payload[0];
+        speclen = payload[1];
+        payload += 2;
+        if (eop - payload < speclen)
+          return -1;
+        switch (spectype) {
+        case SPECTYPE_IPV4:
+          if (speclen != 6)
+            return -1;
+          if (!found_ipv4) {
+            tor_addr_from_ipv4n(&cell_out->orport_ipv4.addr,
+                                get_uint32(payload));
+            cell_out->orport_ipv4.port = ntohs(get_uint16(payload+4));
+            found_ipv4 = 1;
+          }
+          break;
+        case SPECTYPE_IPV6:
+          if (speclen != 18)
+            return -1;
+          if (!found_ipv6) {
+            tor_addr_from_ipv6_bytes(&cell_out->orport_ipv6.addr,
+                                     (const char*)payload);
+            cell_out->orport_ipv6.port = ntohs(get_uint16(payload+16));
+            found_ipv6 = 1;
+          }
+          break;
+        case SPECTYPE_LEGACY_ID:
+          if (speclen != 20)
+            return -1;
+          if (found_id)
+            return -1;
+          memcpy(cell_out->node_id, payload, 20);
+          found_id = 1;
+          break;
+        }
+        payload += speclen;
+      }
+      if (!found_id || !found_ipv4)
+        return -1;
+      if (parse_create2_payload(&cell_out->create_cell,payload,eop-payload)<0)
+        return -1;
+      break;
+    }
+  default:
+    return -1;
   }
-  if (dh) crypto_dh_free(dh);
 
-  return -1;
+  return check_extend_cell(cell_out);
 }
 
-/** Finish the client side of the DH handshake.
- * Given the 128 byte DH reply + 20 byte hash as generated by
- * onion_skin_server_handshake and the handshake state generated by
- * onion_skin_create, verify H(K) with the first 20 bytes of shared
- * key material, then generate key_out_len more bytes of shared key
- * material and store them in key_out.
- *
- * After the invocation, call crypto_dh_free on handshake_state.
- */
+/** Helper: return 0 if <b>cell</b> appears valid, -1 otherwise.  The relay
+ * command has to match the embedded reply (EXTENDED with CREATED, EXTENDED2
+ * with CREATED2), and the embedded reply has to pass check_created_cell(). */
+static int
+check_extended_cell(const extended_cell_t *cell)
+{
+  int expected_relay_command;
+
+  switch (cell->created_cell.cell_type) {
+  case CELL_CREATED:
+    expected_relay_command = RELAY_COMMAND_EXTENDED;
+    break;
+  case CELL_CREATED2:
+    expected_relay_command = RELAY_COMMAND_EXTENDED2;
+    break;
+  default:
+    return -1;
+  }
+
+  if (cell->cell_type != expected_relay_command)
+    return -1;
+
+  return check_created_cell(&cell->created_cell);
+}
+
+/** Parse an EXTENDED or EXTENDED2 cell (according to <b>command</b>) from the
+ * <b>payload_length</b> bytes of <b>payload</b> into <b>cell_out</b>. Return
+ * 0 on success, -1 on failure.  An EXTENDED2 payload shorter than its
+ * two-byte length field is rejected. */
 int
-onion_skin_client_handshake(crypto_dh_t *handshake_state,
-            const char *handshake_reply, /* ONIONSKIN_REPLY_LEN bytes */
-            char *key_out,
-            size_t key_out_len)
+extended_cell_parse(extended_cell_t *cell_out,
+                    const uint8_t command, const uint8_t *payload,
+                    size_t payload_len)
 {
-  ssize_t len;
-  char *key_material=NULL;
-  size_t key_material_len;
-  tor_assert(crypto_dh_get_bytes(handshake_state) == DH_KEY_LEN);
-
-  key_material_len = DIGEST_LEN + key_out_len;
-  key_material = tor_malloc(key_material_len);
-  len = crypto_dh_compute_secret(LOG_PROTOCOL_WARN, handshake_state,
-                                 handshake_reply, DH_KEY_LEN, key_material,
-                                 key_material_len);
-  if (len < 0)
-    goto err;
-
-  if (tor_memneq(key_material, handshake_reply+DH_KEY_LEN, DIGEST_LEN)) {
-    /* H(K) does *not* match. Something fishy. */
-    log_warn(LD_PROTOCOL,"Digest DOES NOT MATCH on onion handshake. "
-             "Bug or attack.");
-    goto err;
+  memset(cell_out, 0, sizeof(*cell_out));
+  if (payload_len > RELAY_PAYLOAD_SIZE)
+    return -1;
+
+  switch (command) {
+  case RELAY_COMMAND_EXTENDED:
+    if (payload_len != TAP_ONIONSKIN_REPLY_LEN)
+      return -1;
+    cell_out->cell_type = RELAY_COMMAND_EXTENDED;
+    cell_out->created_cell.cell_type = CELL_CREATED;
+    cell_out->created_cell.handshake_len = TAP_ONIONSKIN_REPLY_LEN;
+    memcpy(cell_out->created_cell.reply, payload, TAP_ONIONSKIN_REPLY_LEN);
+    break;
+  case RELAY_COMMAND_EXTENDED2:
+    {
+      /* Need the two-byte length field before reading it; this also keeps
+       * the unsigned "payload_len - 2" below from wrapping around. */
+      if (payload_len < 2)
+        return -1;
+      cell_out->cell_type = RELAY_COMMAND_EXTENDED2;
+      cell_out->created_cell.cell_type = CELL_CREATED2;
+      cell_out->created_cell.handshake_len = ntohs(get_uint16(payload));
+      if (cell_out->created_cell.handshake_len > RELAY_PAYLOAD_SIZE - 2 ||
+          cell_out->created_cell.handshake_len > payload_len - 2)
+        return -1;
+      memcpy(cell_out->created_cell.reply, payload+2,
+             cell_out->created_cell.handshake_len);
+    }
+    break;
+  default:
+    return -1;
   }
 
-  /* use the rest of the key material for our shared keys, digests, etc */
-  memcpy(key_out, key_material+DIGEST_LEN, key_out_len);
+  return check_extended_cell(cell_out);
+}
+
+/** Fill <b>cell_out</b> with a correctly formatted version of the
+ * CREATE{,_FAST,2} cell in <b>cell_in</b>. Return 0 on success, -1 on
+ * failure.  This is a cell we didn't originate if <b>relayed</b> is true;
+ * in that case handshake types we don't recognize are allowed through
+ * check_create_cell(). */
+static int
+create_cell_format_impl(cell_t *cell_out, const create_cell_t *cell_in,
+                        int relayed)
+{
+  uint8_t *p;
+  size_t space;
+  if (check_create_cell(cell_in, relayed) < 0)
+    return -1;
+
+  memset(cell_out->payload, 0, sizeof(cell_out->payload));
+  cell_out->command = cell_in->cell_type;
+
+  p = cell_out->payload;
+  space = sizeof(cell_out->payload);
+
+  switch (cell_in->cell_type) {
+  case CELL_CREATE:
+    /* An ntor handshake in a CREATE cell is prefixed with the magic
+     * string, and the onionskin follows it. */
+    if (cell_in->handshake_type == ONION_HANDSHAKE_TYPE_NTOR) {
+      memcpy(p, NTOR_CREATE_MAGIC, 16);
+      p += 16;
+      space -= 16;
+    }
+    /* Fall through */
+  case CELL_CREATE_FAST:
+    tor_assert(cell_in->handshake_len <= space);
+    memcpy(p, cell_in->onionskin, cell_in->handshake_len);
+    break;
+  case CELL_CREATE2:
+    /* CREATE2 layout: two-byte type, two-byte length, then the handshake. */
+    tor_assert(cell_in->handshake_len <= sizeof(cell_out->payload)-4);
+    set_uint16(cell_out->payload, htons(cell_in->handshake_type));
+    set_uint16(cell_out->payload+2, htons(cell_in->handshake_len));
+    memcpy(cell_out->payload + 4, cell_in->onionskin, cell_in->handshake_len);
+    break;
+  default:
+    return -1;
+  }
 
-  memwipe(key_material, 0, key_material_len);
-  tor_free(key_material);
   return 0;
- err:
-  memwipe(key_material, 0, key_material_len);
-  tor_free(key_material);
-  return -1;
 }
 
-/** Implement the server side of the CREATE_FAST abbreviated handshake.  The
- * client has provided DIGEST_LEN key bytes in <b>key_in</b> ("x").  We
- * generate a reply of DIGEST_LEN*2 bytes in <b>key_out</b>, consisting of a
- * new random "y", followed by H(x|y) to check for correctness.  We set
- * <b>key_out_len</b> bytes of key material in <b>key_out</b>.
- * Return 0 on success, &lt;0 on failure.
- **/
+/** Format the CREATE{,_FAST,2} cell in <b>cell_in</b> into <b>cell_out</b>
+ * by calling create_cell_format_impl() with <b>relayed</b> set to 0, and
+ * return that helper's result (0 on success, -1 on failure). */
 int
-fast_server_handshake(const uint8_t *key_in, /* DIGEST_LEN bytes */
-                      uint8_t *handshake_reply_out, /* DIGEST_LEN*2 bytes */
-                      uint8_t *key_out,
-                      size_t key_out_len)
+create_cell_format(cell_t *cell_out, const create_cell_t *cell_in)
 {
-  char tmp[DIGEST_LEN+DIGEST_LEN];
-  char *out = NULL;
-  size_t out_len;
-  int r = -1;
+  return create_cell_format_impl(cell_out, cell_in, 0);
+}
 
-  if (crypto_rand((char*)handshake_reply_out, DIGEST_LEN)<0)
+/** As create_cell_format(), but call create_cell_format_impl() with
+ * <b>relayed</b> set to 1, marking <b>cell_in</b> as a cell we did not
+ * originate.  Return 0 on success, -1 on failure. */
+int
+create_cell_format_relayed(cell_t *cell_out, const create_cell_t *cell_in)
+{
+  return create_cell_format_impl(cell_out, cell_in, 1);
+}
+
+/** Fill <b>cell_out</b> with a correctly formatted version of the
+ * CREATED{,_FAST,2} cell in <b>cell_in</b>. Return 0 on success, -1 on
+ * failure.
+ *
+ * The payload of <b>cell_out</b> is zeroed and its command is set to the
+ * cell type of <b>cell_in</b> first.  Failure is reported when
+ * check_created_cell() rejects <b>cell_in</b>, or when the cell type is not
+ * one of the three handled below. */
+int
+created_cell_format(cell_t *cell_out, const created_cell_t *cell_in)
+{
+  if (check_created_cell(cell_in) < 0)
     return -1;
 
-  memcpy(tmp, key_in, DIGEST_LEN);
-  memcpy(tmp+DIGEST_LEN, handshake_reply_out, DIGEST_LEN);
-  out_len = key_out_len+DIGEST_LEN;
-  out = tor_malloc(out_len);
-  if (crypto_expand_key_material(tmp, sizeof(tmp), out, out_len)) {
-    goto done;
+  memset(cell_out->payload, 0, sizeof(cell_out->payload));
+  cell_out->command = cell_in->cell_type;
+
+  switch (cell_in->cell_type) {
+  case CELL_CREATED:
+  case CELL_CREATED_FAST:
+    /* The payload is just the raw reply bytes. */
+    tor_assert(cell_in->handshake_len <= sizeof(cell_out->payload));
+    memcpy(cell_out->payload, cell_in->reply, cell_in->handshake_len);
+    break;
+  case CELL_CREATED2:
+    /* Layout: 2-byte reply length (passed through htons()), then
+     * handshake_len bytes of reply data. */
+    tor_assert(cell_in->handshake_len <= sizeof(cell_out->payload)-2);
+    set_uint16(cell_out->payload, htons(cell_in->handshake_len));
+    memcpy(cell_out->payload + 2, cell_in->reply, cell_in->handshake_len);
+    break;
+  default:
+    return -1;
   }
-  memcpy(handshake_reply_out+DIGEST_LEN, out, DIGEST_LEN);
-  memcpy(key_out, out+DIGEST_LEN, key_out_len);
-  r = 0;
- done:
-  memwipe(tmp, 0, sizeof(tmp));
-  memwipe(out, 0, out_len);
-  tor_free(out);
-  return r;
+  return 0;
 }
 
-/** Implement the second half of the client side of the CREATE_FAST handshake.
- * We sent the server <b>handshake_state</b> ("x") already, and the server
- * told us <b>handshake_reply_out</b> (y|H(x|y)).  Make sure that the hash is
- * correct, and generate key material in <b>key_out</b>.  Return 0 on success,
- * true on failure.
- *
- * NOTE: The "CREATE_FAST" handshake path is distinguishable from regular
- * "onionskin" handshakes, and is not secure if an adversary can see or modify
- * the messages.  Therefore, it should only be used by clients, and only as
- * the first hop of a circuit (since the first hop is already authenticated
- * and protected by TLS).
- */
+/** Format the EXTEND{,2} cell in <b>cell_in</b>, storing its relay payload in
+ * <b>payload_out</b>, the number of bytes used in *<b>len_out</b>, and the
+ * relay command in *<b>command_out</b>. The <b>payload_out</b> must have
+ * RELAY_PAYLOAD_SIZE bytes available.  Return 0 on success, -1 on failure.
+ *
+ * All RELAY_PAYLOAD_SIZE bytes of <b>payload_out</b> are zeroed before
+ * anything is written.  Failure is reported when check_extend_cell()
+ * rejects <b>cell_in</b>, when the cell type is neither EXTEND nor EXTEND2,
+ * or when an EXTEND2 handshake does not fit in the remaining payload. */
 int
-fast_client_handshake(const uint8_t *handshake_state,/*DIGEST_LEN bytes*/
-                      const uint8_t *handshake_reply_out,/*DIGEST_LEN*2 bytes*/
-                      uint8_t *key_out,
-                      size_t key_out_len)
+extend_cell_format(uint8_t *command_out, uint16_t *len_out,
+                   uint8_t *payload_out, const extend_cell_t *cell_in)
 {
-  char tmp[DIGEST_LEN+DIGEST_LEN];
-  char *out;
-  size_t out_len;
-  int r = -1;
+  uint8_t *p, *eop;
+  if (check_extend_cell(cell_in) < 0)
+    return -1;
 
-  memcpy(tmp, handshake_state, DIGEST_LEN);
-  memcpy(tmp+DIGEST_LEN, handshake_reply_out, DIGEST_LEN);
-  out_len = key_out_len+DIGEST_LEN;
-  out = tor_malloc(out_len);
-  if (crypto_expand_key_material(tmp, sizeof(tmp), out, out_len)) {
-    goto done;
-  }
-  if (tor_memneq(out, handshake_reply_out+DIGEST_LEN, DIGEST_LEN)) {
-    /* H(K) does *not* match. Something fishy. */
-    log_warn(LD_PROTOCOL,"Digest DOES NOT MATCH on fast handshake. "
-             "Bug or attack.");
-    goto done;
+  /* p is the write cursor; eop is one past the end of the payload. */
+  p = payload_out;
+  eop = payload_out + RELAY_PAYLOAD_SIZE;
+
+  memset(p, 0, RELAY_PAYLOAD_SIZE);
+
+  switch (cell_in->cell_type) {
+  case RELAY_COMMAND_EXTEND:
+    {
+      /* Fixed layout: 4-byte IPv4 address, 2-byte port,
+       * TAP_ONIONSKIN_CHALLENGE_LEN bytes of onionskin, then the
+       * DIGEST_LEN-byte node identity. */
+      *command_out = RELAY_COMMAND_EXTEND;
+      *len_out = 6 + TAP_ONIONSKIN_CHALLENGE_LEN + DIGEST_LEN;
+      set_uint32(p, tor_addr_to_ipv4n(&cell_in->orport_ipv4.addr));
+      /* The port is converted host-to-network, as in the EXTEND2 case. */
+      set_uint16(p+4, htons(cell_in->orport_ipv4.port));
+      if (cell_in->create_cell.handshake_type == ONION_HANDSHAKE_TYPE_NTOR) {
+        /* ntor inside an EXTEND cell: 16-byte magic marker, then the
+         * onionskin; the rest of the onionskin slot stays zeroed. */
+        memcpy(p+6, NTOR_CREATE_MAGIC, 16);
+        memcpy(p+22, cell_in->create_cell.onionskin, NTOR_ONIONSKIN_LEN);
+      } else {
+        memcpy(p+6, cell_in->create_cell.onionskin,
+               TAP_ONIONSKIN_CHALLENGE_LEN);
+      }
+      memcpy(p+6+TAP_ONIONSKIN_CHALLENGE_LEN, cell_in->node_id, DIGEST_LEN);
+    }
+    break;
+  case RELAY_COMMAND_EXTEND2:
+    {
+      uint8_t n = 2;
+      *command_out = RELAY_COMMAND_EXTEND2;
+
+      *p++ = n; /* 2 identifiers */
+      *p++ = SPECTYPE_IPV4; /* First is IPV4. */
+      *p++ = 6; /* It's 6 bytes long. */
+      set_uint32(p, tor_addr_to_ipv4n(&cell_in->orport_ipv4.addr));
+      set_uint16(p+4, htons(cell_in->orport_ipv4.port));
+      p += 6;
+      *p++ = SPECTYPE_LEGACY_ID; /* Next is an identity digest. */
+      *p++ = 20; /* It's 20 bytes long */
+      memcpy(p, cell_in->node_id, DIGEST_LEN);
+      p += 20;
+
+      /* Now we can send the handshake */
+      set_uint16(p, htons(cell_in->create_cell.handshake_type));
+      set_uint16(p+2, htons(cell_in->create_cell.handshake_len));
+      p += 4;
+
+      /* Refuse handshakes that would run past the end of the payload. */
+      if (cell_in->create_cell.handshake_len > eop - p)
+        return -1;
+
+      memcpy(p, cell_in->create_cell.onionskin,
+             cell_in->create_cell.handshake_len);
+
+      p += cell_in->create_cell.handshake_len;
+      *len_out = p - payload_out;
+    }
+    break;
+  default:
+    return -1;
   }
-  memcpy(key_out, out+DIGEST_LEN, key_out_len);
-  r = 0;
- done:
-  memwipe(tmp, 0, sizeof(tmp));
-  memwipe(out, 0, out_len);
-  tor_free(out);
-  return r;
+
+  return 0;
 }
 
-/** Remove all circuits from the pending list.  Called from tor_free_all. */
-void
-clear_pending_onions(void)
+/** Format the EXTENDED{,2} cell in <b>cell_in</b>, storing its relay payload
+ * in <b>payload_out</b>, the number of bytes used in *<b>len_out</b>, and the
+ * relay command in *<b>command_out</b>. The <b>payload_out</b> must have
+ * RELAY_PAYLOAD_SIZE bytes available.  Return 0 on success, -1 on failure.
+ *
+ * All RELAY_PAYLOAD_SIZE bytes of <b>payload_out</b> are zeroed before
+ * anything is written.  Failure is reported when check_extended_cell()
+ * rejects <b>cell_in</b>, when the cell type is neither EXTENDED nor
+ * EXTENDED2, or when an EXTENDED2 reply plus its 2-byte length does not fit
+ * in the payload; in that last case *<b>command_out</b> and
+ * *<b>len_out</b> are left untouched. */
+int
+extended_cell_format(uint8_t *command_out, uint16_t *len_out,
+                     uint8_t *payload_out, const extended_cell_t *cell_in)
 {
-  while (ol_list) {
-    onion_queue_t *victim = ol_list;
-    ol_list = victim->next;
-    tor_free(victim->onionskin);
-    tor_free(victim);
+  uint8_t *p;
+  if (check_extended_cell(cell_in) < 0)
+    return -1;
+
+  p = payload_out;
+  memset(p, 0, RELAY_PAYLOAD_SIZE);
+
+  switch (cell_in->cell_type) {
+  case RELAY_COMMAND_EXTENDED:
+    {
+      /* The payload is exactly the TAP reply. */
+      *command_out = RELAY_COMMAND_EXTENDED;
+      *len_out = TAP_ONIONSKIN_REPLY_LEN;
+      memcpy(payload_out, cell_in->created_cell.reply,
+             TAP_ONIONSKIN_REPLY_LEN);
+    }
+    break;
+  case RELAY_COMMAND_EXTENDED2:
+    {
+      /* Validate the size before touching any output. */
+      if (2+cell_in->created_cell.handshake_len > RELAY_PAYLOAD_SIZE)
+        return -1;
+      /* Layout: 2-byte reply length (via htons()), then the reply. */
+      *command_out = RELAY_COMMAND_EXTENDED2;
+      *len_out = 2 + cell_in->created_cell.handshake_len;
+      set_uint16(payload_out, htons(cell_in->created_cell.handshake_len));
+      memcpy(payload_out+2, cell_in->created_cell.reply,
+             cell_in->created_cell.handshake_len);
+    }
+    break;
+  default:
+    return -1;
   }
-  ol_list = ol_tail = NULL;
-  ol_length = 0;
+
+  return 0;
 }
 

+ 94 - 24
src/or/onion.h

@@ -12,37 +12,107 @@
 #ifndef TOR_ONION_H
 #define TOR_ONION_H
 
-int onion_pending_add(or_circuit_t *circ, char *onionskin);
-or_circuit_t *onion_next_task(char **onionskin_out);
+struct create_cell_t;
+int onion_pending_add(or_circuit_t *circ, struct create_cell_t *onionskin);
+or_circuit_t *onion_next_task(struct create_cell_t **onionskin_out);
 void onion_pending_remove(or_circuit_t *circ);
+void clear_pending_onions(void);
+
+/** Key material bundled for the server side of onion handshakes; a pointer
+ * to it is taken by onion_skin_server_handshake(),
+ * setup_server_onion_keys() and release_server_onion_keys().
+ * NOTE(review): the field descriptions below are inferred from names and
+ * types only -- confirm against setup_server_onion_keys(). */
+typedef struct server_onion_keys_t {
+  /** DIGEST_LEN-byte identity digest; presumably our own. */
+  uint8_t my_identity[DIGEST_LEN];
+  /** Presumably the current onion key. */
+  crypto_pk_t *onion_key;
+  /** Presumably the previous onion key, if any. */
+  crypto_pk_t *last_onion_key;
+#ifdef CURVE25519_ENABLED
+  /** Map keyed by 256-bit digest; presumably public key -> keypair. */
+  di_digest256_map_t *curve25519_key_map;
+  /** Presumably the stand-in keypair for unrecognized ntor keys. */
+  curve25519_keypair_t *junk_keypair;
+#endif
+} server_onion_keys_t;
 
-int onion_skin_create(crypto_pk_t *router_key,
-                      crypto_dh_t **handshake_state_out,
-                      char *onion_skin_out);
+#define MAX_ONIONSKIN_CHALLENGE_LEN 255
+#define MAX_ONIONSKIN_REPLY_LEN 255
 
-int onion_skin_server_handshake(const char *onion_skin,
-                                crypto_pk_t *private_key,
-                                crypto_pk_t *prev_private_key,
-                                char *handshake_reply_out,
-                                char *key_out,
-                                size_t key_out_len);
+void setup_server_onion_keys(server_onion_keys_t *keys);
+void release_server_onion_keys(server_onion_keys_t *keys);
 
-int onion_skin_client_handshake(crypto_dh_t *handshake_state,
-                                const char *handshake_reply,
-                                char *key_out,
-                                size_t key_out_len);
+void onion_handshake_state_release(onion_handshake_state_t *state);
 
-int fast_server_handshake(const uint8_t *key_in,
-                          uint8_t *handshake_reply_out,
-                          uint8_t *key_out,
-                          size_t key_out_len);
+int onion_skin_create(int type,
+                      const extend_info_t *node,
+                      onion_handshake_state_t *state_out,
+                      uint8_t *onion_skin_out);
+int onion_skin_server_handshake(int type,
+                      const uint8_t *onion_skin, size_t onionskin_len,
+                      const server_onion_keys_t *keys,
+                      uint8_t *reply_out,
+                      uint8_t *keys_out, size_t key_out_len,
+                      uint8_t *rend_nonce_out);
+int onion_skin_client_handshake(int type,
+                      const onion_handshake_state_t *handshake_state,
+                      const uint8_t *reply, size_t reply_len,
+                      uint8_t *keys_out, size_t key_out_len,
+                      uint8_t *rend_authenticator_out);
 
-int fast_client_handshake(const uint8_t *handshake_state,
-                          const uint8_t *handshake_reply_out,
-                          uint8_t *key_out,
-                          size_t key_out_len);
+/** A parsed CREATE, CREATE_FAST, or CREATE2 cell. */
+typedef struct create_cell_t {
+  /** The cell command. One of CELL_CREATE{,_FAST,2}. */
+  uint8_t cell_type;
+  /** One of the ONION_HANDSHAKE_TYPE_* values. */
+  uint16_t handshake_type;
+  /** The number of bytes used in <b>onionskin</b>. */
+  uint16_t handshake_len;
+  /** The client-side message for the circuit creation handshake.  The
+   * array leaves 4 bytes of a cell payload spare; a CREATE2 cell spends
+   * those on its 2-byte handshake type and 2-byte handshake length. */
+  uint8_t onionskin[CELL_PAYLOAD_SIZE - 4];
+} create_cell_t;
 
-void clear_pending_onions(void);
+/** A parsed CREATED, CREATED_FAST, or CREATED2 cell. */
+typedef struct created_cell_t {
+  /** The cell command. One of CELL_CREATED{,_FAST,2}. */
+  uint8_t cell_type;
+  /** The number of bytes used in <b>reply</b>. */
+  uint16_t handshake_len;
+  /** The server-side message for the circuit creation handshake.  The
+   * array leaves 2 bytes of a cell payload spare; a CREATED2 cell spends
+   * those on its 2-byte reply length. */
+  uint8_t reply[CELL_PAYLOAD_SIZE - 2];
+} created_cell_t;
+
+/** A parsed RELAY_EXTEND or RELAY_EXTEND2 cell. */
+typedef struct extend_cell_t {
+  /** One of RELAY_COMMAND_EXTEND or RELAY_COMMAND_EXTEND2. */
+  uint8_t cell_type;
+  /** An IPv4 address and port for the node we're connecting to. */
+  tor_addr_port_t orport_ipv4;
+  /** An IPv6 address and port for the node we're connecting to. Not currently
+   * used. */
+  tor_addr_port_t orport_ipv6;
+  /** Identity fingerprint of the node we're connecting to. */
+  uint8_t node_id[DIGEST_LEN];
+  /** The "create cell" embedded in this extend cell. Note that unlike the
+   * create cells we generate ourselves, this one can have a handshake type
+   * we don't recognize. */
+  create_cell_t create_cell;
+} extend_cell_t;
+
+/** A parsed RELAY_EXTENDED or RELAY_EXTENDED2 cell. */
+typedef struct extended_cell_t {
+  /** One of RELAY_COMMAND_EXTENDED or RELAY_COMMAND_EXTENDED2. */
+  uint8_t cell_type;
+  /** The "created cell" embedded in this extended cell. */
+  created_cell_t created_cell;
+} extended_cell_t;
+
+int create_cell_parse(create_cell_t *cell_out, const cell_t *cell_in);
+int created_cell_parse(created_cell_t *cell_out, const cell_t *cell_in);
+int extend_cell_parse(extend_cell_t *cell_out, uint8_t command,
+                      const uint8_t *payload_in, size_t payload_len);
+int extended_cell_parse(extended_cell_t *cell_out, uint8_t command,
+                        const uint8_t *payload_in, size_t payload_len);
+
+int create_cell_format(cell_t *cell_out, const create_cell_t *cell_in);
+int create_cell_format_relayed(cell_t *cell_out, const create_cell_t *cell_in);
+int created_cell_format(cell_t *cell_out, const created_cell_t *cell_in);
+int extend_cell_format(uint8_t *command_out, uint16_t *len_out,
+                       uint8_t *payload_out, const extend_cell_t *cell_in);
+int extended_cell_format(uint8_t *command_out, uint16_t *len_out,
+                         uint8_t *payload_out, const extended_cell_t *cell_in);
 
 #endif
 

+ 123 - 0
src/or/onion_fast.c

@@ -0,0 +1,123 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file onion_fast.c
+ * \brief Functions to implement the CREATE_FAST circuit handshake.
+ **/
+
+#include "or.h"
+#include "onion_fast.h"
+
+/** Overwrite <b>victim</b> with zeros and free it.  A NULL <b>victim</b> is
+ * accepted and ignored. */
+void
+fast_handshake_state_free(fast_handshake_state_t *victim)
+{
+  if (victim) {
+    memwipe(victim, 0, sizeof(*victim));
+    tor_free(victim);
+  }
+}
+
+/** Create the state needed to perform a CREATE_FAST handshake.  On success,
+ * store a newly allocated state in *<b>handshake_state_out</b>, copy its
+ * DIGEST_LEN random bytes into <b>handshake_out</b> as the message to send,
+ * and return 0.  On failure, set *<b>handshake_state_out</b> to NULL and
+ * return -1. */
+int
+fast_onionskin_create(fast_handshake_state_t **handshake_state_out,
+                      uint8_t *handshake_out)
+{
+  fast_handshake_state_t *s;
+  /* Never leave the caller holding a pointer to freed memory. */
+  *handshake_state_out = NULL;
+  s = tor_malloc(sizeof(fast_handshake_state_t));
+  if (crypto_rand((char*)s->state, sizeof(s->state)) < 0) {
+    /* Wipes whatever was written before releasing the allocation. */
+    fast_handshake_state_free(s);
+    return -1;
+  }
+  memcpy(handshake_out, s->state, DIGEST_LEN);
+  *handshake_state_out = s;
+  return 0;
+}
+
+/** Implement the server side of the CREATE_FAST abbreviated handshake.  The
+ * client has provided DIGEST_LEN key bytes in <b>key_in</b> ("x").  We
+ * generate a reply of DIGEST_LEN*2 bytes in <b>handshake_reply_out</b>,
+ * consisting of a new random "y", followed by H(x|y) to check for
+ * correctness.  We store <b>key_out_len</b> bytes of key material in
+ * <b>key_out</b>.
+ * Return 0 on success, &lt;0 on failure.
+ *
+ * Both the check value and the key material come from one call to
+ * crypto_expand_key_material_TAP() on x|y: the first DIGEST_LEN bytes of its
+ * output are the check value, and the following <b>key_out_len</b> bytes are
+ * the keys.  If that expansion fails, "y" has already been written to
+ * <b>handshake_reply_out</b>, but nothing else has.
+ **/
+int
+fast_server_handshake(const uint8_t *key_in, /* DIGEST_LEN bytes */
+                      uint8_t *handshake_reply_out, /* DIGEST_LEN*2 bytes */
+                      uint8_t *key_out,
+                      size_t key_out_len)
+{
+  uint8_t tmp[DIGEST_LEN+DIGEST_LEN];
+  uint8_t *out = NULL;
+  size_t out_len;
+  int r = -1;
+
+  /* Pick "y" directly into the first half of the reply. */
+  if (crypto_rand((char*)handshake_reply_out, DIGEST_LEN)<0)
+    return -1;
+
+  /* tmp = x | y */
+  memcpy(tmp, key_in, DIGEST_LEN);
+  memcpy(tmp+DIGEST_LEN, handshake_reply_out, DIGEST_LEN);
+  out_len = key_out_len+DIGEST_LEN;
+  out = tor_malloc(out_len);
+  if (crypto_expand_key_material_TAP(tmp, sizeof(tmp), out, out_len)) {
+    goto done;
+  }
+  memcpy(handshake_reply_out+DIGEST_LEN, out, DIGEST_LEN);
+  memcpy(key_out, out+DIGEST_LEN, key_out_len);
+  r = 0;
+ done:
+  /* Wipe the intermediate secrets on every exit path that reaches here. */
+  memwipe(tmp, 0, sizeof(tmp));
+  memwipe(out, 0, out_len);
+  tor_free(out);
+  return r;
+}
+
+/** Implement the second half of the client side of the CREATE_FAST handshake.
+ * We sent the server <b>handshake_state</b> ("x") already, and the server
+ * told us <b>handshake_reply_out</b> (y|H(x|y)).  Make sure that the hash is
+ * correct, and generate key material in <b>key_out</b>.  Return 0 on success,
+ * -1 on failure.  <b>key_out</b> is written only when the hash matches.
+ *
+ * NOTE: The "CREATE_FAST" handshake path is distinguishable from regular
+ * "onionskin" handshakes, and is not secure if an adversary can see or modify
+ * the messages.  Therefore, it should only be used by clients, and only as
+ * the first hop of a circuit (since the first hop is already authenticated
+ * and protected by TLS).
+ */
+int
+fast_client_handshake(const fast_handshake_state_t *handshake_state,
+                      const uint8_t *handshake_reply_out,/*DIGEST_LEN*2 bytes*/
+                      uint8_t *key_out,
+                      size_t key_out_len)
+{
+  uint8_t tmp[DIGEST_LEN+DIGEST_LEN];
+  uint8_t *out;
+  size_t out_len;
+  int r = -1;
+
+  /* tmp = x | y */
+  memcpy(tmp, handshake_state->state, DIGEST_LEN);
+  memcpy(tmp+DIGEST_LEN, handshake_reply_out, DIGEST_LEN);
+  out_len = key_out_len+DIGEST_LEN;
+  out = tor_malloc(out_len);
+  if (crypto_expand_key_material_TAP(tmp, sizeof(tmp), out, out_len)) {
+    goto done;
+  }
+  /* The first DIGEST_LEN expanded bytes must equal the server's check
+   * value, i.e. the second half of the reply. */
+  if (tor_memneq(out, handshake_reply_out+DIGEST_LEN, DIGEST_LEN)) {
+    /* H(K) does *not* match. Something fishy. */
+    log_warn(LD_PROTOCOL,"Digest DOES NOT MATCH on fast handshake. "
+             "Bug or attack.");
+    goto done;
+  }
+  memcpy(key_out, out+DIGEST_LEN, key_out_len);
+  r = 0;
+ done:
+  /* Wipe the intermediate secrets on every exit path. */
+  memwipe(tmp, 0, sizeof(tmp));
+  memwipe(out, 0, out_len);
+  tor_free(out);
+  return r;
+}
+

+ 38 - 0
src/or/onion_fast.h

@@ -0,0 +1,38 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file onion_fast.h
+ * \brief Header file for onion_fast.c.
+ **/
+
+#ifndef TOR_ONION_FAST_H
+#define TOR_ONION_FAST_H
+
+/** Length of the client's CREATE_FAST handshake message. */
+#define CREATE_FAST_LEN DIGEST_LEN
+/** Length of the server's CREATED_FAST reply.  Parenthesized so that it
+ * expands safely inside larger expressions. */
+#define CREATED_FAST_LEN (DIGEST_LEN*2)
+
+/** State a client keeps between creating a CREATE_FAST handshake message
+ * and processing the server's reply. */
+typedef struct fast_handshake_state_t {
+  /** The DIGEST_LEN random bytes that were sent as the handshake message
+   * (the "x" value in fast_client_handshake()). */
+  uint8_t state[DIGEST_LEN];
+} fast_handshake_state_t;
+
+void fast_handshake_state_free(fast_handshake_state_t *victim);
+
+int fast_onionskin_create(fast_handshake_state_t **handshake_state_out,
+                          uint8_t *handshake_out);
+
+int fast_server_handshake(const uint8_t *message_in,
+                          uint8_t *handshake_reply_out,
+                          uint8_t *key_out,
+                          size_t key_out_len);
+
+int fast_client_handshake(const fast_handshake_state_t *handshake_state,
+                          const uint8_t *handshake_reply_out,
+                          uint8_t *key_out,
+                          size_t key_out_len);
+
+#endif
+

+ 295 - 0
src/or/onion_ntor.c

@@ -0,0 +1,295 @@
+/* Copyright (c) 2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "orconfig.h"
+
+#include "crypto.h"
+#define ONION_NTOR_PRIVATE
+#include "onion_ntor.h"
+#include "torlog.h"
+#include "util.h"
+
+/** Overwrite an ntor handshake state with zeros and free it.  A NULL
+ * <b>state</b> is accepted and ignored. */
+void
+ntor_handshake_state_free(ntor_handshake_state_t *state)
+{
+  if (state) {
+    memwipe(state, 0, sizeof(ntor_handshake_state_t));
+    tor_free(state);
+  }
+}
+
+/** Our instantiation of ntor's "tweaked hash", built on HMAC_SHA256.
+ * Hash the <b>inp_len</b> bytes at <b>inp</b> into a DIGEST256_LEN-byte
+ * digest at <b>out</b>; the result depends on the NUL-terminated string
+ * <b>tweak</b>, which is handed to crypto_hmac_sha256() without its
+ * terminator. */
+static void
+h_tweak(uint8_t *out,
+        const uint8_t *inp, size_t inp_len,
+        const char *tweak)
+{
+  crypto_hmac_sha256((char*)out,
+                     tweak, strlen(tweak),
+                     (const char*)inp, inp_len);
+}
+
+/** Wrapper around a set of tweak-values for use with the ntor handshake. */
+typedef struct tweakset_t {
+  /** Tweak for h_tweak() when computing the MAC over auth_input. */
+  const char *t_mac;
+  /** Passed to crypto_expand_key_material_rfc5869_sha256() right after
+   * secret_input when deriving key material. */
+  const char *t_key;
+  /** Tweak for h_tweak() when computing "verify" from secret_input. */
+  const char *t_verify;
+  /** Passed to crypto_expand_key_material_rfc5869_sha256() after
+   * <b>t_key</b> when deriving key material. */
+  const char *m_expand;
+} tweakset_t;
+
+/** The tweaks to be used with our handshake.  The initializers are
+ * positional and follow the field order of tweakset_t: t_mac, t_key,
+ * t_verify, m_expand.  Each is PROTOID with a suffix appended by string
+ * literal concatenation.  PROTOID_LEN is the length of PROTOID without its
+ * terminating NUL, and must be kept in sync with the string by hand. */
+const tweakset_t proto1_tweaks = {
+#define PROTOID "ntor-curve25519-sha256-1"
+#define PROTOID_LEN 24
+  PROTOID ":mac",
+  PROTOID ":key_extract",
+  PROTOID ":verify",
+  PROTOID ":key_expand"
+};
+
+/** Convenience macro: copy <b>len</b> bytes from <b>inp</b> to <b>ptr</b>,
+ * and advance <b>ptr</b> by the number of bytes copied.  <b>ptr</b> must be
+ * a modifiable lvalue.  <b>ptr</b> and <b>len</b> are each evaluated twice,
+ * so neither may have side effects. */
+#define APPEND(ptr, inp, len)                   \
+  STMT_BEGIN {                                  \
+    memcpy((ptr), (inp), (len));                \
+    (ptr) += (len);                             \
+  } STMT_END
+
+/**
+ * Compute the first client-side step of the ntor handshake for communicating
+ * with a server whose DIGEST_LEN-byte server identity is <b>router_id</b>,
+ * and whose onion key is <b>router_key</b>. Store the NTOR_ONIONSKIN_LEN-byte
+ * message in <b>onion_skin_out</b>, and store the handshake state in
+ * *<b>handshake_state_out</b>.  Return 0 on success, -1 on failure.
+ *
+ * The message is router_id | router_key | X, where X is the public half of
+ * a freshly generated keypair kept in the state.  On failure the partly
+ * built state is wiped and freed, and *<b>handshake_state_out</b> is not
+ * modified.
+ */
+int
+onion_skin_ntor_create(const uint8_t *router_id,
+                       const curve25519_public_key_t *router_key,
+                       ntor_handshake_state_t **handshake_state_out,
+                       uint8_t *onion_skin_out)
+{
+  ntor_handshake_state_t *state;
+  uint8_t *op;
+
+  state = tor_malloc_zero(sizeof(ntor_handshake_state_t));
+
+  memcpy(state->router_id, router_id, DIGEST_LEN);
+  memcpy(&state->pubkey_B, router_key, sizeof(curve25519_public_key_t));
+  if (curve25519_secret_key_generate(&state->seckey_x, 0) < 0) {
+    /* seckey_x may hold partial key bytes: wipe before freeing. */
+    ntor_handshake_state_free(state);
+    return -1;
+  }
+  curve25519_public_key_generate(&state->pubkey_X, &state->seckey_x);
+
+  op = onion_skin_out;
+  APPEND(op, router_id, DIGEST_LEN);
+  APPEND(op, router_key->public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(op, state->pubkey_X.public_key, CURVE25519_PUBKEY_LEN);
+  tor_assert(op == onion_skin_out + NTOR_ONIONSKIN_LEN);
+
+  *handshake_state_out = state;
+
+  return 0;
+}
+
+/** Constant string appended to the end of auth_input. */
+#define SERVER_STR "Server"
+/** Length of SERVER_STR, not counting its terminating NUL. */
+#define SERVER_STR_LEN 6
+
+/** Size of secret_input as assembled by the handshake functions below: two
+ * curve25519 handshake outputs, the node identity, the three public keys
+ * B, X and Y, and PROTOID. */
+#define SECRET_INPUT_LEN (CURVE25519_PUBKEY_LEN * 3 +   \
+                          CURVE25519_OUTPUT_LEN * 2 +   \
+                          DIGEST_LEN + PROTOID_LEN)
+/** Size of auth_input as assembled by the handshake functions below: the
+ * "verify" digest, the node identity, the three public keys B, Y and X,
+ * PROTOID, and SERVER_STR. */
+#define AUTH_INPUT_LEN (DIGEST256_LEN + DIGEST_LEN +    \
+                        CURVE25519_PUBKEY_LEN*3 +       \
+                        PROTOID_LEN + SERVER_STR_LEN)
+
+/**
+ * Perform the server side of an ntor handshake. Given an
+ * NTOR_ONIONSKIN_LEN-byte message in <b>onion_skin</b>, our own identity
+ * fingerprint as <b>my_node_id</b>, and an associative array mapping public
+ * onion keys to curve25519_keypair_t in <b>private_keys</b>, attempt to
+ * perform the handshake.  Use <b>junk_keys</b> if present if the handshake
+ * indicates an unrecognized public key.  Write an NTOR_REPLY_LEN-byte
+ * message to send back to the client into <b>handshake_reply_out</b>, and
+ * generate <b>key_out_len</b> bytes of key material in <b>key_out</b>. Return
+ * 0 on success, -1 on failure.
+ *
+ * The function returns -1 without writing any output when the identity in
+ * <b>onion_skin</b> is not <b>my_node_id</b>, when no keypair (not even a
+ * junk one) is available, or when the ephemeral secret key cannot be
+ * generated.  When -1 is returned because a curve25519 handshake output was
+ * all zeros, the reply and key material have nevertheless been written and
+ * must not be used.
+ */
+int
+onion_skin_ntor_server_handshake(const uint8_t *onion_skin,
+                                 const di_digest256_map_t *private_keys,
+                                 const curve25519_keypair_t *junk_keys,
+                                 const uint8_t *my_node_id,
+                                 uint8_t *handshake_reply_out,
+                                 uint8_t *key_out,
+                                 size_t key_out_len)
+{
+  const tweakset_t *T = &proto1_tweaks;
+  /* Sensitive stack-allocated material. Kept in an anonymous struct to make
+   * it easy to wipe. */
+  struct {
+    uint8_t secret_input[SECRET_INPUT_LEN];
+    uint8_t auth_input[AUTH_INPUT_LEN];
+    curve25519_public_key_t pubkey_X;
+    curve25519_secret_key_t seckey_y;
+    curve25519_public_key_t pubkey_Y;
+    uint8_t verify[DIGEST256_LEN];
+  } s;
+  uint8_t *si = s.secret_input, *ai = s.auth_input;
+  const curve25519_keypair_t *keypair_bB;
+  int bad;
+
+  /* Decode the onion skin */
+  /* XXXX Does this possible early-return business threaten our security? */
+  if (tor_memneq(onion_skin, my_node_id, DIGEST_LEN))
+    return -1;
+  /* Note that on key-not-found, we go through with this operation anyway,
+   * using "junk_keys". This will result in failed authentication, but won't
+   * leak whether we recognized the key. */
+  keypair_bB = dimap_search(private_keys, onion_skin + DIGEST_LEN,
+                            (void*)junk_keys);
+  if (!keypair_bB)
+    return -1;
+
+  memcpy(s.pubkey_X.public_key, onion_skin+DIGEST_LEN+DIGEST256_LEN,
+         CURVE25519_PUBKEY_LEN);
+
+  /* Make y, Y.  If we cannot get a fresh ephemeral secret, stop here rather
+   * than compute a reply from indeterminate key bytes. */
+  if (curve25519_secret_key_generate(&s.seckey_y, 0) < 0) {
+    memwipe(&s, 0, sizeof(s));
+    return -1;
+  }
+  curve25519_public_key_generate(&s.pubkey_Y, &s.seckey_y);
+
+  /* NOTE: If we ever use a group other than curve25519, or a different
+   * representation for its points, we may need to perform different or
+   * additional checks on X here and on Y in the client handshake, or lose our
+   * security properties. What checks we need would depend on the properties
+   * of the group and its representation.
+   *
+   * In short: if you use anything other than curve25519, this aspect of the
+   * code will need to be reconsidered carefully. */
+
+  /* build secret_input */
+  curve25519_handshake(si, &s.seckey_y, &s.pubkey_X);
+  bad = safe_mem_is_zero(si, CURVE25519_OUTPUT_LEN);
+  si += CURVE25519_OUTPUT_LEN;
+  curve25519_handshake(si, &keypair_bB->seckey, &s.pubkey_X);
+  bad |= safe_mem_is_zero(si, CURVE25519_OUTPUT_LEN);
+  si += CURVE25519_OUTPUT_LEN;
+
+  APPEND(si, my_node_id, DIGEST_LEN);
+  APPEND(si, keypair_bB->pubkey.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(si, s.pubkey_X.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(si, s.pubkey_Y.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(si, PROTOID, PROTOID_LEN);
+  tor_assert(si == s.secret_input + sizeof(s.secret_input));
+
+  /* Compute hashes of secret_input */
+  h_tweak(s.verify, s.secret_input, sizeof(s.secret_input), T->t_verify);
+
+  /* Compute auth_input */
+  APPEND(ai, s.verify, DIGEST256_LEN);
+  APPEND(ai, my_node_id, DIGEST_LEN);
+  APPEND(ai, keypair_bB->pubkey.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(ai, s.pubkey_Y.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(ai, s.pubkey_X.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(ai, PROTOID, PROTOID_LEN);
+  APPEND(ai, SERVER_STR, SERVER_STR_LEN);
+  tor_assert(ai == s.auth_input + sizeof(s.auth_input));
+
+  /* Build the reply: Y followed by the MAC over auth_input. */
+  memcpy(handshake_reply_out, s.pubkey_Y.public_key, CURVE25519_PUBKEY_LEN);
+  h_tweak(handshake_reply_out+CURVE25519_PUBKEY_LEN,
+          s.auth_input, sizeof(s.auth_input),
+          T->t_mac);
+
+  /* Generate the key material */
+  crypto_expand_key_material_rfc5869_sha256(
+                           s.secret_input, sizeof(s.secret_input),
+                           (const uint8_t*)T->t_key, strlen(T->t_key),
+                           (const uint8_t*)T->m_expand, strlen(T->m_expand),
+                           key_out, key_out_len);
+
+  /* Wipe all of our local state */
+  memwipe(&s, 0, sizeof(s));
+
+  return bad ? -1 : 0;
+}
+
+/**
+ * Perform the final client side of the ntor handshake, using the state in
+ * <b>handshake_state</b> and the server's NTOR_REPLY_LEN-byte reply in
+ * <b>handshake_reply</b>.  Generate <b>key_out_len</b> bytes of key material
+ * in <b>key_out</b>. Return 0 on success, -1 on failure.
+ *
+ * The reply is read as the server's public key Y followed by a
+ * DIGEST256_LEN-byte authenticator.  Failure means that a curve25519
+ * handshake output was all zeros or that the authenticator did not match
+ * the one we computed.  No early return is taken on failure, so
+ * <b>key_out</b> is written in every case and must not be used when -1 is
+ * returned.
+ */
+int
+onion_skin_ntor_client_handshake(
+                             const ntor_handshake_state_t *handshake_state,
+                             const uint8_t *handshake_reply,
+                             uint8_t *key_out,
+                             size_t key_out_len)
+{
+  const tweakset_t *T = &proto1_tweaks;
+  /* Sensitive stack-allocated material. Kept in an anonymous struct to make
+   * it easy to wipe. */
+  struct {
+    curve25519_public_key_t pubkey_Y;
+    uint8_t secret_input[SECRET_INPUT_LEN];
+    uint8_t verify[DIGEST256_LEN];
+    uint8_t auth_input[AUTH_INPUT_LEN];
+    uint8_t auth[DIGEST256_LEN];
+  } s;
+  uint8_t *ai = s.auth_input, *si = s.secret_input;
+  const uint8_t *auth_candidate;
+  int bad;
+
+  /* Decode input */
+  memcpy(s.pubkey_Y.public_key, handshake_reply, CURVE25519_PUBKEY_LEN);
+  auth_candidate = handshake_reply + CURVE25519_PUBKEY_LEN;
+
+  /* See note in server_handshake above about checking points.  The
+   * circumstances under which we'd need to check Y for membership are
+   * different than those under which we'd be checking X. */
+
+  /* Compute secret_input: handshake(x,Y) | handshake(x,B) | ID | B | X | Y
+   * | PROTOID -- the same field order the server uses. */
+  curve25519_handshake(si, &handshake_state->seckey_x, &s.pubkey_Y);
+  bad = safe_mem_is_zero(si, CURVE25519_OUTPUT_LEN);
+  si += CURVE25519_OUTPUT_LEN;
+  curve25519_handshake(si, &handshake_state->seckey_x,
+                       &handshake_state->pubkey_B);
+  bad |= safe_mem_is_zero(si, CURVE25519_OUTPUT_LEN);
+  si += CURVE25519_OUTPUT_LEN;
+  APPEND(si, handshake_state->router_id, DIGEST_LEN);
+  APPEND(si, handshake_state->pubkey_B.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(si, handshake_state->pubkey_X.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(si, s.pubkey_Y.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(si, PROTOID, PROTOID_LEN);
+  tor_assert(si == s.secret_input + sizeof(s.secret_input));
+
+  /* Compute verify from secret_input */
+  h_tweak(s.verify, s.secret_input, sizeof(s.secret_input), T->t_verify);
+
+  /* Compute auth_input: verify | ID | B | Y | X | PROTOID | "Server" */
+  APPEND(ai, s.verify, DIGEST256_LEN);
+  APPEND(ai, handshake_state->router_id, DIGEST_LEN);
+  APPEND(ai, handshake_state->pubkey_B.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(ai, s.pubkey_Y.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(ai, handshake_state->pubkey_X.public_key, CURVE25519_PUBKEY_LEN);
+  APPEND(ai, PROTOID, PROTOID_LEN);
+  APPEND(ai, SERVER_STR, SERVER_STR_LEN);
+  tor_assert(ai == s.auth_input + sizeof(s.auth_input));
+
+  /* Compute auth */
+  h_tweak(s.auth, s.auth_input, sizeof(s.auth_input), T->t_mac);
+
+  /* A mismatch is folded into "bad" rather than returned immediately. */
+  bad |= tor_memneq(s.auth, auth_candidate, DIGEST256_LEN);
+
+  crypto_expand_key_material_rfc5869_sha256(
+                           s.secret_input, sizeof(s.secret_input),
+                           (const uint8_t*)T->t_key, strlen(T->t_key),
+                           (const uint8_t*)T->m_expand, strlen(T->m_expand),
+                           key_out, key_out_len);
+
+  /* Wipe all of our local state before reporting the result. */
+  memwipe(&s, 0, sizeof(s));
+  return bad ? -1 : 0;
+}
+

+ 63 - 0
src/or/onion_ntor.h

@@ -0,0 +1,63 @@
+/* Copyright (c) 2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#ifndef TOR_ONION_NTOR_H
+#define TOR_ONION_NTOR_H
+
+#include "torint.h"
+#include "crypto_curve25519.h"
+#include "di_ops.h"
+
+/** State to be maintained by a client between sending an ntor onionskin
+ * and receiving a reply. */
+typedef struct ntor_handshake_state_t ntor_handshake_state_t;
+
+/** Length of an ntor onionskin, as sent from the client to server. */
+#define NTOR_ONIONSKIN_LEN 84
+/** Length of an ntor reply, as sent from server to client. */
+#define NTOR_REPLY_LEN 64
+
+#ifdef CURVE25519_ENABLED
+void ntor_handshake_state_free(ntor_handshake_state_t *state);
+
+int onion_skin_ntor_create(const uint8_t *router_id,
+                           const curve25519_public_key_t *router_key,
+                           ntor_handshake_state_t **handshake_state_out,
+                           uint8_t *onion_skin_out);
+
+int onion_skin_ntor_server_handshake(const uint8_t *onion_skin,
+                                 const di_digest256_map_t *private_keys,
+                                 const curve25519_keypair_t *junk_keypair,
+                                 const uint8_t *my_node_id,
+                                 uint8_t *handshake_reply_out,
+                                 uint8_t *key_out,
+                                 size_t key_out_len);
+
+int onion_skin_ntor_client_handshake(
+                             const ntor_handshake_state_t *handshake_state,
+                             const uint8_t *handshake_reply,
+                             uint8_t *key_out,
+                             size_t key_out_len);
+
+#ifdef ONION_NTOR_PRIVATE
+
+/** Storage held by a client while waiting for an ntor reply from a server. */
+struct ntor_handshake_state_t {
+  /** Identity digest of the router we're talking to. */
+  uint8_t router_id[DIGEST_LEN];
+  /** Curve25519 onion public key (B) of the router we're talking to. */
+  curve25519_public_key_t pubkey_B;
+
+  /**
+   * Short-lived keypair (secret x, public X) for use with this handshake.
+   * @{ */
+  curve25519_secret_key_t seckey_x;
+  curve25519_public_key_t pubkey_X;
+  /** @} */
+};
+#endif
+
+#endif
+
+#endif
+

+ 218 - 0
src/or/onion_tap.c

@@ -0,0 +1,218 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file onion_tap.c
+ * \brief Functions to implement the original Tor circuit extension handshake
+ * (a.k.a. TAP).
+ *
+ * We didn't call it "TAP" ourselves -- Ian Goldberg named it in "On the
+ * Security of the Tor Authentication Protocol".  (Spoiler: it's secure, but
+ * its security is kind of fragile and implementation dependent.  Never modify
+ * this implementation without reading and understanding that paper at least.)
+ **/
+
+#include "or.h"
+#include "config.h"
+#include "onion_tap.h"
+#include "rephist.h"
+
+/*----------------------------------------------------------------------*/
+
+/** Given a router's 128 byte public key in <b>dest_router_key</b>,
+ * store the following in <b>onion_skin_out</b>:
+ *   - [42 bytes] OAEP padding
+ *   - [16 bytes] Symmetric key for encrypting blob past RSA
+ *   - [70 bytes] g^x part 1 (inside the RSA)
+ *   - [58 bytes] g^x part 2 (symmetrically encrypted)
+ *
+ * On success, return 0 and store the new DH state (which holds our private
+ * key) in *<b>handshake_state_out</b> for later completion of the handshake.
+ * On failure, return -1 and leave *<b>handshake_state_out</b> set to NULL.
+ * The meeting point/cookies and auth are zeroed out for now.
+ */
+int
+onion_skin_TAP_create(crypto_pk_t *dest_router_key,
+                  crypto_dh_t **handshake_state_out,
+                  char *onion_skin_out) /* TAP_ONIONSKIN_CHALLENGE_LEN bytes */
+{
+  char challenge[DH_KEY_LEN];
+  crypto_dh_t *dh = NULL;
+  int dhbytes, pkbytes;
+
+  tor_assert(dest_router_key);
+  tor_assert(handshake_state_out);
+  tor_assert(onion_skin_out);
+  *handshake_state_out = NULL;
+  memset(onion_skin_out, 0, TAP_ONIONSKIN_CHALLENGE_LEN);
+
+  if (!(dh = crypto_dh_new(DH_TYPE_CIRCUIT)))
+    goto err;
+
+  dhbytes = crypto_dh_get_bytes(dh);
+  pkbytes = (int) crypto_pk_keysize(dest_router_key);
+  tor_assert(dhbytes == 128);
+  tor_assert(pkbytes == 128);
+
+  if (crypto_dh_get_public(dh, challenge, dhbytes))
+    goto err;
+
+  note_crypto_pk_op(ENC_ONIONSKIN);
+
+  /* set meeting point, meeting cookie, etc here. Leave zero for now. */
+  if (crypto_pk_public_hybrid_encrypt(dest_router_key, onion_skin_out,
+                                      TAP_ONIONSKIN_CHALLENGE_LEN,
+                                      challenge, DH_KEY_LEN,
+                                      PK_PKCS1_OAEP_PADDING, 1)<0)
+    goto err;
+
+  memwipe(challenge, 0, sizeof(challenge));
+  *handshake_state_out = dh;
+
+  return 0;
+ err:
+  memwipe(challenge, 0, sizeof(challenge));
+  if (dh) crypto_dh_free(dh);
+  return -1;
+}
+
+/** Given an encrypted DH public key as generated by onion_skin_TAP_create,
+ * and this router's current (and, if non-NULL, previous) private onion key,
+ * write the reply (128-byte DH public key plus the first 20 bytes of shared
+ * key material) to handshake_reply_out, and store the next key_out_len
+ * bytes of key material in key_out.  Return 0 on success, -1 on failure. */
+int
+onion_skin_TAP_server_handshake(
+                            /*TAP_ONIONSKIN_CHALLENGE_LEN*/
+                            const char *onion_skin,
+                            crypto_pk_t *private_key,
+                            crypto_pk_t *prev_private_key,
+                            /*TAP_ONIONSKIN_REPLY_LEN*/
+                            char *handshake_reply_out,
+                            char *key_out,
+                            size_t key_out_len)
+{
+  char challenge[TAP_ONIONSKIN_CHALLENGE_LEN];
+  crypto_dh_t *dh = NULL;
+  ssize_t len;
+  char *key_material=NULL;
+  size_t key_material_len=0;
+  int i;
+  crypto_pk_t *k;
+
+  len = -1;
+  for (i=0;i<2;++i) { /* try the current key, then the previous one */
+    k = i==0?private_key:prev_private_key;
+    if (!k)
+      break;
+    note_crypto_pk_op(DEC_ONIONSKIN);
+    len = crypto_pk_private_hybrid_decrypt(k, challenge,
+                                           TAP_ONIONSKIN_CHALLENGE_LEN,
+                                           onion_skin,
+                                           TAP_ONIONSKIN_CHALLENGE_LEN,
+                                           PK_PKCS1_OAEP_PADDING,0);
+    if (len>0)
+      break;
+  }
+  if (len<0) {
+    log_info(LD_PROTOCOL,
+             "Couldn't decrypt onionskin: client may be using old onion key");
+    goto err;
+  } else if (len != DH_KEY_LEN) {
+    log_warn(LD_PROTOCOL, "Unexpected onionskin length after decryption: %ld",
+             (long)len);
+    goto err;
+  }
+
+  dh = crypto_dh_new(DH_TYPE_CIRCUIT);
+  if (!dh) {
+    log_warn(LD_BUG, "Couldn't allocate DH key");
+    goto err;
+  }
+  if (crypto_dh_get_public(dh, handshake_reply_out, DH_KEY_LEN)) {
+    log_info(LD_GENERAL, "crypto_dh_get_public failed.");
+    goto err;
+  }
+
+  key_material_len = DIGEST_LEN+key_out_len;
+  key_material = tor_malloc(key_material_len);
+  len = crypto_dh_compute_secret(LOG_PROTOCOL_WARN, dh, challenge,
+                                 DH_KEY_LEN, key_material,
+                                 key_material_len);
+  if (len < 0) {
+    log_info(LD_GENERAL, "crypto_dh_compute_secret failed.");
+    goto err;
+  }
+
+  /* send back H(K|0) as proof that we learned K. */
+  memcpy(handshake_reply_out+DH_KEY_LEN, key_material, DIGEST_LEN);
+
+  /* use the rest of the key material for our shared keys, digests, etc */
+  memcpy(key_out, key_material+DIGEST_LEN, key_out_len);
+
+  memwipe(challenge, 0, sizeof(challenge));
+  memwipe(key_material, 0, key_material_len);
+  tor_free(key_material);
+  crypto_dh_free(dh);
+  return 0;
+ err:
+  memwipe(challenge, 0, sizeof(challenge));
+  if (key_material) {
+    memwipe(key_material, 0, key_material_len);
+    tor_free(key_material);
+  }
+  if (dh) crypto_dh_free(dh);
+
+  return -1;
+}
+
+/** Finish the client side of the DH handshake.
+ * Given the 128 byte DH reply + 20 byte hash as generated by
+ * onion_skin_TAP_server_handshake and the handshake state generated by
+ * onion_skin_TAP_create, verify H(K) with the first 20 bytes of shared
+ * key material, then generate key_out_len more bytes of shared key
+ * material and store them in key_out.  Return 0 on success, and -1 if
+ * the DH computation fails or the hash does not match.
+ * After the invocation, call crypto_dh_free on handshake_state.
+ */
+int
+onion_skin_TAP_client_handshake(crypto_dh_t *handshake_state,
+            const char *handshake_reply, /* TAP_ONIONSKIN_REPLY_LEN bytes */
+            char *key_out,
+            size_t key_out_len)
+{
+  ssize_t len;
+  char *key_material=NULL;
+  size_t key_material_len;
+  tor_assert(crypto_dh_get_bytes(handshake_state) == DH_KEY_LEN);
+
+  key_material_len = DIGEST_LEN + key_out_len;
+  key_material = tor_malloc(key_material_len);
+  len = crypto_dh_compute_secret(LOG_PROTOCOL_WARN, handshake_state,
+                                 handshake_reply, DH_KEY_LEN, key_material,
+                                 key_material_len);
+  if (len < 0)
+    goto err;
+
+  if (tor_memneq(key_material, handshake_reply+DH_KEY_LEN, DIGEST_LEN)) {
+    /* H(K) does *not* match. Something fishy. */
+    log_warn(LD_PROTOCOL,"Digest DOES NOT MATCH on onion handshake. "
+             "Bug or attack.");
+    goto err;
+  }
+
+  /* use the rest of the key material for our shared keys, digests, etc */
+  memcpy(key_out, key_material+DIGEST_LEN, key_out_len);
+
+  memwipe(key_material, 0, key_material_len);
+  tor_free(key_material);
+  return 0;
+ err:
+  memwipe(key_material, 0, key_material_len);
+  tor_free(key_material);
+  return -1;
+}
+

+ 37 - 0
src/or/onion_tap.h

@@ -0,0 +1,37 @@
+/* Copyright (c) 2001 Matej Pfajfar.
+ * Copyright (c) 2001-2004, Roger Dingledine.
+ * Copyright (c) 2004-2006, Roger Dingledine, Nick Mathewson.
+ * Copyright (c) 2007-2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+/**
+ * \file onion_tap.h
+ * \brief Header file for onion_tap.c.
+ **/
+
+#ifndef TOR_ONION_TAP_H
+#define TOR_ONION_TAP_H
+
+#define TAP_ONIONSKIN_CHALLENGE_LEN (PKCS1_OAEP_PADDING_OVERHEAD+\
+                                 CIPHER_KEY_LEN+\
+                                 DH_KEY_LEN)
+#define TAP_ONIONSKIN_REPLY_LEN (DH_KEY_LEN+DIGEST_LEN)
+
+int onion_skin_TAP_create(crypto_pk_t *router_key,
+                      crypto_dh_t **handshake_state_out,
+                      char *onion_skin_out);
+
+int onion_skin_TAP_server_handshake(const char *onion_skin,
+                                crypto_pk_t *private_key,
+                                crypto_pk_t *prev_private_key,
+                                char *handshake_reply_out,
+                                char *key_out,
+                                size_t key_out_len);
+
+int onion_skin_TAP_client_handshake(crypto_dh_t *handshake_state,
+                                const char *handshake_reply,
+                                char *key_out,
+                                size_t key_out_len);
+
+#endif
+

+ 46 - 18
src/or/or.h

@@ -99,6 +99,7 @@
 #include "compat_libevent.h"
 #include "ht.h"
 #include "replaycache.h"
+#include "crypto_curve25519.h"
 
 /* These signals are defined to help handle_control_signal work.
  */
@@ -279,6 +280,7 @@ typedef enum {
 #define CPUWORKER_STATE_MAX_ 2
 
 #define CPUWORKER_TASK_ONION CPUWORKER_STATE_BUSY_ONION
+#define CPUWORKER_TASK_SHUTDOWN 255
 
 #define OR_CONN_STATE_MIN_ 1
 /** State for a connection to an OR: waiting for connect() to finish. */
@@ -560,6 +562,8 @@ typedef enum {
 #define RELAY_COMMAND_RESOLVE 11
 #define RELAY_COMMAND_RESOLVED 12
 #define RELAY_COMMAND_BEGIN_DIR 13
+#define RELAY_COMMAND_EXTEND2 14
+#define RELAY_COMMAND_EXTENDED2 15
 
 #define RELAY_COMMAND_ESTABLISH_INTRO 32
 #define RELAY_COMMAND_ESTABLISH_RENDEZVOUS 33
@@ -826,6 +830,8 @@ typedef enum {
 #define CELL_VERSIONS 7
 #define CELL_NETINFO 8
 #define CELL_RELAY_EARLY 9
+#define CELL_CREATE2 10
+#define CELL_CREATED2 11
 
 #define CELL_VPADDING 128
 #define CELL_CERTS 129
@@ -1398,6 +1404,7 @@ typedef struct or_connection_t {
 
   or_handshake_state_t *handshake_state; /**< If we are setting this connection
                                           * up, state information to do so. */
+
   time_t timestamp_lastempty; /**< When was the outbuf last completely empty?*/
   time_t timestamp_last_added_nonpadding; /** When did we last add a
                                            * non-padding cell to the outbuf? */
@@ -1929,6 +1936,8 @@ typedef struct {
 
   crypto_pk_t *onion_pkey; /**< Public RSA key for onions. */
   crypto_pk_t *identity_pkey;  /**< Public RSA key for signing. */
+  /** Public curve25519 key for onions */
+  curve25519_public_key_t *onion_curve25519_pkey;
 
   char *platform; /**< What software/operating system is this OR using? */
 
@@ -2052,6 +2061,9 @@ typedef struct routerstatus_t {
   /** True iff this router is a version that allows DATA cells to arrive on
    * a stream before it has sent a CONNECTED cell. */
   unsigned int version_supports_optimistic_data:1;
+  /** True iff this router has a version that allows it to accept EXTEND2
+   * cells */
+  unsigned int version_supports_extend2_cells:1;
 
   unsigned int has_bandwidth:1; /**< The vote/consensus had bw info */
   unsigned int has_exitsummary:1; /**< The vote/consensus had exit summaries */
@@ -2142,6 +2154,8 @@ typedef struct microdesc_t {
 
   /** As routerinfo_t.onion_pkey */
   crypto_pk_t *onion_pkey;
+  /** As routerinfo_t.onion_curve25519_pkey */
+  curve25519_public_key_t *onion_curve25519_pkey;
   /** As routerinfo_t.ipv6_add */
   tor_addr_t ipv6_addr;
   /** As routerinfo_t.ipv6_orport */
@@ -2501,6 +2515,9 @@ typedef struct extend_info_t {
   uint16_t port; /**< OR port. */
   tor_addr_t addr; /**< IP address. */
   crypto_pk_t *onion_key; /**< Current onionskin key. */
+#ifdef CURVE25519_ENABLED
+  curve25519_public_key_t curve25519_onion_key;
+#endif
 } extend_info_t;
 
 /** Certificate for v3 directory protocol: binds long-term authority identity
@@ -2557,6 +2574,20 @@ typedef enum {
 
 #define CRYPT_PATH_MAGIC 0x70127012u
 
+struct fast_handshake_state_t;
+struct ntor_handshake_state_t;
+#define ONION_HANDSHAKE_TYPE_TAP 0x0000
+#define ONION_HANDSHAKE_TYPE_FAST 0x0001
+#define ONION_HANDSHAKE_TYPE_NTOR 0x0002
+typedef struct {
+  uint16_t tag; /* presumably an ONION_HANDSHAKE_TYPE_* value -- confirm */
+  union {
+    struct fast_handshake_state_t *fast; /* state for a "fast" handshake */
+    crypto_dh_t *tap; /* DH state for a TAP handshake */
+    struct ntor_handshake_state_t *ntor; /* state for an ntor handshake */
+  } u; /* presumably only the member selected by tag is valid -- confirm */
+} onion_handshake_state_t;
+
 /** Holds accounting information for a single step in the layered encryption
  * performed by a circuit.  Used only at the client edge of a circuit. */
 typedef struct crypt_path_t {
@@ -2575,17 +2606,15 @@ typedef struct crypt_path_t {
   /** Digest state for cells heading away from the OR at this step. */
   crypto_digest_t *b_digest;
 
-  /** Current state of Diffie-Hellman key negotiation with the OR at this
+  /** Current state of the handshake as performed with the OR at this
    * step. */
-  crypto_dh_t *dh_handshake_state;
-  /** Current state of 'fast' (non-PK) key negotiation with the OR at this
-   * step. Used to save CPU when TLS is already providing all the
-   * authentication, secrecy, and integrity we need, and we're already
-   * distinguishable from an OR.
-   */
-  uint8_t fast_handshake_state[DIGEST_LEN];
+  onion_handshake_state_t handshake_state;
+  /** Diffie-Hellman handshake state for performing introduction
+   * operations. */
+  crypto_dh_t *rend_dh_handshake_state;
+
   /** Negotiated key material shared with the OR at this step. */
-  char handshake_digest[DIGEST_LEN];/* KH in tor-spec.txt */
+  char rend_circ_nonce[DIGEST_LEN];/* KH in tor-spec.txt */
 
   /** Information to extend to the OR at this step. */
   extend_info_t *extend_info;
@@ -2626,10 +2655,6 @@ typedef struct {
 #define CPATH_KEY_MATERIAL_LEN (20*2+16*2)
 
 #define DH_KEY_LEN DH_BYTES
-#define ONIONSKIN_CHALLENGE_LEN (PKCS1_OAEP_PADDING_OVERHEAD+\
-                                 CIPHER_KEY_LEN+\
-                                 DH_KEY_LEN)
-#define ONIONSKIN_REPLY_LEN (DH_KEY_LEN+DIGEST_LEN)
 
 /** Information used to build a circuit. */
 typedef struct {
@@ -2661,6 +2686,8 @@ typedef struct {
 #define ORIGIN_CIRCUIT_MAGIC 0x35315243u
 #define OR_CIRCUIT_MAGIC 0x98ABC04Fu
 
+struct create_cell_t;
+
 /**
  * A circuit is a path over the onion routing
  * network. Applications can connect to one end of the circuit, and can
@@ -2735,10 +2762,8 @@ typedef struct circuit_t {
    * more. */
   int deliver_window;
 
-  /** For storage while n_chan is pending
-    * (state CIRCUIT_STATE_CHAN_WAIT). When defined, it is always
-    * length ONIONSKIN_CHALLENGE_LEN. */
-  char *n_chan_onionskin;
+  /** For storage while n_chan is pending (state CIRCUIT_STATE_CHAN_WAIT). */
+  struct create_cell_t *n_chan_create_cell;
 
   /** When did circuit construction actually begin (ie send the
    * CREATE cell or begin cannibalization).
@@ -3026,7 +3051,8 @@ typedef struct or_circuit_t {
   char rend_token[REND_TOKEN_LEN];
 
   /* ???? move to a subtype or adjunct structure? Wastes 20 bytes -NM */
-  char handshake_digest[DIGEST_LEN]; /**< Stores KH for the handshake. */
+  /** Stores KH for the handshake. */
+  char rend_circ_nonce[DIGEST_LEN];/* KH in tor-spec.txt */
 
   /** How many more relay_early cells can we send on this circuit, according
    * to the specification? */
@@ -3877,6 +3903,8 @@ typedef struct {
 
   char *TLSECGroup; /**< One of "P256", "P224", or nil for auto */
 
+  /** Autobool: should we use the ntor handshake if we can? */
+  int UseNTorHandshake;
 } or_options_t;
 
 /** Persistent state for an onion router, as saved to disk. */

+ 21 - 7
src/or/relay.c

@@ -27,6 +27,7 @@
 #include "mempool.h"
 #include "networkstatus.h"
 #include "nodelist.h"
+#include "onion.h"
 #include "policies.h"
 #include "reasons.h"
 #include "relay.h"
@@ -571,6 +572,7 @@ relay_send_command_from_edge(streamid_t stream_id, circuit_t *circ,
     origin_circuit_t *origin_circ = TO_ORIGIN_CIRCUIT(circ);
     if (origin_circ->remaining_relay_early_cells > 0 &&
         (relay_command == RELAY_COMMAND_EXTEND ||
+         relay_command == RELAY_COMMAND_EXTEND2 ||
          cpath_layer != origin_circ->cpath)) {
       /* If we've got any relay_early cells left and (we're sending
        * an extend cell or we're not talking to the first hop), use
@@ -584,7 +586,8 @@ relay_send_command_from_edge(streamid_t stream_id, circuit_t *circ,
        * task 878. */
       origin_circ->relay_early_commands[
           origin_circ->relay_early_cells_sent++] = relay_command;
-    } else if (relay_command == RELAY_COMMAND_EXTEND) {
+    } else if (relay_command == RELAY_COMMAND_EXTEND ||
+               relay_command == RELAY_COMMAND_EXTEND2) {
       /* If no RELAY_EARLY cells can be sent over this circuit, log which
        * commands have been sent as RELAY_EARLY cells before; helps debug
        * task 878. */
@@ -1282,7 +1285,8 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
         connection_mark_and_flush(TO_CONN(conn));
       }
       return 0;
-    case RELAY_COMMAND_EXTEND: {
+    case RELAY_COMMAND_EXTEND:
+    case RELAY_COMMAND_EXTEND2: {
       static uint64_t total_n_extend=0, total_nonearly=0;
       total_n_extend++;
       if (rh.stream_id) {
@@ -1317,17 +1321,27 @@ connection_edge_process_relay_cell(cell_t *cell, circuit_t *circ,
       return circuit_extend(cell, circ);
     }
     case RELAY_COMMAND_EXTENDED:
+    case RELAY_COMMAND_EXTENDED2:
       if (!layer_hint) {
         log_fn(LOG_PROTOCOL_WARN, LD_PROTOCOL,
                "'extended' unsupported at non-origin. Dropping.");
         return 0;
       }
       log_debug(domain,"Got an extended cell! Yay.");
-      if ((reason = circuit_finish_handshake(TO_ORIGIN_CIRCUIT(circ),
-                                       CELL_CREATED,
-                                       cell->payload+RELAY_HEADER_SIZE)) < 0) {
-        log_warn(domain,"circuit_finish_handshake failed.");
-        return reason;
+      {
+        extended_cell_t extended_cell;
+        if (extended_cell_parse(&extended_cell, rh.command,
+                        (const uint8_t*)cell->payload+RELAY_HEADER_SIZE,
+                        rh.length)<0) {
+          log_warn(LD_PROTOCOL,
+                   "Can't parse EXTENDED cell; killing circuit.");
+          return -END_CIRC_REASON_TORPROTOCOL;
+        }
+        if ((reason = circuit_finish_handshake(TO_ORIGIN_CIRCUIT(circ),
+                                         &extended_cell.created_cell)) < 0) {
+          log_warn(domain,"circuit_finish_handshake failed.");
+          return reason;
+        }
       }
       if ((reason=circuit_send_next_onion_skin(TO_ORIGIN_CIRCUIT(circ)))<0) {
         log_info(domain,"circuit_send_next_onion_skin() failed.");

+ 7 - 7
src/or/rendclient.c

@@ -206,12 +206,12 @@ rend_client_send_introduction(origin_circuit_t *introcirc,
     cpath = rendcirc->build_state->pending_final_cpath =
       tor_malloc_zero(sizeof(crypt_path_t));
     cpath->magic = CRYPT_PATH_MAGIC;
-    if (!(cpath->dh_handshake_state = crypto_dh_new(DH_TYPE_REND))) {
+    if (!(cpath->rend_dh_handshake_state = crypto_dh_new(DH_TYPE_REND))) {
       log_warn(LD_BUG, "Internal error: couldn't allocate DH.");
       status = -2;
       goto perm_err;
     }
-    if (crypto_dh_generate_public(cpath->dh_handshake_state)<0) {
+    if (crypto_dh_generate_public(cpath->rend_dh_handshake_state)<0) {
       log_warn(LD_BUG, "Internal error: couldn't generate g^x.");
       status = -2;
       goto perm_err;
@@ -261,7 +261,7 @@ rend_client_send_introduction(origin_circuit_t *introcirc,
     dh_offset = MAX_NICKNAME_LEN+1+REND_COOKIE_LEN;
   }
 
-  if (crypto_dh_get_public(cpath->dh_handshake_state, tmp+dh_offset,
+  if (crypto_dh_get_public(cpath->rend_dh_handshake_state, tmp+dh_offset,
                            DH_KEY_LEN)<0) {
     log_warn(LD_BUG, "Internal error: couldn't extract g^x.");
     status = -2;
@@ -907,9 +907,9 @@ rend_client_receive_rendezvous(origin_circuit_t *circ, const uint8_t *request,
   tor_assert(circ->build_state);
   tor_assert(circ->build_state->pending_final_cpath);
   hop = circ->build_state->pending_final_cpath;
-  tor_assert(hop->dh_handshake_state);
+  tor_assert(hop->rend_dh_handshake_state);
   if (crypto_dh_compute_secret(LOG_PROTOCOL_WARN,
-                               hop->dh_handshake_state, (char*)request,
+                               hop->rend_dh_handshake_state, (char*)request,
                                DH_KEY_LEN,
                                keys, DIGEST_LEN+CPATH_KEY_MATERIAL_LEN)<0) {
     log_warn(LD_GENERAL, "Couldn't complete DH handshake.");
@@ -925,8 +925,8 @@ rend_client_receive_rendezvous(origin_circuit_t *circ, const uint8_t *request,
     goto err;
   }
 
-  crypto_dh_free(hop->dh_handshake_state);
-  hop->dh_handshake_state = NULL;
+  crypto_dh_free(hop->rend_dh_handshake_state);
+  hop->rend_dh_handshake_state = NULL;
 
   /* All is well. Extend the circuit. */
   circuit_change_purpose(TO_CIRCUIT(circ), CIRCUIT_PURPOSE_C_REND_JOINED);

+ 2 - 2
src/or/rendmid.c

@@ -56,8 +56,8 @@ rend_mid_establish_intro(or_circuit_t *circ, const uint8_t *request,
     goto err;
   }
 
-  /* Next 20 bytes: Hash of handshake_digest | "INTRODUCE" */
-  memcpy(buf, circ->handshake_digest, DIGEST_LEN);
+  /* Next 20 bytes: Hash of rend_circ_nonce | "INTRODUCE" */
+  memcpy(buf, circ->rend_circ_nonce, DIGEST_LEN);
   memcpy(buf+DIGEST_LEN, "INTRODUCE", 9);
   if (crypto_digest(expected_digest, buf, DIGEST_LEN+9) < 0) {
     log_warn(LD_BUG, "Internal error computing digest.");

+ 7 - 7
src/or/rendservice.c

@@ -1378,11 +1378,11 @@ rend_service_introduce(origin_circuit_t *circuit, const uint8_t *request,
   cpath->magic = CRYPT_PATH_MAGIC;
   launched->build_state->expiry_time = now + MAX_REND_TIMEOUT;
 
-  cpath->dh_handshake_state = dh;
+  cpath->rend_dh_handshake_state = dh;
   dh = NULL;
   if (circuit_init_cpath_crypto(cpath,keys+DIGEST_LEN,1)<0)
     goto err;
-  memcpy(cpath->handshake_digest, keys, DIGEST_LEN);
+  memcpy(cpath->rend_circ_nonce, keys, DIGEST_LEN);
 
   /* For path bias: This intro circuit was used successfully */
   circuit->path_state = PATH_STATE_USE_SUCCEEDED;
@@ -2486,7 +2486,7 @@ rend_service_intro_has_opened(origin_circuit_t *circuit)
   len = r;
   set_uint16(buf, htons((uint16_t)len));
   len += 2;
-  memcpy(auth, circuit->cpath->prev->handshake_digest, DIGEST_LEN);
+  memcpy(auth, circuit->cpath->prev->rend_circ_nonce, DIGEST_LEN);
   memcpy(auth+DIGEST_LEN, "INTRODUCE", 9);
   if (crypto_digest(buf+len, auth, DIGEST_LEN+9))
     goto err;
@@ -2632,13 +2632,13 @@ rend_service_rendezvous_has_opened(origin_circuit_t *circuit)
 
   /* All we need to do is send a RELAY_RENDEZVOUS1 cell... */
   memcpy(buf, circuit->rend_data->rend_cookie, REND_COOKIE_LEN);
-  if (crypto_dh_get_public(hop->dh_handshake_state,
+  if (crypto_dh_get_public(hop->rend_dh_handshake_state,
                            buf+REND_COOKIE_LEN, DH_KEY_LEN)<0) {
     log_warn(LD_GENERAL,"Couldn't get DH public key.");
     reason = END_CIRC_REASON_INTERNAL;
     goto err;
   }
-  memcpy(buf+REND_COOKIE_LEN+DH_KEY_LEN, hop->handshake_digest,
+  memcpy(buf+REND_COOKIE_LEN+DH_KEY_LEN, hop->rend_circ_nonce,
          DIGEST_LEN);
 
   /* Send the cell */
@@ -2651,8 +2651,8 @@ rend_service_rendezvous_has_opened(origin_circuit_t *circuit)
     goto err;
   }
 
-  crypto_dh_free(hop->dh_handshake_state);
-  hop->dh_handshake_state = NULL;
+  crypto_dh_free(hop->rend_dh_handshake_state);
+  hop->rend_dh_handshake_state = NULL;
 
   /* Append the cpath entry. */
   hop->state = CPATH_STATE_OPEN;

+ 219 - 5
src/or/router.c

@@ -13,6 +13,7 @@
 #include "config.h"
 #include "connection.h"
 #include "control.h"
+#include "crypto_curve25519.h"
 #include "directory.h"
 #include "dirserv.h"
 #include "dns.h"
@@ -54,6 +55,13 @@ static crypto_pk_t *onionkey=NULL;
 /** Previous private onionskin decryption key: used to decode CREATE cells
  * generated by clients that have an older version of our descriptor. */
 static crypto_pk_t *lastonionkey=NULL;
+#ifdef CURVE25519_ENABLED
+/** Current private ntor secret key: used to perform the ntor handshake. */
+static curve25519_keypair_t curve25519_onion_key;
+/** Previous private ntor secret key: used to perform the ntor handshake
+ * with clients that have an older version of our descriptor. */
+static curve25519_keypair_t last_curve25519_onion_key;
+#endif
 /** Private server "identity key": used to sign directory info and TLS
  * certificates. Never changes. */
 static crypto_pk_t *server_identitykey=NULL;
@@ -126,6 +134,55 @@ dup_onion_keys(crypto_pk_t **key, crypto_pk_t **last)
   tor_mutex_release(key_lock);
 }
 
+#ifdef CURVE25519_ENABLED
+/** Return a pointer to the static keypair currently used as our onion key
+ * for the ntor handshake. Must only be called from the main thread. */
+static const curve25519_keypair_t *
+get_current_curve25519_keypair(void)
+{
+  return &curve25519_onion_key;
+}
+/** Return a new map from KEYID (the public key itself) to heap copies of our
+ * ntor keypairs; free it with ntor_key_map_free(). Main thread only. */
+di_digest256_map_t *
+construct_ntor_key_map(void)
+{
+  di_digest256_map_t *m = NULL;
+
+  dimap_add_entry(&m,
+                  curve25519_onion_key.pubkey.public_key,
+                  tor_memdup(&curve25519_onion_key,
+                             sizeof(curve25519_keypair_t)));
+  if (!tor_mem_is_zero((const char*)
+                          last_curve25519_onion_key.pubkey.public_key,
+                       CURVE25519_PUBKEY_LEN)) { /* all-zero: presumably unset */
+    dimap_add_entry(&m,
+                    last_curve25519_onion_key.pubkey.public_key,
+                    tor_memdup(&last_curve25519_onion_key,
+                               sizeof(curve25519_keypair_t)));
+  }
+
+  return m;
+}
+/** Helper for ntor_key_map_free: wipe and free one curve25519_keypair_t
+ * value stored in a map returned by construct_ntor_key_map. */
+static void
+ntor_key_map_free_helper(void *arg)
+{
+  curve25519_keypair_t *k = arg;
+  memwipe(k, 0, sizeof(*k));
+  tor_free(k);
+}
+/** Release a keymap returned by construct_ntor_key_map. NULL is a no-op. */
+void
+ntor_key_map_free(di_digest256_map_t *map)
+{
+  if (!map)
+    return;
+  dimap_free(map, ntor_key_map_free_helper);
+}
+#endif
+
 /** Return the time when the onion key was last set.  This is either the time
  * when the process launched, or the time of the most recent key rotation since
  * the process launched.
@@ -253,11 +310,18 @@ void
 rotate_onion_key(void)
 {
   char *fname, *fname_prev;
-  crypto_pk_t *prkey;
+  crypto_pk_t *prkey = NULL;
   or_state_t *state = get_or_state();
+#ifdef CURVE25519_ENABLED
+  curve25519_keypair_t new_curve25519_keypair;
+#endif
   time_t now;
   fname = get_datadir_fname2("keys", "secret_onion_key");
   fname_prev = get_datadir_fname2("keys", "secret_onion_key.old");
+  if (file_status(fname) == FN_FILE) {
+    if (replace_file(fname, fname_prev))
+      goto error;
+  }
   if (!(prkey = crypto_pk_new())) {
     log_err(LD_GENERAL,"Error constructing rotated onion key");
     goto error;
@@ -266,19 +330,38 @@ rotate_onion_key(void)
     log_err(LD_BUG,"Error generating onion key");
     goto error;
   }
+  if (crypto_pk_write_private_key_to_filename(prkey, fname)) {
+    log_err(LD_FS,"Couldn't write generated onion key to \"%s\".", fname);
+    goto error;
+  }
+#ifdef CURVE25519_ENABLED
+  tor_free(fname);
+  tor_free(fname_prev);
+  fname = get_datadir_fname2("keys", "secret_onion_key_ntor");
+  fname_prev = get_datadir_fname2("keys", "secret_onion_key_ntor.old");
+  if (curve25519_keypair_generate(&new_curve25519_keypair, 1) < 0)
+    goto error;
   if (file_status(fname) == FN_FILE) {
     if (replace_file(fname, fname_prev))
       goto error;
   }
-  if (crypto_pk_write_private_key_to_filename(prkey, fname)) {
-    log_err(LD_FS,"Couldn't write generated onion key to \"%s\".", fname);
+  if (curve25519_keypair_write_to_file(&new_curve25519_keypair, fname,
+                                       "onion") < 0) {
+    log_err(LD_FS,"Couldn't write curve25519 onion key to \"%s\".",fname);
     goto error;
   }
+#endif
   log_info(LD_GENERAL, "Rotating onion key");
   tor_mutex_acquire(key_lock);
   crypto_pk_free(lastonionkey);
   lastonionkey = onionkey;
   onionkey = prkey;
+#ifdef CURVE25519_ENABLED
+  memcpy(&last_curve25519_onion_key, &curve25519_onion_key,
+         sizeof(curve25519_keypair_t));
+  memcpy(&curve25519_onion_key, &new_curve25519_keypair,
+         sizeof(curve25519_keypair_t));
+#endif
   now = time(NULL);
   state->LastRotatedOnionKey = onionkey_set_at = now;
   tor_mutex_release(key_lock);
@@ -290,6 +373,9 @@ rotate_onion_key(void)
   if (prkey)
     crypto_pk_free(prkey);
  done:
+#ifdef CURVE25519_ENABLED
+  memwipe(&new_curve25519_keypair, 0, sizeof(new_curve25519_keypair));
+#endif
   tor_free(fname);
   tor_free(fname_prev);
 }
@@ -363,6 +449,77 @@ init_key_from_file(const char *fname, int generate, int severity)
   return NULL;
 }
 
+#ifdef CURVE25519_ENABLED
+/** Load a curve25519 keypair from the file <b>fname</b>, writing it into
+ * <b>keys_out</b>.  If the file isn't found and <b>generate</b> is true,
+ * create a new keypair and write it into the file.  If there are errors, log
+ * them at level <b>severity</b>. Generate files using <b>tag</b> in their
+ * ASCII wrapper.
+ *
+ * Return 0 on success and -1 on failure.  "Success" includes the case where
+ * the file does not exist and <b>generate</b> is false; <b>keys_out</b> is
+ * left untouched then.  If we fail after key material has been placed in
+ * <b>keys_out</b> (the fresh key could not be written, the file could not be
+ * parsed, or it carries a tag other than <b>tag</b>), that material is wiped
+ * before returning, so a caller that ignores the return value cannot end up
+ * using a rejected key. */
+static int
+init_curve25519_keypair_from_file(curve25519_keypair_t *keys_out,
+                                  const char *fname,
+                                  int generate,
+                                  int severity,
+                                  const char *tag)
+{
+  switch (file_status(fname)) {
+    case FN_DIR:
+    case FN_ERROR:
+      log(severity, LD_FS,"Can't read key from \"%s\"", fname);
+      goto error;
+    case FN_NOENT:
+      if (generate) {
+        if (!have_lockfile()) {
+          if (try_locking(get_options(), 0)<0) {
+            /* Make sure that --list-fingerprint only creates new keys
+             * if there is no possibility for a deadlock. */
+            log(severity, LD_FS, "Another Tor process has locked \"%s\". Not "
+                "writing any new keys.", fname);
+            /*XXXX The 'other process' might make a key in a second or two;
+             * maybe we should wait for it. */
+            goto error;
+          }
+        }
+        log_info(LD_GENERAL, "No key found in \"%s\"; generating fresh key.",
+                 fname);
+        if (curve25519_keypair_generate(keys_out, 1) < 0)
+          goto error;
+        if (curve25519_keypair_write_to_file(keys_out, fname, tag)<0) {
+          log(severity, LD_FS,
+              "Couldn't write generated key to \"%s\".", fname);
+          /* Don't hand back a key that never made it to disk.  Use memwipe
+           * like the other places in this file that clear key material. */
+          memwipe(keys_out, 0, sizeof(*keys_out));
+          goto error;
+        }
+      } else {
+        log_info(LD_GENERAL, "No key found in \"%s\"", fname);
+      }
+      return 0;
+    case FN_FILE:
+      {
+        char *tag_in=NULL;
+        if (curve25519_keypair_read_from_file(keys_out, &tag_in, fname) < 0) {
+          log(severity, LD_GENERAL,"Error loading private key.");
+          tor_free(tag_in);
+          /* The reader may have filled in part of keys_out before failing. */
+          memwipe(keys_out, 0, sizeof(*keys_out));
+          goto error;
+        }
+        if (!tag_in || strcmp(tag_in, tag)) {
+          log(severity, LD_GENERAL,"Unexpected tag %s on private key.",
+              escaped(tag_in));
+          tor_free(tag_in);
+          /* A key with the wrong tag is not ours to use: drop it. */
+          memwipe(keys_out, 0, sizeof(*keys_out));
+          goto error;
+        }
+        tor_free(tag_in);
+        return 0;
+      }
+    default:
+      tor_assert(0);
+  }
+
+ error:
+  return -1;
+}
+#endif
+
 /** Try to load the vote-signing private key and certificate for being a v3
  * directory authority, and make sure they match.  If <b>legacy</b>, load a
  * legacy key/cert set for emergency key migration; otherwise load the regular
@@ -641,12 +798,35 @@ init_keys(void)
 
   keydir = get_datadir_fname2("keys", "secret_onion_key.old");
   if (!lastonionkey && file_status(keydir) == FN_FILE) {
-    prkey = init_key_from_file(keydir, 1, LOG_ERR);
+    prkey = init_key_from_file(keydir, 1, LOG_ERR); /* XXXX Why 1? */
     if (prkey)
       lastonionkey = prkey;
   }
   tor_free(keydir);
 
+#ifdef CURVE25519_ENABLED
+  {
+    /* 2b. Load curve25519 onion keys. */
+    int r;
+    keydir = get_datadir_fname2("keys", "secret_onion_key_ntor");
+    r = init_curve25519_keypair_from_file(&curve25519_onion_key,
+                                          keydir, 1, LOG_ERR, "onion");
+    tor_free(keydir);
+    if (r<0)
+      return -1;
+
+    keydir = get_datadir_fname2("keys", "secret_onion_key_ntor.old");
+    if (tor_mem_is_zero((const char *)
+                           last_curve25519_onion_key.pubkey.public_key,
+                        CURVE25519_PUBKEY_LEN) &&
+        file_status(keydir) == FN_FILE) {
+      init_curve25519_keypair_from_file(&last_curve25519_onion_key,
+                                        keydir, 0, LOG_ERR, "onion");
+    }
+    tor_free(keydir);
+  }
+#endif
+
   /* 3. Initialize link key and TLS context. */
   if (router_initialize_tls_context() < 0) {
     log_err(LD_GENERAL,"Error initializing TLS context");
@@ -905,7 +1085,8 @@ extend_info_from_router(const routerinfo_t *r)
 
   router_get_prim_orport(r, &ap);
   return extend_info_new(r->nickname, r->cache_info.identity_digest,
-                           r->onion_pkey, &ap.addr, ap.port);
+                         r->onion_pkey, r->onion_curve25519_pkey,
+                         &ap.addr, ap.port);
 }
 
 /** Some time has passed, or we just got new directory information.
@@ -1432,6 +1613,13 @@ router_digest_is_me(const char *digest)
           tor_memeq(server_identitykey_digest, digest, DIGEST_LEN));
 }
 
+/** Return a pointer to this router's identity digest, viewed as unsigned
+ * bytes.  The pointer aliases server_identitykey_digest; nothing is
+ * copied. */
+const uint8_t *
+router_get_my_id_digest(void)
+{
+  const uint8_t *my_digest = (const uint8_t *)server_identitykey_digest;
+  return my_digest;
+}
+
 /** Return true iff I'm a server and <b>digest</b> is equal to
  * my identity digest. */
 int
@@ -1578,6 +1766,11 @@ router_rebuild_descriptor(int force)
   ri->cache_info.published_on = time(NULL);
   ri->onion_pkey = crypto_pk_dup_key(get_onion_key()); /* must invoke from
                                                         * main thread */
+#ifdef CURVE25519_ENABLED
+  ri->onion_curve25519_pkey =
+    tor_memdup(&get_current_curve25519_keypair()->pubkey,
+               sizeof(curve25519_public_key_t));
+#endif
 
   /* For now, at most one IPv6 or-address is being advertised. */
   {
@@ -2158,6 +2351,22 @@ router_dump_router_to_string(char *s, size_t maxlen, routerinfo_t *router,
     written += result;
   }
 
+#ifdef CURVE25519_ENABLED
+  if (router->onion_curve25519_pkey) {
+    char kbuf[128];
+    base64_encode(kbuf, sizeof(kbuf),
+                  (const char *)router->onion_curve25519_pkey->public_key,
+                  CURVE25519_PUBKEY_LEN);
+    result = tor_snprintf(s+written,maxlen-written, "ntor-onion-key %s",
+                          kbuf);
+    if (result<0) {
+      log_warn(LD_BUG,"descriptor snprintf ran out of room!");
+      return -1;
+    }
+    written += result;
+  }
+#endif
+
   /* Write the exit policy to the end of 's'. */
   if (!router->exit_policy || !smartlist_len(router->exit_policy)) {
     strlcat(s+written, "reject *:*\n", maxlen-written);
@@ -2806,6 +3015,11 @@ router_free_all(void)
   crypto_pk_free(legacy_signing_key);
   authority_cert_free(legacy_key_certificate);
 
+#ifdef CURVE25519_ENABLED
+  memwipe(&curve25519_onion_key, 0, sizeof(curve25519_onion_key));
+  memwipe(&last_curve25519_onion_key, 0, sizeof(last_curve25519_onion_key));
+#endif
+
   if (warned_nonexistent_family) {
     SMARTLIST_FOREACH(warned_nonexistent_family, char *, cp, tor_free(cp));
     smartlist_free(warned_nonexistent_family);

+ 6 - 0
src/or/router.h

@@ -30,6 +30,11 @@ crypto_pk_t *init_key_from_file(const char *fname, int generate,
                                     int severity);
 void v3_authority_check_key_expiry(void);
 
+#ifdef CURVE25519_ENABLED
+di_digest256_map_t *construct_ntor_key_map(void);
+void ntor_key_map_free(di_digest256_map_t *map);
+#endif
+
 int router_initialize_tls_context(void);
 int init_keys(void);
 
@@ -79,6 +84,7 @@ extrainfo_t *router_get_my_extrainfo(void);
 const char *router_get_my_descriptor(void);
 const char *router_get_descriptor_gen_reason(void);
 int router_digest_is_me(const char *digest);
+const uint8_t *router_get_my_id_digest(void);
 int router_extrainfo_digest_is_me(const char *digest);
 int router_is_me(const routerinfo_t *router);
 int router_fingerprint_is_me(const char *fp);

+ 1 - 0
src/or/routerlist.c

@@ -2482,6 +2482,7 @@ routerinfo_free(routerinfo_t *router)
   tor_free(router->contact_info);
   if (router->onion_pkey)
     crypto_pk_free(router->onion_pkey);
+  tor_free(router->onion_curve25519_pkey);
   if (router->identity_pkey)
     crypto_pk_free(router->identity_pkey);
   if (router->declared_family) {

+ 36 - 0
src/or/routerparse.c

@@ -43,6 +43,7 @@ typedef enum {
   K_SIGNED_DIRECTORY,
   K_SIGNING_KEY,
   K_ONION_KEY,
+  K_ONION_KEY_NTOR,
   K_ROUTER_SIGNATURE,
   K_PUBLISHED,
   K_RUNNING_ROUTERS,
@@ -276,6 +277,7 @@ static token_rule_t routerdesc_token_table[] = {
   T01("ipv6-policy",         K_IPV6_POLICY,         CONCAT_ARGS, NO_OBJ),
   T1( "signing-key",         K_SIGNING_KEY,         NO_ARGS, NEED_KEY_1024 ),
   T1( "onion-key",           K_ONION_KEY,           NO_ARGS, NEED_KEY_1024 ),
+  T01("ntor-onion-key",      K_ONION_KEY_NTOR,      GE(1), NO_OBJ ),
   T1_END( "router-signature",    K_ROUTER_SIGNATURE,    NO_ARGS, NEED_OBJ ),
   T1( "published",           K_PUBLISHED,       CONCAT_ARGS, NO_OBJ ),
   T01("uptime",              K_UPTIME,              GE(1),   NO_OBJ ),
@@ -508,6 +510,7 @@ static token_rule_t networkstatus_detached_signature_token_table[] = {
 /** List of tokens recognized in microdescriptors */
 static token_rule_t microdesc_token_table[] = {
   T1_START("onion-key",        K_ONION_KEY,        NO_ARGS,     NEED_KEY_1024),
+  T01("ntor-onion-key",        K_ONION_KEY_NTOR,   GE(1),       NO_OBJ ),
   T0N("a",                     K_A,                GE(1),       NO_OBJ ),
   T01("family",                K_FAMILY,           ARGS,        NO_OBJ ),
   T01("p",                     K_P,                CONCAT_ARGS, NO_OBJ ),
@@ -1284,6 +1287,21 @@ router_parse_entry_from_string(const char *s, const char *end,
   router->onion_pkey = tok->key;
   tok->key = NULL; /* Prevent free */
 
+  if ((tok = find_opt_by_keyword(tokens, K_ONION_KEY_NTOR))) {
+    uint8_t k[CURVE25519_PUBKEY_LEN+32];
+    int r;
+    tor_assert(tok->n_args >= 1);
+    r = base64_decode((char*)k, sizeof(k), tok->args[0], strlen(tok->args[0]));
+    if (r != CURVE25519_PUBKEY_LEN) {
+      log_warn(LD_DIR, "Bogus onion-key-ntor in routerinfo");
+      goto err;
+    }
+    router->onion_curve25519_pkey =
+      tor_malloc(sizeof(curve25519_public_key_t));
+    memcpy(router->onion_curve25519_pkey->public_key,
+           k, CURVE25519_PUBKEY_LEN);
+  }
+
   tok = find_by_keyword(tokens, K_SIGNING_KEY);
   router->identity_pkey = tok->key;
   tok->key = NULL; /* Prevent free */
@@ -1938,6 +1956,8 @@ routerstatus_parse_entry_from_string(memarea_t *area,
         tor_version_supports_microdescriptors(tok->args[0]);
       rs->version_supports_optimistic_data =
         tor_version_as_new_as(tok->args[0], "0.2.3.1-alpha");
+      rs->version_supports_extend2_cells =
+        tor_version_as_new_as(tok->args[0], "0.2.4.7-alpha");
     }
     if (vote_rs) {
       vote_rs->version = tor_strdup(tok->args[0]);
@@ -4243,6 +4263,22 @@ microdescs_parse_from_string(const char *s, const char *eos,
     md->onion_pkey = tok->key;
     tok->key = NULL;
 
+    if ((tok = find_opt_by_keyword(tokens, K_ONION_KEY_NTOR))) {
+      uint8_t k[CURVE25519_PUBKEY_LEN+32];
+      int r;
+      tor_assert(tok->n_args >= 1);
+      r = base64_decode((char*)k, sizeof(k),
+                        tok->args[0], strlen(tok->args[0]));
+      if (r != CURVE25519_PUBKEY_LEN) {
+        log_warn(LD_DIR, "Bogus onion-key-ntor in microdesc");
+        goto next;
+      }
+      md->onion_curve25519_pkey =
+        tor_malloc(sizeof(curve25519_public_key_t));
+      memcpy(md->onion_curve25519_pkey->public_key,
+             k, CURVE25519_PUBKEY_LEN);
+    }
+
     {
       smartlist_t *a_lines = find_all_by_keyword(tokens, K_A);
       if (a_lines) {

+ 142 - 1
src/test/bench.c

@@ -15,17 +15,23 @@ const char tor_git_revision[] = "";
 #include "orconfig.h"
 
 #define RELAY_PRIVATE
+#define CONFIG_PRIVATE
 
 #include "or.h"
+#include "onion_tap.h"
 #include "relay.h"
 #include <openssl/opensslv.h>
 #include <openssl/evp.h>
-#if OPENSSL_VERSION_NUMBER >= OPENSSL_V_SERIES(1,0,0)
 #ifndef OPENSSL_NO_EC
 #include <openssl/ec.h>
 #include <openssl/ecdh.h>
 #include <openssl/obj_mac.h>
 #endif
+
+#include "config.h"
+#ifdef CURVE25519_ENABLED
+#include "crypto_curve25519.h"
+#include "onion_ntor.h"
 #endif
 
 #if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_PROCESS_CPUTIME_ID)
@@ -105,6 +111,125 @@ bench_aes(void)
   crypto_cipher_free(c);
 }
 
+/** Benchmark the TAP onion handshake.  Times, in turn: the client creating
+ * an onionskin, the server answering it when the first key it is given is
+ * the one the onionskin was made for, the server answering it when that key
+ * is only given second, and the client finishing the handshake.  Prints the
+ * per-iteration cost of each phase to stdout. */
+static void
+bench_onion_TAP(void)
+{
+  const int iters = 1<<9;
+  int i;
+  crypto_pk_t *key, *key2;
+  uint64_t start, end;
+  char os[TAP_ONIONSKIN_CHALLENGE_LEN];
+  char or[TAP_ONIONSKIN_REPLY_LEN];
+  crypto_dh_t *dh_out = NULL;
+
+  key = crypto_pk_new();
+  key2 = crypto_pk_new();
+  crypto_pk_generate_key_with_bits(key, 1024);
+  crypto_pk_generate_key_with_bits(key2, 1024);
+
+  reset_perftime();
+  start = perftime();
+  for (i = 0; i < iters; ++i) {
+    onion_skin_TAP_create(key, &dh_out, os);
+    crypto_dh_free(dh_out);
+  }
+  end = perftime();
+  printf("Client-side, part 1: %f usec.\n", NANOCOUNT(start, end, iters)/1e3);
+
+  /* Keep one onionskin (and its DH state) around for the remaining phases. */
+  onion_skin_TAP_create(key, &dh_out, os);
+  start = perftime();
+  for (i = 0; i < iters; ++i) {
+    char key_out[CPATH_KEY_MATERIAL_LEN];
+    onion_skin_TAP_server_handshake(os, key, NULL, or,
+                                    key_out, sizeof(key_out));
+  }
+  end = perftime();
+  printf("Server-side, key guessed right: %f usec\n",
+         NANOCOUNT(start, end, iters)/1e3);
+
+  start = perftime();
+  for (i = 0; i < iters; ++i) {
+    char key_out[CPATH_KEY_MATERIAL_LEN];
+    onion_skin_TAP_server_handshake(os, key2, key, or,
+                                    key_out, sizeof(key_out));
+  }
+  end = perftime();
+  printf("Server-side, key guessed wrong: %f usec.\n",
+         NANOCOUNT(start, end, iters)/1e3);
+
+  start = perftime();
+  for (i = 0; i < iters; ++i) {
+    crypto_dh_t *dh;
+    char key_out[CPATH_KEY_MATERIAL_LEN];
+    int s;
+    dh = crypto_dh_dup(dh_out);
+    s = onion_skin_TAP_client_handshake(dh, or, key_out, sizeof(key_out));
+    /* We made this copy, so we release it; previously one DH object leaked
+     * per iteration.  NOTE(review): this assumes the client handshake only
+     * borrows its DH argument -- confirm against onion_tap.c. */
+    crypto_dh_free(dh);
+    tor_assert(s == 0);
+  }
+  end = perftime();
+  printf("Client-side, part 2: %f usec.\n",
+         NANOCOUNT(start, end, iters)/1e3);
+
+  /* Release everything we still own: the saved DH state and both keys. */
+  crypto_dh_free(dh_out);
+  crypto_pk_free(key);
+  crypto_pk_free(key2);
+}
+
+#ifdef CURVE25519_ENABLED
+/** Benchmark the ntor onion handshake.  Times, in turn: the client creating
+ * an onionskin, the server answering it, and the client finishing the
+ * handshake.  Prints the per-iteration cost of each phase to stdout. */
+static void
+bench_onion_ntor(void)
+{
+  const int iters = 1<<10;
+  int i;
+  curve25519_keypair_t keypair1, keypair2;
+  uint64_t start, end;
+  uint8_t os[NTOR_ONIONSKIN_LEN];
+  uint8_t or[NTOR_REPLY_LEN];
+  ntor_handshake_state_t *state = NULL;
+  uint8_t nodeid[DIGEST_LEN];
+  di_digest256_map_t *keymap = NULL;
+
+  /* The node ID is fed to every handshake call below, so give it a defined
+   * value rather than reading whatever happens to be on the stack. */
+  memset(nodeid, 0, sizeof(nodeid));
+
+  curve25519_secret_key_generate(&keypair1.seckey, 0);
+  curve25519_public_key_generate(&keypair1.pubkey, &keypair1.seckey);
+  curve25519_secret_key_generate(&keypair2.seckey, 0);
+  curve25519_public_key_generate(&keypair2.pubkey, &keypair2.seckey);
+  /* The map entries point at the keypairs on our stack; that is why the
+   * map is freed with a NULL value-free function at the end. */
+  dimap_add_entry(&keymap, keypair1.pubkey.public_key, &keypair1);
+  dimap_add_entry(&keymap, keypair2.pubkey.public_key, &keypair2);
+
+  reset_perftime();
+  start = perftime();
+  for (i = 0; i < iters; ++i) {
+    onion_skin_ntor_create(nodeid, &keypair1.pubkey, &state, os);
+    ntor_handshake_state_free(state);
+    state = NULL; /* Never leave a dangling pointer to the freed state. */
+  }
+  end = perftime();
+  printf("Client-side, part 1: %f usec.\n", NANOCOUNT(start, end, iters)/1e3);
+
+  /* Keep one onionskin (and its client state) for the remaining phases. */
+  onion_skin_ntor_create(nodeid, &keypair1.pubkey, &state, os);
+  start = perftime();
+  for (i = 0; i < iters; ++i) {
+    uint8_t key_out[CPATH_KEY_MATERIAL_LEN];
+    onion_skin_ntor_server_handshake(os, keymap, NULL, nodeid, or,
+                                key_out, sizeof(key_out));
+  }
+  end = perftime();
+  printf("Server-side: %f usec\n",
+         NANOCOUNT(start, end, iters)/1e3);
+
+  start = perftime();
+  for (i = 0; i < iters; ++i) {
+    uint8_t key_out[CPATH_KEY_MATERIAL_LEN];
+    int s;
+    s = onion_skin_ntor_client_handshake(state, or, key_out, sizeof(key_out));
+    tor_assert(s == 0);
+  }
+  end = perftime();
+  printf("Client-side, part 2: %f usec.\n",
+         NANOCOUNT(start, end, iters)/1e3);
+
+  ntor_handshake_state_free(state);
+  dimap_free(keymap, NULL);
+}
+#endif
+
 static void
 bench_cell_aes(void)
 {
@@ -355,6 +480,10 @@ typedef struct benchmark_t {
 static struct benchmark_t benchmarks[] = {
   ENT(dmap),
   ENT(aes),
+  ENT(onion_TAP),
+#ifdef CURVE25519_ENABLED
+  ENT(onion_ntor),
+#endif
   ENT(cell_aes),
   ENT(cell_ops),
   ENT(dh),
@@ -385,6 +514,8 @@ main(int argc, const char **argv)
   int i;
   int list=0, n_enabled=0;
   benchmark_t *b;
+  char *errmsg;
+  or_options_t *options;
 
   tor_threads_init();
 
@@ -405,6 +536,16 @@ main(int argc, const char **argv)
   reset_perftime();
 
   crypto_seed_rng(1);
+  options = options_new();
+  init_logging();
+  options->command = CMD_RUN_UNITTESTS;
+  options->DataDirectory = tor_strdup("");
+  options_init(options);
+  if (set_options(options, &errmsg) < 0) {
+    printf("Failed to set initial options: %s\n", errmsg);
+    tor_free(errmsg);
+    return 1;
+  }
 
   for (b = benchmarks; b->name; ++b) {
     if (b->enabled || n_enabled == 0) {

+ 17 - 2
src/test/include.am

@@ -36,14 +36,16 @@ src_test_bench_CPPFLAGS= $(src_test_AM_CPPFLAGS)
 
 src_test_test_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@ \
         @TOR_LDFLAGS_libevent@
-src_test_test_LDADD = src/or/libtor.a src/common/libor.a src/common/libor-crypto.a \
+src_test_test_LDADD = src/or/libtor.a src/common/libor.a \
+	src/common/libor-crypto.a $(LIBDONNA) \
 	src/common/libor-event.a \
 	@TOR_ZLIB_LIBS@ @TOR_LIB_MATH@ @TOR_LIBEVENT_LIBS@ \
 	@TOR_OPENSSL_LIBS@ @TOR_LIB_WS32@ @TOR_LIB_GDI@
 
 src_test_bench_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@ \
         @TOR_LDFLAGS_libevent@
-src_test_bench_LDADD = src/or/libtor.a src/common/libor.a src/common/libor-crypto.a \
+src_test_bench_LDADD = src/or/libtor.a src/common/libor.a \
+	src/common/libor-crypto.a $(LIBDONNA) \
 	src/common/libor-event.a \
 	@TOR_ZLIB_LIBS@ @TOR_LIB_MATH@ @TOR_LIBEVENT_LIBS@ \
 	@TOR_OPENSSL_LIBS@ @TOR_LIB_WS32@ @TOR_LIB_GDI@
@@ -51,3 +53,16 @@ src_test_bench_LDADD = src/or/libtor.a src/common/libor.a src/common/libor-crypt
 noinst_HEADERS+= \
 	src/test/test.h
 
+if CURVE25519_ENABLED
+noinst_PROGRAMS+= src/test/test-ntor-cl
+src_test_test_ntor_cl_SOURCES  = src/test/test_ntor_cl.c
+src_test_test_ntor_cl_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@
+src_test_test_ntor_cl_LDADD = src/or/libtor.a src/common/libor.a \
+	src/common/libor-crypto.a $(LIBDONNA) \
+	@TOR_ZLIB_LIBS@ @TOR_LIB_MATH@ \
+	@TOR_OPENSSL_LIBS@ @TOR_LIB_WS32@ @TOR_LIB_GDI@
+src_test_test_ntor_cl_AM_CPPFLAGS =	       \
+	-I"$(top_srcdir)/src/or"
+
+endif
+

+ 387 - 0
src/test/ntor_ref.py

@@ -0,0 +1,387 @@
+# Copyright 2012  The Tor Project, Inc
+# See LICENSE for licensing information
+
+"""
+ntor_ref.py
+
+
+This module is a reference implementation for the "ntor" protocol
+as proposed by Goldberg, Stebila, and Ustaoglu and as instantiated in
+Tor Proposal 216.
+
+It's meant to be used to validate Tor's ntor implementation.  It
+requires the curve25519 python module from the curve25519-donna
+package.
+
+                *** DO NOT USE THIS IN PRODUCTION. ***
+
+commands:
+
+   gen_kdf_vectors: Print out some test vectors for the RFC5869 KDF.
+   timing: Print a little timing information about this implementation's
+      handshake.
+   self-test: Try handshaking with ourself; make sure we can.
+   test-tor: Handshake with tor's ntor implementation via the program
+      src/test/test-ntor-cl; make sure we can.
+
+"""
+
+import binascii
+import curve25519
+import hashlib
+import hmac
+import subprocess
+
+# **********************************************************************
+# Helpers and constants
+
+def HMAC(key,msg):
+    "Return the HMAC-SHA256 of 'msg' using the key 'key'."
+    H = hmac.new(key, "", hashlib.sha256)
+    H.update(msg)
+    return H.digest()
+
+def H(msg,tweak):
+    """Return the hash of 'msg' using tweak 'tweak'.  (In this version of ntor,
+       the tweaked hash is just HMAC with the tweak as the key.)"""
+    return HMAC(key=tweak,
+                msg=msg)
+
+def keyid(k):
+    """Return the 32-byte key ID of a public key 'k'. (Since we're
+       using curve25519, we let k be its own keyid.)
+    """
+    return k.serialize()
+
+# Field lengths, in bytes.  NODE_ID_LENGTH is the size of the node ID that
+# opens the client's message; H_LENGTH is used for the key-ID and AUTH
+# fields, and G_LENGTH for the public-key fields.
+NODE_ID_LENGTH = 20
+KEYID_LENGTH = 32
+G_LENGTH = 32
+H_LENGTH = 32
+
+# Protocol identifier, and the strings derived from it.  kdf_ntor() passes
+# T_KEY as the KDF salt and M_EXPAND as the KDF info; T_MAC and T_VERIFY
+# are the tweaks for H_mac() and H_verify() below.
+PROTOID = b"ntor-curve25519-sha256-1"
+M_EXPAND = PROTOID + ":key_expand"
+T_MAC    = PROTOID + ":mac"
+T_KEY    = PROTOID + ":key_extract"
+T_VERIFY = PROTOID + ":verify"
+
+# Tweaked hashes: H() with the tweak fixed to T_MAC / T_VERIFY.
+def H_mac(msg): return H(msg, tweak=T_MAC)
+def H_verify(msg): return H(msg, tweak=T_VERIFY)
+
+class PrivateKey(curve25519.keys.Private):
+    """As curve25519.keys.Private, but doesn't regenerate its public key
+       every time you ask for it.
+    """
+    def __init__(self):
+        curve25519.keys.Private.__init__(self)
+        self._memo_public = None
+
+    def get_public(self):
+        if self._memo_public is None:
+            self._memo_public = curve25519.keys.Private.get_public(self)
+
+        return self._memo_public
+
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+def kdf_rfc5869(key, salt, info, n):
+
+    prk = HMAC(key=salt, msg=key)
+
+    out = b""
+    last = b""
+    i = 1
+    while len(out) < n:
+        m = last + info + chr(i)
+        last = h = HMAC(key=prk, msg=m)
+        out += h
+        i = i + 1
+    return out[:n]
+
+def kdf_ntor(key, n):
+    return kdf_rfc5869(key, T_KEY, M_EXPAND, n)
+
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+def client_part1(node_id, pubkey_B):
+    """Initial handshake, client side.
+
+       From the specification:
+
+         <<To send a create cell, the client generates a keypair x,X =
+           KEYGEN(), and sends a CREATE cell with contents:
+
+           NODEID:     ID             -- ID_LENGTH bytes
+           KEYID:      KEYID(B)       -- H_LENGTH bytes
+           CLIENT_PK:  X              -- G_LENGTH bytes
+         >>
+
+       Takes node_id -- a digest of the server's identity key,
+             pubkey_B -- a public key for the server.
+       Returns a tuple of (client secret key x, client->server message)"""
+
+    assert len(node_id) == NODE_ID_LENGTH
+
+    key_id = keyid(pubkey_B)
+    seckey_x = PrivateKey()
+    pubkey_X = seckey_x.get_public().serialize()
+
+    message = node_id + key_id + pubkey_X
+
+    assert len(message) == NODE_ID_LENGTH + H_LENGTH + H_LENGTH
+    return seckey_x , message
+
+def hash_nil(x):
+    """Identity function, passed as the hash argument of get_shared_key().
+       We pass a hash function that does nothing because otherwise the
+       curve25519 python lib will try to sha256 the shared key for us."""
+    return x
+
+def bad_result(r):
+    """Helper: given a result of multiplying a public key by a private key,
+       return True iff one of the inputs was broken"""
+    assert len(r) == 32
+    return r == '\x00'*32
+
+def server(seckey_b, my_node_id, message, keyBytes=72):
+    """Handshake step 2, server side.
+
+       From the spec:
+
+       <<
+         The server generates a keypair of y,Y = KEYGEN(), and computes
+
+           secret_input = EXP(X,y) | EXP(X,b) | ID | B | X | Y | PROTOID
+           KEY_SEED = H(secret_input, t_key)
+           verify = H(secret_input, t_verify)
+           auth_input = verify | ID | B | Y | X | PROTOID | "Server"
+
+         The server sends a CREATED cell containing:
+
+           SERVER_PK:  Y                     -- G_LENGTH bytes
+           AUTH:       H(auth_input, t_mac)  -- H_LENGTH bytes
+        >>
+
+       Takes seckey_b -- the server's secret key
+             my_node_id -- the server's public key digest,
+             message -- a message from a client
+             keyBytes -- amount of key material to generate
+
+       Returns a tuple of (key material, server->client reply), or None on
+       error.
+    """
+
+    assert len(message) == NODE_ID_LENGTH + H_LENGTH + H_LENGTH
+
+    # A message addressed to some other node is rejected right away.
+    if my_node_id != message[:NODE_ID_LENGTH]:
+        return None
+
+    # From here on, problems are only recorded in 'badness'; the function
+    # keeps going and reports the failure after the reply has been built.
+    badness = (keyid(seckey_b.get_public()) !=
+               message[NODE_ID_LENGTH:NODE_ID_LENGTH+H_LENGTH])
+
+    pubkey_X = curve25519.keys.Public(message[NODE_ID_LENGTH+H_LENGTH:])
+    seckey_y = PrivateKey()
+    pubkey_Y = seckey_y.get_public()
+    pubkey_B = seckey_b.get_public()
+    xy = seckey_y.get_shared_key(pubkey_X, hash_nil)
+    xb = seckey_b.get_shared_key(pubkey_X, hash_nil)
+
+    # secret_input = EXP(X,y) | EXP(X,b) | ID | B | X | Y | PROTOID
+    secret_input = (xy + xb + my_node_id +
+                    pubkey_B.serialize() +
+                    pubkey_X.serialize() +
+                    pubkey_Y.serialize() +
+                    PROTOID)
+
+    verify = H_verify(secret_input)
+
+    # auth_input = verify | ID | B | Y | X | PROTOID | "Server"
+    auth_input = (verify +
+                  my_node_id +
+                  pubkey_B.serialize() +
+                  pubkey_Y.serialize() +
+                  pubkey_X.serialize() +
+                  PROTOID +
+                  "Server")
+
+    # Reply = SERVER_PK | AUTH
+    msg = pubkey_Y.serialize() + H_mac(auth_input)
+
+    # An all-zero shared secret means one of the inputs was broken.
+    badness += bad_result(xb)
+    badness += bad_result(xy)
+
+    if badness:
+        return None
+
+    keys = kdf_ntor(secret_input, keyBytes)
+
+    return keys, msg
+
+def client_part2(seckey_x, msg, node_id, pubkey_B, keyBytes=72):
+    """Handshake step 3: client side again.
+
+       From the spec:
+
+       <<
+         The client then checks Y is in G^* [see NOTE below], and computes
+
+         secret_input = EXP(Y,x) | EXP(B,x) | ID | B | X | Y | PROTOID
+         KEY_SEED = H(secret_input, t_key)
+         verify = H(secret_input, t_verify)
+         auth_input = verify | ID | B | Y | X | PROTOID | "Server"
+
+         The client verifies that AUTH == H(auth_input, t_mac).
+       >>
+
+       Takes seckey_x -- the secret key we generated in step 1.
+             msg -- the message from the server.
+             node_id -- the node_id we used in step 1.
+             server_key -- the same public key we used in step 1.
+             keyBytes -- the number of bytes we want to generate
+       Returns key material, or None on error
+
+    """
+    assert len(msg) == G_LENGTH + H_LENGTH
+
+    pubkey_Y = curve25519.keys.Public(msg[:G_LENGTH])
+    their_auth = msg[G_LENGTH:]
+
+    pubkey_X = seckey_x.get_public()
+
+    yx = seckey_x.get_shared_key(pubkey_Y, hash_nil)
+    bx = seckey_x.get_shared_key(pubkey_B, hash_nil)
+
+
+    # secret_input = EXP(Y,x) | EXP(B,x) | ID | B | X | Y | PROTOID
+    secret_input = (yx + bx + node_id +
+                    pubkey_B.serialize() +
+                    pubkey_X.serialize() +
+                    pubkey_Y.serialize() + PROTOID)
+
+    verify = H_verify(secret_input)
+
+    # auth_input = verify | ID | B | Y | X | PROTOID | "Server"
+    auth_input = (verify + node_id +
+                  pubkey_B.serialize() +
+                  pubkey_Y.serialize() +
+                  pubkey_X.serialize() + PROTOID +
+                  "Server")
+
+    my_auth = H_mac(auth_input)
+
+    badness = my_auth != their_auth
+    badness = bad_result(yx) + bad_result(bx)
+
+    if badness:
+        return None
+
+    return kdf_ntor(secret_input, keyBytes)
+
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+def demo(node_id="iToldYouAboutStairs.", server_key=PrivateKey()):
+    """
+       Try to handshake with ourself.
+    """
+    x, create = client_part1(node_id, server_key.get_public())
+    skeys, created = server(server_key, node_id, create)
+    ckeys = client_part2(x, created, node_id, server_key.get_public())
+    assert len(skeys) == 72
+    assert len(ckeys) == 72
+    assert skeys == ckeys
+
+# ======================================================================
+def timing():
+    """
+       Use Python's timeit module to see how fast this nonsense is
+    """
+    import timeit
+    t = timeit.Timer(stmt="ntor_ref.demo(N,SK)",
+       setup="import ntor_ref,curve25519;N='ABCD'*5;SK=ntor_ref.PrivateKey()")
+    print t.timeit(number=1000)
+
+# ======================================================================
+
+def kdf_vectors():
+    """
+       Generate some vectors to check our KDF.
+    """
+    import binascii
+    def kdf_vec(inp):
+        k = kdf(inp, T_KEY, M_EXPAND, 100)
+        print repr(inp), "\n\""+ binascii.b2a_hex(k)+ "\""
+    kdf_vec("")
+    kdf_vec("Tor")
+    kdf_vec("AN ALARMING ITEM TO FIND ON YOUR CREDIT-RATING STATEMENT")
+
+# ======================================================================
+
+
+def test_tor():
+    """
+       Call the test-ntor-cl command-line program to make sure we can
+       interoperate with Tor's ntor program
+    """
+    enhex=binascii.b2a_hex
+    dehex=lambda s: binascii.a2b_hex(s.strip())
+
+    PROG = "./src/test/test-ntor-cl"
+    def tor_client1(node_id, pubkey_B):
+        " returns (msg, state) "
+        p = subprocess.Popen([PROG, "client1", enhex(node_id),
+                              enhex(pubkey_B.serialize())],
+                             stdout=subprocess.PIPE)
+        return map(dehex, p.stdout.readlines())
+    def tor_server1(seckey_b, node_id, msg, n):
+        " returns (msg, keys) "
+        p = subprocess.Popen([PROG, "server1", enhex(seckey_b.serialize()),
+                              enhex(node_id), enhex(msg), str(n)],
+                             stdout=subprocess.PIPE)
+        return map(dehex, p.stdout.readlines())
+    def tor_client2(state, msg, n):
+        " returns (keys,) "
+        p = subprocess.Popen([PROG, "client2", enhex(state),
+                              enhex(msg), str(n)],
+                             stdout=subprocess.PIPE)
+        return map(dehex, p.stdout.readlines())
+
+
+    node_id = "thisisatornodeid$#%^"
+    seckey_b = PrivateKey()
+    pubkey_B = seckey_b.get_public()
+
+    # Do a pure-Tor handshake
+    c2s_msg, c_state = tor_client1(node_id, pubkey_B)
+    s2c_msg, s_keys = tor_server1(seckey_b, node_id, c2s_msg, 90)
+    c_keys, = tor_client2(c_state, s2c_msg, 90)
+    assert c_keys == s_keys
+    assert len(c_keys) == 90
+
+    # Try a mixed handshake with Tor as the client
+    c2s_msg, c_state = tor_client1(node_id, pubkey_B)
+    s_keys, s2c_msg = server(seckey_b, node_id, c2s_msg, 90)
+    c_keys, = tor_client2(c_state, s2c_msg, 90)
+    assert c_keys == s_keys
+    assert len(c_keys) == 90
+
+    # Now do a mixed handshake with Tor as the server
+    c_x, c2s_msg = client_part1(node_id, pubkey_B)
+    s2c_msg, s_keys = tor_server1(seckey_b, node_id, c2s_msg, 90)
+    c_keys = client_part2(c_x, s2c_msg, node_id, pubkey_B, 90)
+    assert c_keys == s_keys
+    assert len(c_keys) == 90
+
+    print "We just interoperated."
+
+# ======================================================================
+
+if __name__ == '__main__':
+    import sys
+    if sys.argv[1] == 'gen_kdf_vectors':
+        kdf_vectors()
+    elif sys.argv[1] == 'timing':
+        timing()
+    elif sys.argv[1] == 'self-test':
+        demo()
+    elif sys.argv[1] == 'test-tor':
+        test_tor()
+
+    else:
+        print __doc__

+ 69 - 8
src/test/test.c

@@ -53,10 +53,14 @@ double fabs(double x);
 #include "torgzip.h"
 #include "mempool.h"
 #include "memarea.h"
-#include "onion.h"
+#include "onion_tap.h"
 #include "policies.h"
 #include "rephist.h"
 #include "routerparse.h"
+#ifdef CURVE25519_ENABLED
+#include "crypto_curve25519.h"
+#include "onion_ntor.h"
+#endif
 
 #ifdef USE_DMALLOC
 #include <dmalloc.h>
@@ -815,11 +819,11 @@ test_onion_handshake(void)
 {
   /* client-side */
   crypto_dh_t *c_dh = NULL;
-  char c_buf[ONIONSKIN_CHALLENGE_LEN];
+  char c_buf[TAP_ONIONSKIN_CHALLENGE_LEN];
   char c_keys[40];
 
   /* server-side */
-  char s_buf[ONIONSKIN_REPLY_LEN];
+  char s_buf[TAP_ONIONSKIN_REPLY_LEN];
   char s_keys[40];
 
   /* shared */
@@ -828,18 +832,18 @@ test_onion_handshake(void)
   pk = pk_generate(0);
 
   /* client handshake 1. */
-  memset(c_buf, 0, ONIONSKIN_CHALLENGE_LEN);
-  test_assert(! onion_skin_create(pk, &c_dh, c_buf));
+  memset(c_buf, 0, TAP_ONIONSKIN_CHALLENGE_LEN);
+  test_assert(! onion_skin_TAP_create(pk, &c_dh, c_buf));
 
   /* server handshake */
-  memset(s_buf, 0, ONIONSKIN_REPLY_LEN);
+  memset(s_buf, 0, TAP_ONIONSKIN_REPLY_LEN);
   memset(s_keys, 0, 40);
-  test_assert(! onion_skin_server_handshake(c_buf, pk, NULL,
+  test_assert(! onion_skin_TAP_server_handshake(c_buf, pk, NULL,
                                             s_buf, s_keys, 40));
 
   /* client handshake 2 */
   memset(c_keys, 0, 40);
-  test_assert(! onion_skin_client_handshake(c_dh, s_buf, c_keys, 40));
+  test_assert(! onion_skin_TAP_client_handshake(c_dh, s_buf, c_keys, 40));
 
   if (memcmp(c_keys, s_keys, 40)) {
     puts("Aiiiie");
@@ -856,6 +860,60 @@ test_onion_handshake(void)
     crypto_pk_free(pk);
 }
 
+#ifdef CURVE25519_ENABLED
+/** Unit test: run one complete ntor handshake (client create, server
+ * handshake, client finish) against a freshly generated server keypair, and
+ * check that both sides derive the same 400 bytes of key material and that
+ * the start of that material is not all zeros. */
+static void
+test_ntor_handshake(void *arg)
+{
+  /* client-side */
+  ntor_handshake_state_t *c_state = NULL;
+  uint8_t c_buf[NTOR_ONIONSKIN_LEN];
+  uint8_t c_keys[400];
+
+  /* server-side */
+  di_digest256_map_t *s_keymap=NULL;
+  curve25519_keypair_t s_keypair;
+  uint8_t s_buf[NTOR_REPLY_LEN];
+  uint8_t s_keys[400];
+
+  /* shared */
+  const curve25519_public_key_t *server_pubkey;
+  uint8_t node_id[20] = "abcdefghijklmnopqrst";
+
+  (void) arg;
+
+  /* Make the server some keys */
+  curve25519_secret_key_generate(&s_keypair.seckey, 0);
+  curve25519_public_key_generate(&s_keypair.pubkey, &s_keypair.seckey);
+  dimap_add_entry(&s_keymap, s_keypair.pubkey.public_key, &s_keypair);
+  server_pubkey = &s_keypair.pubkey;
+
+  /* client handshake 1. */
+  memset(c_buf, 0, sizeof(c_buf));
+  tt_int_op(0, ==, onion_skin_ntor_create(node_id, server_pubkey,
+                                          &c_state, c_buf));
+
+  /* server handshake */
+  memset(s_buf, 0, sizeof(s_buf));
+  /* Clear the whole key buffer, not just its first 40 bytes: all 400 bytes
+   * are compared below. */
+  memset(s_keys, 0, sizeof(s_keys));
+  tt_int_op(0, ==, onion_skin_ntor_server_handshake(c_buf, s_keymap, NULL,
+                                                    node_id,
+                                                    s_buf, s_keys, 400));
+
+  /* client handshake 2 */
+  memset(c_keys, 0, sizeof(c_keys));
+  tt_int_op(0, ==, onion_skin_ntor_client_handshake(c_state, s_buf,
+                                                    c_keys, 400));
+
+  test_memeq(c_keys, s_keys, 400);
+  /* Reuse s_buf as 40 bytes of zeros to make sure the derived keys are not
+   * simply the zero-filled buffer we started with. */
+  memset(s_buf, 0, 40);
+  test_memneq(c_keys, s_buf, 40);
+
+ done:
+  ntor_handshake_state_free(c_state);
+  dimap_free(s_keymap, NULL);
+}
+#endif
+
 static void
 test_circuit_timeout(void)
 {
@@ -1947,6 +2005,9 @@ static struct testcase_t test_array[] = {
   ENT(buffers),
   { "buffer_copy", test_buffer_copy, 0, NULL, NULL },
   ENT(onion_handshake),
+#ifdef CURVE25519_ENABLED
+  { "ntor_handshake", test_ntor_handshake, 0, NULL, NULL },
+#endif
   ENT(circuit_timeout),
   ENT(policies),
   ENT(rend_fns),

+ 502 - 0
src/test/test_cell_formats.c

@@ -9,6 +9,10 @@
 #define RELAY_PRIVATE
 #include "or.h"
 #include "connection_edge.h"
+#include "onion.h"
+#include "onion_tap.h"
+#include "onion_fast.h"
+#include "onion_ntor.h"
 #include "relay.h"
 #include "test.h"
 
@@ -374,6 +378,500 @@ test_cfmt_connected_cells(void *arg)
   tor_free(mem_op_hex_tmp);
 }
 
+/** Unit test for create_cell_parse() and create_cell_format(): parse
+ * well-formed CREATE, CREATE_FAST and CREATE2 cells, check the parsed fields,
+ * re-format them and compare against the original cell; then check that
+ * malformed CREATE2 cells and over-long handshakes are rejected with -1. */
+static void
+test_cfmt_create_cells(void *arg)
+{
+  uint8_t b[MAX_ONIONSKIN_CHALLENGE_LEN];
+  create_cell_t cc;
+  cell_t cell;
+  cell_t cell2;
+
+  (void)arg;
+
+  /* === Let's try parsing some good cells! */
+
+  /* A valid create cell. */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, TAP_ONIONSKIN_CHALLENGE_LEN);
+  cell.command = CELL_CREATE;
+  memcpy(cell.payload, b, TAP_ONIONSKIN_CHALLENGE_LEN);
+  tt_int_op(0, ==, create_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATE, ==, cc.cell_type);
+  tt_int_op(ONION_HANDSHAKE_TYPE_TAP, ==, cc.handshake_type);
+  tt_int_op(TAP_ONIONSKIN_CHALLENGE_LEN, ==, cc.handshake_len);
+  /* Compare 10 bytes past the handshake as well: b was zeroed before the
+   * random fill, so the bytes after the onionskin are expected to be zero. */
+  test_memeq(cc.onionskin, b, TAP_ONIONSKIN_CHALLENGE_LEN + 10);
+  tt_int_op(0, ==, create_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+
+  /* A valid create_fast cell. */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, CREATE_FAST_LEN);
+  cell.command = CELL_CREATE_FAST;
+  memcpy(cell.payload, b, CREATE_FAST_LEN);
+  tt_int_op(0, ==, create_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATE_FAST, ==, cc.cell_type);
+  tt_int_op(ONION_HANDSHAKE_TYPE_FAST, ==, cc.handshake_type);
+  tt_int_op(CREATE_FAST_LEN, ==, cc.handshake_len);
+  test_memeq(cc.onionskin, b, CREATE_FAST_LEN + 10);
+  tt_int_op(0, ==, create_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+
+  /* A valid create2 cell with a TAP payload */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, TAP_ONIONSKIN_CHALLENGE_LEN);
+  cell.command = CELL_CREATE2;
+  memcpy(cell.payload, "\x00\x00\x00\xBA", 4); /* TAP, 186 bytes long */
+  memcpy(cell.payload+4, b, TAP_ONIONSKIN_CHALLENGE_LEN);
+  tt_int_op(0, ==, create_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATE2, ==, cc.cell_type);
+  tt_int_op(ONION_HANDSHAKE_TYPE_TAP, ==, cc.handshake_type);
+  tt_int_op(TAP_ONIONSKIN_CHALLENGE_LEN, ==, cc.handshake_len);
+  test_memeq(cc.onionskin, b, TAP_ONIONSKIN_CHALLENGE_LEN + 10);
+  tt_int_op(0, ==, create_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+
+  /* A valid create2 cell with an ntor payload */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, NTOR_ONIONSKIN_LEN);
+  cell.command = CELL_CREATE2;
+  memcpy(cell.payload, "\x00\x02\x00\x54", 4); /* ntor, 84 bytes long */
+  memcpy(cell.payload+4, b, NTOR_ONIONSKIN_LEN);
+#ifdef CURVE25519_ENABLED
+  tt_int_op(0, ==, create_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATE2, ==, cc.cell_type);
+  tt_int_op(ONION_HANDSHAKE_TYPE_NTOR, ==, cc.handshake_type);
+  tt_int_op(NTOR_ONIONSKIN_LEN, ==, cc.handshake_len);
+  test_memeq(cc.onionskin, b, NTOR_ONIONSKIN_LEN + 10);
+  tt_int_op(0, ==, create_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+#else
+  /* Without curve25519 support, an ntor cell is expected to be rejected. */
+  tt_int_op(-1, ==, create_cell_parse(&cc, &cell));
+#endif
+
+  /* A valid create cell with an ntor payload, in legacy format. */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, NTOR_ONIONSKIN_LEN);
+  cell.command = CELL_CREATE;
+  /* The legacy format marks an ntor onionskin with this 16-byte tag. */
+  memcpy(cell.payload, "ntorNTORntorNTOR", 16);
+  memcpy(cell.payload+16, b, NTOR_ONIONSKIN_LEN);
+#ifdef CURVE25519_ENABLED
+  tt_int_op(0, ==, create_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATE, ==, cc.cell_type);
+  tt_int_op(ONION_HANDSHAKE_TYPE_NTOR, ==, cc.handshake_type);
+  tt_int_op(NTOR_ONIONSKIN_LEN, ==, cc.handshake_len);
+  test_memeq(cc.onionskin, b, NTOR_ONIONSKIN_LEN + 10);
+  tt_int_op(0, ==, create_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+#else
+  tt_int_op(-1, ==, create_cell_parse(&cc, &cell));
+#endif
+
+  /* == Okay, now let's try to parse some impossible stuff. */
+
+  /* It has to be some kind of a create cell! */
+  cell.command = CELL_CREATED;
+  tt_int_op(-1, ==, create_cell_parse(&cc, &cell));
+
+  /* You can't actually make an unparseable CREATE or CREATE_FAST cell. */
+
+  /* Try some CREATE2 cells.  First with a bad type. */
+  cell.command = CELL_CREATE2;
+  memcpy(cell.payload, "\x00\x50\x00\x99", 4); /* Type 0x50???? */
+  tt_int_op(-1, ==, create_cell_parse(&cc, &cell));
+  /* Now a good type with an incorrect length. */
+  memcpy(cell.payload, "\x00\x00\x00\xBC", 4); /* TAP, 188 bytes.*/
+  tt_int_op(-1, ==, create_cell_parse(&cc, &cell));
+  /* Now a good type with a ridiculous length. */
+  memcpy(cell.payload, "\x00\x00\x02\x00", 4); /* TAP, 512 bytes.*/
+  tt_int_op(-1, ==, create_cell_parse(&cc, &cell));
+
+  /* == Time to try formatting bad cells.  The important thing is that
+     we reject big lengths, so just check that for now. */
+  cc.handshake_len = 512;
+  tt_int_op(-1, ==, create_cell_format(&cell2, &cc));
+
+  /* == Try formatting a create2 cell we don't understand. XXXX */
+
+ done:
+  ;
+}
+
+/** Unit test for created_cell_parse() and created_cell_format(): round-trip
+ * well-formed CREATED, CREATED_FAST and CREATED2 cells, then check that a
+ * CREATED2 reply one byte longer than the 496-byte maximum is rejected both
+ * when parsing and when formatting. */
+static void
+test_cfmt_created_cells(void *arg)
+{
+  uint8_t b[512];
+  created_cell_t cc;
+  cell_t cell;
+  cell_t cell2;
+
+  (void)arg;
+
+  /* A good CREATED cell */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, TAP_ONIONSKIN_REPLY_LEN);
+  cell.command = CELL_CREATED;
+  memcpy(cell.payload, b, TAP_ONIONSKIN_REPLY_LEN);
+  tt_int_op(0, ==, created_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATED, ==, cc.cell_type);
+  tt_int_op(TAP_ONIONSKIN_REPLY_LEN, ==, cc.handshake_len);
+  /* The extra 10 bytes compared here are zeros in b, so this also checks
+   * that nothing follows the reply in cc.reply. */
+  test_memeq(cc.reply, b, TAP_ONIONSKIN_REPLY_LEN + 10);
+  tt_int_op(0, ==, created_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+
+  /* A good CREATED_FAST cell */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, CREATED_FAST_LEN);
+  cell.command = CELL_CREATED_FAST;
+  memcpy(cell.payload, b, CREATED_FAST_LEN);
+  tt_int_op(0, ==, created_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATED_FAST, ==, cc.cell_type);
+  tt_int_op(CREATED_FAST_LEN, ==, cc.handshake_len);
+  test_memeq(cc.reply, b, CREATED_FAST_LEN + 10);
+  tt_int_op(0, ==, created_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+
+  /* A good CREATED2 cell with short reply */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, 64);
+  cell.command = CELL_CREATED2;
+  memcpy(cell.payload, "\x00\x40", 2); /* 2-byte length prefix: 64 */
+  memcpy(cell.payload+2, b, 64);
+  tt_int_op(0, ==, created_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATED2, ==, cc.cell_type);
+  tt_int_op(64, ==, cc.handshake_len);
+  test_memeq(cc.reply, b, 80);
+  tt_int_op(0, ==, created_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+
+  /* A good CREATED2 cell with maximal reply */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, 496);
+  cell.command = CELL_CREATED2;
+  memcpy(cell.payload, "\x01\xF0", 2); /* 2-byte length prefix: 496 */
+  memcpy(cell.payload+2, b, 496);
+  tt_int_op(0, ==, created_cell_parse(&cc, &cell));
+  tt_int_op(CELL_CREATED2, ==, cc.cell_type);
+  tt_int_op(496, ==, cc.handshake_len);
+  test_memeq(cc.reply, b, 496);
+  tt_int_op(0, ==, created_cell_format(&cell2, &cc));
+  tt_int_op(cell.command, ==, cell2.command);
+  test_memeq(cell.payload, cell2.payload, CELL_PAYLOAD_SIZE);
+
+  /* Bogus CREATED2 cell: too long! */
+  memset(&cell, 0, sizeof(cell));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, 496);
+  cell.command = CELL_CREATED2;
+  memcpy(cell.payload, "\x01\xF1", 2); /* claimed length: 497 */
+  tt_int_op(-1, ==, created_cell_parse(&cc, &cell));
+
+  /* Unformattable CREATED2 cell: too long! */
+  cc.handshake_len = 497;
+  tt_int_op(-1, ==, created_cell_format(&cell2, &cc));
+
+ done:
+  ;
+}
+
+/** Unit test for extend_cell_parse() and extend_cell_format(): parse
+ * hand-built EXTEND and EXTEND2 relay payloads, check every parsed field,
+ * re-format and compare with the input; then feed the parser a series of
+ * malformed EXTEND2 payloads (each expected to yield -1) and finally a batch
+ * of random inputs. */
+static void
+test_cfmt_extend_cells(void *arg)
+{
+  cell_t cell;
+  uint8_t b[512];
+  extend_cell_t ec;
+  create_cell_t *cc = &ec.create_cell;
+  uint8_t p[RELAY_PAYLOAD_SIZE];
+  uint8_t p2[RELAY_PAYLOAD_SIZE];
+  uint8_t p2_cmd;
+  uint16_t p2_len;
+  char *mem_op_hex_tmp = NULL;
+
+  (void) arg;
+
+  /* Let's start with a simple EXTEND cell. */
+  memset(p, 0, sizeof(p));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, TAP_ONIONSKIN_CHALLENGE_LEN);
+  memcpy(p, "\x12\xf4\x00\x01\x01\x02", 6); /* 18 244 0 1 : 258 */
+  memcpy(p+6,b,TAP_ONIONSKIN_CHALLENGE_LEN);
+  memcpy(p+6+TAP_ONIONSKIN_CHALLENGE_LEN, "electroencephalogram", 20);
+  tt_int_op(0, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND,
+                                     p, 26+TAP_ONIONSKIN_CHALLENGE_LEN));
+  tt_int_op(RELAY_COMMAND_EXTEND, ==, ec.cell_type);
+  tt_str_op("18.244.0.1", ==, fmt_addr(&ec.orport_ipv4.addr));
+  tt_int_op(258, ==, ec.orport_ipv4.port);
+  tt_int_op(AF_UNSPEC, ==, tor_addr_family(&ec.orport_ipv6.addr));
+  test_memeq(ec.node_id, "electroencephalogram", 20);
+  tt_int_op(cc->cell_type, ==, CELL_CREATE);
+  tt_int_op(cc->handshake_type, ==, ONION_HANDSHAKE_TYPE_TAP);
+  tt_int_op(cc->handshake_len, ==, TAP_ONIONSKIN_CHALLENGE_LEN);
+  test_memeq(cc->onionskin, b, TAP_ONIONSKIN_CHALLENGE_LEN+20);
+  tt_int_op(0, ==, extend_cell_format(&p2_cmd, &p2_len, p2, &ec));
+  tt_int_op(p2_cmd, ==, RELAY_COMMAND_EXTEND);
+  tt_int_op(p2_len, ==, 26+TAP_ONIONSKIN_CHALLENGE_LEN);
+  test_memeq(p2, p, RELAY_PAYLOAD_SIZE);
+
+  /* Let's do an ntor stuffed in a legacy EXTEND cell */
+  memset(p, 0, sizeof(p));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, NTOR_ONIONSKIN_LEN);
+  memcpy(p, "\x12\xf4\x00\x01\x01\x02", 6); /* 18 244 0 1 : 258 */
+  memcpy(p+6,"ntorNTORntorNTOR", 16);
+  memcpy(p+22, b, NTOR_ONIONSKIN_LEN);
+  /* The identity digest still sits after a TAP-sized onionskin field, at the
+   * same offset as in the plain EXTEND case above. */
+  memcpy(p+6+TAP_ONIONSKIN_CHALLENGE_LEN, "electroencephalogram", 20);
+  tt_int_op(0, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND,
+                                     p, 26+TAP_ONIONSKIN_CHALLENGE_LEN));
+  tt_int_op(RELAY_COMMAND_EXTEND, ==, ec.cell_type);
+  tt_str_op("18.244.0.1", ==, fmt_addr(&ec.orport_ipv4.addr));
+  tt_int_op(258, ==, ec.orport_ipv4.port);
+  tt_int_op(AF_UNSPEC, ==, tor_addr_family(&ec.orport_ipv6.addr));
+  test_memeq(ec.node_id, "electroencephalogram", 20);
+  tt_int_op(cc->cell_type, ==, CELL_CREATE2);
+  tt_int_op(cc->handshake_type, ==, ONION_HANDSHAKE_TYPE_NTOR);
+  tt_int_op(cc->handshake_len, ==, NTOR_ONIONSKIN_LEN);
+  test_memeq(cc->onionskin, b, NTOR_ONIONSKIN_LEN+20);
+  tt_int_op(0, ==, extend_cell_format(&p2_cmd, &p2_len, p2, &ec));
+  tt_int_op(p2_cmd, ==, RELAY_COMMAND_EXTEND);
+  tt_int_op(p2_len, ==, 26+TAP_ONIONSKIN_CHALLENGE_LEN);
+  test_memeq(p2, p, RELAY_PAYLOAD_SIZE);
+  tt_int_op(0, ==, create_cell_format_relayed(&cell, cc));
+
+  /* Now let's do a minimal ntor EXTEND2 cell. */
+  memset(&ec, 0xff, sizeof(ec));
+  memset(p, 0, sizeof(p));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, NTOR_ONIONSKIN_LEN);
+  /* 2 items; one 18.244.0.1:61681 */
+  memcpy(p, "\x02\x00\x06\x12\xf4\x00\x01\xf0\xf1", 9);
+  /* The other is a digest. */
+  memcpy(p+9, "\x02\x14" "anarchoindividualist", 22);
+  /* Prep for the handshake: type and length */
+  memcpy(p+31, "\x00\x02\x00\x54", 4);
+  memcpy(p+35, b, NTOR_ONIONSKIN_LEN);
+  tt_int_op(0, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                     p, 35+NTOR_ONIONSKIN_LEN));
+  tt_int_op(RELAY_COMMAND_EXTEND2, ==, ec.cell_type);
+  tt_str_op("18.244.0.1", ==, fmt_addr(&ec.orport_ipv4.addr));
+  tt_int_op(61681, ==, ec.orport_ipv4.port);
+  tt_int_op(AF_UNSPEC, ==, tor_addr_family(&ec.orport_ipv6.addr));
+  test_memeq(ec.node_id, "anarchoindividualist", 20);
+  tt_int_op(cc->cell_type, ==, CELL_CREATE2);
+  tt_int_op(cc->handshake_type, ==, ONION_HANDSHAKE_TYPE_NTOR);
+  tt_int_op(cc->handshake_len, ==, NTOR_ONIONSKIN_LEN);
+  test_memeq(cc->onionskin, b, NTOR_ONIONSKIN_LEN+20);
+  tt_int_op(0, ==, extend_cell_format(&p2_cmd, &p2_len, p2, &ec));
+  tt_int_op(p2_cmd, ==, RELAY_COMMAND_EXTEND2);
+  tt_int_op(p2_len, ==, 35+NTOR_ONIONSKIN_LEN);
+  test_memeq(p2, p, RELAY_PAYLOAD_SIZE);
+
+  /* Now let's do a fanciful EXTEND2 cell. */
+  memset(&ec, 0xff, sizeof(ec));
+  memset(p, 0, sizeof(p));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, 99);
+  /* 4 items; one 18 244 0 1 61681 */
+  memcpy(p, "\x04\x00\x06\x12\xf4\x00\x01\xf0\xf1", 9);
+  /* One is a digest. */
+  memcpy(p+9, "\x02\x14" "anthropomorphization", 22);
+  /* One is an ipv6 address */
+  memcpy(p+31, "\x01\x12\x20\x02\x00\x00\x00\x00\x00\x00"
+               "\x00\x00\x00\x00\x00\xf0\xc5\x1e\x11\x12", 20);
+  /* One is the Konami code. */
+  memcpy(p+51, "\xf0\x20upupdowndownleftrightleftrightba", 34);
+  /* Prep for the handshake: weird type and length */
+  memcpy(p+85, "\x01\x05\x00\x63", 4);
+  memcpy(p+89, b, 99);
+  tt_int_op(0, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2, p, 89+99));
+  tt_int_op(RELAY_COMMAND_EXTEND2, ==, ec.cell_type);
+  tt_str_op("18.244.0.1", ==, fmt_addr(&ec.orport_ipv4.addr));
+  tt_int_op(61681, ==, ec.orport_ipv4.port);
+  tt_str_op("2002::f0:c51e", ==, fmt_addr(&ec.orport_ipv6.addr));
+  tt_int_op(4370, ==, ec.orport_ipv6.port);
+  test_memeq(ec.node_id, "anthropomorphization", 20);
+  tt_int_op(cc->cell_type, ==, CELL_CREATE2);
+  tt_int_op(cc->handshake_type, ==, 0x105);
+  tt_int_op(cc->handshake_len, ==, 99);
+  test_memeq(cc->onionskin, b, 99+20);
+  tt_int_op(0, ==, extend_cell_format(&p2_cmd, &p2_len, p2, &ec));
+  tt_int_op(p2_cmd, ==, RELAY_COMMAND_EXTEND2);
+  /* We'll generate it minus the IPv6 address and minus the konami code */
+  tt_int_op(p2_len, ==, 89+99-34-20);
+  test_memeq_hex(p2,
+                 /* Two items: one that same darn IP address. */
+                 "02000612F40001F0F1"
+                 /* The next is a digest : anthropomorphization */
+                 "0214616e7468726f706f6d6f727068697a6174696f6e"
+                 /* Now the handshake prologue */
+                 "01050063");
+  /* The handshake body follows the count byte (1), the IPv4 specifier (8),
+   * the digest specifier (22) and the type/length prologue (4). */
+  test_memeq(p2+1+8+22+4, b, 99+20);
+  tt_int_op(0, ==, create_cell_format_relayed(&cell, cc));
+
+  /* == Now try parsing some junk */
+
+  /* Try a too-long handshake */
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x02\x00\x06\x12\xf4\x00\x01\xf0\xf1", 9);
+  memcpy(p+9, "\x02\x14" "anarchoindividualist", 22);
+  memcpy(p+31, "\xff\xff\x01\xd0", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+
+  /* Try two identities. */
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x03\x00\x06\x12\xf4\x00\x01\xf0\xf1", 9);
+  memcpy(p+9, "\x02\x14" "anarchoindividualist", 22);
+  memcpy(p+31, "\x02\x14" "autodepolymerization", 22);
+  memcpy(p+53, "\xff\xff\x00\x10", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+
+  /* No identities. */
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x01\x00\x06\x12\xf4\x00\x01\xf0\xf1", 9);
+  memcpy(p+53, "\xff\xff\x00\x10", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+
+  /* Try a bad IPv4 address (too long, too short)*/
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x02\x00\x07\x12\xf4\x00\x01\xf0\xf1\xff", 10);
+  memcpy(p+10, "\x02\x14" "anarchoindividualist", 22);
+  memcpy(p+32, "\xff\xff\x00\x10", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x02\x00\x05\x12\xf4\x00\x01\xf0", 8);
+  memcpy(p+8, "\x02\x14" "anarchoindividualist", 22);
+  memcpy(p+30, "\xff\xff\x00\x10", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+
+  /* IPv6 address (too long, too short, no IPv4)*/
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x03\x00\x06\x12\xf4\x00\x01\xf0\xf1", 9);
+  memcpy(p+9, "\x02\x14" "anarchoindividualist", 22);
+  memcpy(p+31, "\x01\x13" "xxxxxxxxxxxxxxxxYYZ", 19);
+  memcpy(p+50, "\xff\xff\x00\x20", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x03\x00\x06\x12\xf4\x00\x01\xf0\xf1", 9);
+  memcpy(p+9, "\x02\x14" "anarchoindividualist", 22);
+  memcpy(p+31, "\x01\x11" "xxxxxxxxxxxxxxxxY", 17);
+  memcpy(p+48, "\xff\xff\x00\x20", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+  memset(p, 0, sizeof(p));
+  memcpy(p, "\x02", 1);
+  memcpy(p+1, "\x02\x14" "anarchoindividualist", 22);
+  memcpy(p+23, "\x01\x12" "xxxxxxxxxxxxxxxxYY", 18);
+  memcpy(p+41, "\xff\xff\x00\x20", 4);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+
+  /* Running out of space in specifiers  */
+  memset(p,0,sizeof(p));
+  memcpy(p, "\x05\x0a\xff", 3);
+  memcpy(p+3+255, "\x0a\xff", 2);
+  tt_int_op(-1, ==, extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2,
+                                      p, sizeof(p)));
+
+  /* Fuzz, because why not. */
+  /* The return values are ignored here; presumably the point is only that
+   * parsing random input of random length does not crash. */
+  memset(&ec, 0xff, sizeof(ec));
+  {
+    int i;
+    memset(p, 0, sizeof(p));
+    for (i = 0; i < 10000; ++i) {
+      int n = crypto_rand_int(sizeof(p));
+      crypto_rand((char *)p, n);
+      extend_cell_parse(&ec, RELAY_COMMAND_EXTEND2, p, n);
+    }
+  }
+
+ done:
+  tor_free(mem_op_hex_tmp);
+}
+
+/** Unit test for extended_cell_parse() and extended_cell_format(): round-trip
+ * an EXTENDED payload and an EXTENDED2 payload, then check that an EXTENDED2
+ * payload claiming 496 bytes of reply parses while one claiming 497 is
+ * rejected. */
+static void
+test_cfmt_extended_cells(void *arg)
+{
+  uint8_t b[512];
+  extended_cell_t ec;
+  created_cell_t *cc = &ec.created_cell;
+  uint8_t p[RELAY_PAYLOAD_SIZE];
+  uint8_t p2[RELAY_PAYLOAD_SIZE];
+  uint8_t p2_cmd;
+  uint16_t p2_len;
+  char *mem_op_hex_tmp = NULL;
+
+  (void) arg;
+
+  /* Try a regular EXTENDED cell. */
+  memset(&ec, 0xff, sizeof(ec));
+  memset(p, 0, sizeof(p));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, TAP_ONIONSKIN_REPLY_LEN);
+  memcpy(p,b,TAP_ONIONSKIN_REPLY_LEN);
+  tt_int_op(0, ==, extended_cell_parse(&ec, RELAY_COMMAND_EXTENDED, p,
+                                       TAP_ONIONSKIN_REPLY_LEN));
+  tt_int_op(RELAY_COMMAND_EXTENDED, ==, ec.cell_type);
+  tt_int_op(cc->cell_type, ==, CELL_CREATED);
+  tt_int_op(cc->handshake_len, ==, TAP_ONIONSKIN_REPLY_LEN);
+  test_memeq(cc->reply, b, TAP_ONIONSKIN_REPLY_LEN);
+  tt_int_op(0, ==, extended_cell_format(&p2_cmd, &p2_len, p2, &ec));
+  tt_int_op(RELAY_COMMAND_EXTENDED, ==, p2_cmd);
+  tt_int_op(TAP_ONIONSKIN_REPLY_LEN, ==, p2_len);
+  test_memeq(p2, p, sizeof(p2));
+
+  /* Try an EXTENDED2 cell */
+  memset(&ec, 0xff, sizeof(ec));
+  memset(p, 0, sizeof(p));
+  memset(b, 0, sizeof(b));
+  crypto_rand((char*)b, 42);
+  memcpy(p,"\x00\x2a",2); /* 2-byte length prefix: 42 */
+  memcpy(p+2,b,42);
+  tt_int_op(0, ==, extended_cell_parse(&ec, RELAY_COMMAND_EXTENDED2, p, 2+42));
+  tt_int_op(RELAY_COMMAND_EXTENDED2, ==, ec.cell_type);
+  tt_int_op(cc->cell_type, ==, CELL_CREATED2);
+  tt_int_op(cc->handshake_len, ==, 42);
+  test_memeq(cc->reply, b, 42+10);
+  tt_int_op(0, ==, extended_cell_format(&p2_cmd, &p2_len, p2, &ec));
+  tt_int_op(RELAY_COMMAND_EXTENDED2, ==, p2_cmd);
+  tt_int_op(2+42, ==, p2_len);
+  test_memeq(p2, p, sizeof(p2));
+
+  /* Try an almost-too-long EXTENDED2 cell */
+  memcpy(p, "\x01\xf0", 2); /* claimed length: 496 */
+  tt_int_op(0, ==,
+            extended_cell_parse(&ec, RELAY_COMMAND_EXTENDED2, p, sizeof(p)));
+
+  /* Now try a too-long extended2 cell. That's the only misparse I can think
+   * of. */
+  memcpy(p, "\x01\xf1", 2); /* claimed length: 497 */
+  tt_int_op(-1, ==,
+            extended_cell_parse(&ec, RELAY_COMMAND_EXTENDED2, p, sizeof(p)));
+
+ done:
+  tor_free(mem_op_hex_tmp);
+}
+
 #define TEST(name, flags)                                               \
   { #name, test_cfmt_ ## name, flags, 0, NULL }
 
@@ -381,6 +879,10 @@ struct testcase_t cell_format_tests[] = {
   TEST(relay_header, 0),
   TEST(begin_cells, 0),
   TEST(connected_cells, 0),
+  TEST(create_cells, 0),
+  TEST(created_cells, 0),
+  TEST(extend_cells, 0),
+  TEST(extended_cells, 0),
   END_OF_TESTCASES
 };
 

+ 45 - 0
src/test/test_containers.c

@@ -782,6 +782,50 @@ test_container_order_functions(void)
   ;
 }
 
+/** Unit test for the di_digest256_map_t helpers: check dimap_search() on an
+ * empty map, on a map with one entry, and on a map with three entries,
+ * including that the third argument is what comes back when the key is
+ * absent. */
+static void
+test_di_map(void *arg)
+{
+  di_digest256_map_t *map = NULL;
+  const uint8_t key1[] = "In view of the fact that it was ";
+  const uint8_t key2[] = "superficially convincing, being ";
+  const uint8_t key3[] = "properly enciphered in a one-tim";
+  const uint8_t key4[] = "e cipher scheduled for use today";
+  char *v1 = tor_strdup(", it came close to causing a disaster...");
+  char *v2 = tor_strdup("I regret to have to advise you that the mission");
+  char *v3 = tor_strdup("was actually initiated...");
+  /* -- John Brunner, _The Shockwave Rider_ */
+
+  (void)arg;
+
+  /* Try searching on an empty map. */
+  tt_ptr_op(NULL, ==, dimap_search(map, key1, NULL));
+  tt_ptr_op(NULL, ==, dimap_search(map, key2, NULL));
+  tt_ptr_op(v3, ==, dimap_search(map, key2, v3));
+  dimap_free(map, NULL);
+  map = NULL;
+
+  /* Add a single entry. */
+  dimap_add_entry(&map, key1, v1);
+  tt_ptr_op(NULL, ==, dimap_search(map, key2, NULL));
+  tt_ptr_op(v3, ==, dimap_search(map, key2, v3));
+  tt_ptr_op(v1, ==, dimap_search(map, key1, NULL));
+
+  /* Now try it with three entries in the map. */
+  dimap_add_entry(&map, key2, v2);
+  dimap_add_entry(&map, key3, v3);
+  tt_ptr_op(v1, ==, dimap_search(map, key1, NULL));
+  tt_ptr_op(v2, ==, dimap_search(map, key2, NULL));
+  tt_ptr_op(v3, ==, dimap_search(map, key3, NULL));
+  /* key4 was never added, so only the supplied default can come back. */
+  tt_ptr_op(NULL, ==, dimap_search(map, key4, NULL));
+  tt_ptr_op(v1, ==, dimap_search(map, key4, v1));
+
+ done:
+  /* The values are freed here by hand; the map is freed with a NULL second
+   * argument. */
+  tor_free(v1);
+  tor_free(v2);
+  tor_free(v3);
+  dimap_free(map, NULL);
+}
+
 #define CONTAINER_LEGACY(name)                                          \
   { #name, legacy_test_helper, 0, &legacy_setup, test_container_ ## name }
 
@@ -796,6 +840,7 @@ struct testcase_t container_tests[] = {
   CONTAINER_LEGACY(strmap),
   CONTAINER_LEGACY(pqueue),
   CONTAINER_LEGACY(order_functions),
+  { "di_map", test_di_map, 0, NULL, NULL },
   END_OF_TESTCASES
 };
 

+ 181 - 0
src/test/test_crypto.c

@@ -5,9 +5,13 @@
 
 #include "orconfig.h"
 #define CRYPTO_PRIVATE
+#define CRYPTO_CURVE25519_PRIVATE
 #include "or.h"
 #include "test.h"
 #include "aes.h"
+#ifdef CURVE25519_ENABLED
+#include "crypto_curve25519.h"
+#endif
 
 /** Run unit tests for Diffie-Hellman functionality. */
 static void
@@ -832,6 +836,177 @@ test_crypto_base32_decode(void)
   ;
 }
 
+/** Unit test for crypto_expand_key_material_TAP(): expand three fixed input
+ * strings to 100 bytes each and compare the output against precomputed
+ * hex test vectors. */
+static void
+test_crypto_kdf_TAP(void *arg)
+{
+  uint8_t key_material[100];
+  int r;
+  char *mem_op_hex_tmp = NULL;
+
+  (void)arg;
+/* EXPAND(s): expand the string s (without its terminating NUL) into all 100
+ * bytes of key_material, leaving the return code in r. */
+#define EXPAND(s)                                \
+  r = crypto_expand_key_material_TAP(            \
+    (const uint8_t*)(s), strlen(s),              \
+    key_material, 100)
+
+  /* Test vectors generated with a little python script; feel free to write
+   * your own. */
+  memset(key_material, 0, sizeof(key_material));
+  EXPAND("");
+  tt_int_op(r, ==, 0);
+  test_memeq_hex(key_material,
+                 "5ba93c9db0cff93f52b521d7420e43f6eda2784fbf8b4530d8"
+                 "d246dd74ac53a13471bba17941dff7c4ea21bb365bbeeaf5f2"
+                 "c654883e56d11e43c44e9842926af7ca0a8cca12604f945414"
+                 "f07b01e13da42c6cf1de3abfdea9b95f34687cbbe92b9a7383");
+
+  EXPAND("Tor");
+  tt_int_op(r, ==, 0);
+  test_memeq_hex(key_material,
+                 "776c6214fc647aaa5f683c737ee66ec44f03d0372e1cce6922"
+                 "7950f236ddf1e329a7ce7c227903303f525a8c6662426e8034"
+                 "870642a6dabbd41b5d97ec9bf2312ea729992f48f8ea2d0ba8"
+                 "3f45dfda1a80bdc8b80de01b23e3e0ffae099b3e4ccf28dc28");
+
+  EXPAND("AN ALARMING ITEM TO FIND ON A MONTHLY AUTO-DEBIT NOTICE");
+  tt_int_op(r, ==, 0);
+  test_memeq_hex(key_material,
+                 "a340b5d126086c3ab29c2af4179196dbf95e1c72431419d331"
+                 "4844bf8f6afb6098db952b95581fb6c33625709d6f4400b8e7"
+                 "ace18a70579fad83c0982ef73f89395bcc39493ad53a685854"
+                 "daf2ba9b78733b805d9a6824c907ee1dba5ac27a1e466d4d10");
+
+ done:
+  tor_free(mem_op_hex_tmp);
+
+#undef EXPAND
+}
+
+/** Unit test for crypto_expand_key_material_rfc5869_sha256(): using the ntor
+ * key_extract string as salt and the ntor key_expand string as info, expand
+ * three fixed input strings to 100 bytes each and compare the output against
+ * precomputed hex test vectors. */
+static void
+test_crypto_hkdf_sha256(void *arg)
+{
+  uint8_t key_material[100];
+  const uint8_t salt[] = "ntor-curve25519-sha256-1:key_extract";
+  const size_t salt_len = strlen((char*)salt);
+  const uint8_t m_expand[] = "ntor-curve25519-sha256-1:key_expand";
+  const size_t m_expand_len = strlen((char*)m_expand);
+  int r;
+  char *mem_op_hex_tmp = NULL;
+
+  (void)arg;
+
+/* EXPAND(s): run the KDF over the string s (without its terminating NUL) with
+ * the salt and expand strings above, filling all 100 bytes of key_material
+ * and leaving the return code in r. */
+#define EXPAND(s) \
+  r = crypto_expand_key_material_rfc5869_sha256( \
+    (const uint8_t*)(s), strlen(s),              \
+    salt, salt_len,                              \
+    m_expand, m_expand_len,                      \
+    key_material, 100)
+
+  /* Test vectors generated with ntor_ref.py */
+  memset(key_material, 0, sizeof(key_material));
+  EXPAND("");
+  tt_int_op(r, ==, 0);
+  test_memeq_hex(key_material,
+                 "d3490ed48b12a48f9547861583573fe3f19aafe3f81dc7fc75"
+                 "eeed96d741b3290f941576c1f9f0b2d463d1ec7ab2c6bf71cd"
+                 "d7f826c6298c00dbfe6711635d7005f0269493edf6046cc7e7"
+                 "dcf6abe0d20c77cf363e8ffe358927817a3d3e73712cee28d8");
+
+  EXPAND("Tor");
+  tt_int_op(r, ==, 0);
+  test_memeq_hex(key_material,
+                 "5521492a85139a8d9107a2d5c0d9c91610d0f95989975ebee6"
+                 "c02a4f8d622a6cfdf9b7c7edd3832e2760ded1eac309b76f8d"
+                 "66c4a3c4d6225429b3a016e3c3d45911152fc87bc2de9630c3"
+                 "961be9fdb9f93197ea8e5977180801926d3321fa21513e59ac");
+
+  EXPAND("AN ALARMING ITEM TO FIND ON YOUR CREDIT-RATING STATEMENT");
+  tt_int_op(r, ==, 0);
+  test_memeq_hex(key_material,
+                 "a2aa9b50da7e481d30463adb8f233ff06e9571a0ca6ab6df0f"
+                 "b206fa34e5bc78d063fc291501beec53b36e5a0e434561200c"
+                 "5f8bd13e0f88b3459600b4dc21d69363e2895321c06184879d"
+                 "94b18f078411be70b767c7fc40679a9440a0c95ea83a23efbf");
+
+ done:
+  tor_free(mem_op_hex_tmp);
+#undef EXPAND
+}
+
+#ifdef CURVE25519_ENABLED
+/** Unit test for curve25519_impl(): for 10000 rounds, check that applying
+ * the two scalars e1 and e2 to the point k in either order gives the same
+ * result, mixing the outputs back into e1, e2 and k between rounds; then
+ * compare the final state against fixed known-answer values. */
+static void
+test_crypto_curve25519_impl(void *arg)
+{
+  /* adapted from curve25519_donna, which adapted it from test-curve25519
+     version 20050915, by D. J. Bernstein, Public domain. */
+
+  unsigned char e1k[32];
+  unsigned char e2k[32];
+  unsigned char e1e2k[32];
+  unsigned char e2e1k[32];
+  unsigned char e1[32] = {3};
+  unsigned char e2[32] = {5};
+  unsigned char k[32] = {9};
+  int loop, i;
+  const int loop_max=10000;
+  char *mem_op_hex_tmp = NULL;
+
+  (void)arg;
+
+  for (loop = 0; loop < loop_max; ++loop) {
+    curve25519_impl(e1k,e1,k);
+    curve25519_impl(e2e1k,e2,e1k);
+    curve25519_impl(e2k,e2,k);
+    curve25519_impl(e1e2k,e1,e2k);
+    test_memeq(e1e2k, e2e1k, 32);
+    /* Leave the last round's values unmixed so they can be checked against
+     * the known answers below. */
+    if (loop == loop_max-1) {
+      break;
+    }
+    for (i = 0;i < 32;++i) e1[i] ^= e2k[i];
+    for (i = 0;i < 32;++i) e2[i] ^= e1k[i];
+    for (i = 0;i < 32;++i) k[i] ^= e1e2k[i];
+  }
+
+  test_memeq_hex(e1,
+                 "4faf81190869fd742a33691b0e0824d5"
+                 "7e0329f4dd2819f5f32d130f1296b500");
+  test_memeq_hex(e2k,
+                 "05aec13f92286f3a781ccae98995a3b9"
+                 "e0544770bc7de853b38f9100489e3e79");
+  test_memeq_hex(e1e2k,
+                 "cd6e8269104eb5aaee886bd2071fba88"
+                 "bd13861475516bc2cd2b6e005e805064");
+
+ done:
+  tor_free(mem_op_hex_tmp);
+}
+
+/** Unit test for the curve25519 wrapper functions: generate two keypairs,
+ * check that both public keys pass curve25519_public_key_is_ok(), and check
+ * that a handshake computed from each side yields the same output. */
+static void
+test_crypto_curve25519_wrappers(void *arg)
+{
+  curve25519_public_key_t pubkey1, pubkey2;
+  curve25519_secret_key_t seckey1, seckey2;
+
+  uint8_t output1[CURVE25519_OUTPUT_LEN];
+  uint8_t output2[CURVE25519_OUTPUT_LEN];
+  (void)arg;
+
+  /* Test a simple handshake, serializing and deserializing some stuff. */
+  /* NOTE(review): the second argument differs (0 vs 1) between the two
+   * calls, presumably to exercise both key-generation modes -- confirm
+   * against curve25519_secret_key_generate(). */
+  curve25519_secret_key_generate(&seckey1, 0);
+  curve25519_secret_key_generate(&seckey2, 1);
+  curve25519_public_key_generate(&pubkey1, &seckey1);
+  curve25519_public_key_generate(&pubkey2, &seckey2);
+  test_assert(curve25519_public_key_is_ok(&pubkey1));
+  test_assert(curve25519_public_key_is_ok(&pubkey2));
+  curve25519_handshake(output1, &seckey1, &pubkey2);
+  curve25519_handshake(output2, &seckey2, &pubkey1);
+  test_memeq(output1, output2, sizeof(output1));
+
+ done:
+  ;
+}
+#endif
+
 static void *
 pass_data_setup_fn(const struct testcase_t *testcase)
 {
@@ -863,6 +1038,12 @@ struct testcase_t crypto_tests[] = {
   { "aes_iv_AES", test_crypto_aes_iv, TT_FORK, &pass_data, (void*)"aes" },
   { "aes_iv_EVP", test_crypto_aes_iv, TT_FORK, &pass_data, (void*)"evp" },
   CRYPTO_LEGACY(base32_decode),
+  { "kdf_TAP", test_crypto_kdf_TAP, 0, NULL, NULL },
+  { "hkdf_sha256", test_crypto_hkdf_sha256, 0, NULL, NULL },
+#ifdef CURVE25519_ENABLED
+  { "curve25519_impl", test_crypto_curve25519_impl, 0, NULL, NULL },
+  { "curve25519_wrappers", test_crypto_curve25519_wrappers, 0, NULL, NULL },
+#endif
   END_OF_TESTCASES
 };
 

+ 166 - 0
src/test/test_ntor_cl.c

@@ -0,0 +1,166 @@
+/* Copyright (c) 2012, The Tor Project, Inc. */
+/* See LICENSE for licensing information */
+
+#include "orconfig.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#define ONION_NTOR_PRIVATE
+#include "or.h"
+#include "util.h"
+#include "compat.h"
+#include "crypto.h"
+#include "crypto_curve25519.h"
+#include "onion_ntor.h"
+
+#ifndef CURVE25519_ENABLED
+#error "This isn't going to work without curve25519."
+#endif
+
+/** Require that argv holds at least <b>n</b> entries.  If it holds fewer,
+ * complain on stderr (naming the subcommand found in argv[1]) and make the
+ * enclosing function return 1.  Expects <b>argc</b> and <b>argv</b> to be
+ * in scope at the point of use. */
+#define N_ARGS(n) STMT_BEGIN {                                  \
+    if (!(argc >= (n))) {                                       \
+      fprintf(stderr, "%s needs %d arguments.\n", argv[1], (n)); \
+      return 1;                                                 \
+    }                                                           \
+  } STMT_END
+/** Decode the hex string in argv[<b>idx</b>] into the <b>n</b>-byte buffer
+ * <b>var</b>.  If the argument is not exactly 2*<b>n</b> characters long,
+ * or base16_decode() reports failure, complain on stderr and make the
+ * enclosing function return 1.
+ *
+ * The explicit length check rejects arguments that are shorter or longer
+ * than the buffer requires, independent of how base16_decode() treats
+ * them, so a successful decode always fills all of <b>var</b>.
+ * Expects <b>argv</b> to be in scope at the point of use. */
+#define BASE16(idx, var, n) STMT_BEGIN {                                \
+    const char *s = argv[(idx)];                                        \
+    if (strlen(s) != 2*(size_t)(n) ||                                   \
+        base16_decode((char*)(var), (n), s, strlen(s)) < 0 ) {          \
+      fprintf(stderr, "couldn't decode argument %d (%s)\n",idx,s);      \
+      return 1;                                                         \
+    }                                                                   \
+  } STMT_END
+/** Parse argv[<b>idx</b>] with atoi() and store the result in <b>var</b>.
+ * A result that is zero or negative (atoi() also yields zero when it finds
+ * no digits) is reported on stderr and makes the enclosing function
+ * return 1, so that callers never go on to use a non-positive value as a
+ * length.  Expects <b>argv</b> to be in scope at the point of use. */
+#define INT(idx, var) STMT_BEGIN {                                      \
+    var = atoi(argv[(idx)]);                                            \
+    if (var <= 0) {                                                     \
+      fprintf(stderr, "bad integer argument %d (%s)\n",idx,argv[(idx)]); \
+      return 1;                                                         \
+    }                                                                   \
+  } STMT_END
+
+/** Implement the "client1" subcommand: begin an ntor handshake.
+ *
+ * argv[2] is the hex-encoded node ID (DIGEST_LEN bytes) and argv[3] is the
+ * hex-encoded curve25519 public key B.  On success, print two lines to
+ * stdout -- the hex-encoded onionskin, then a hex dump of the raw bytes of
+ * the client handshake state -- and return 0.  Return 1 if the arguments
+ * are missing or cannot be decoded, and 2 if the onionskin cannot be
+ * created. */
+static int
+client1(int argc, char **argv)
+{
+  /* client1 nodeID B -> msg state */
+  curve25519_public_key_t B;
+  uint8_t node_id[DIGEST_LEN];
+  /* Explicitly NULL until onion_skin_ntor_create() fills it in. */
+  ntor_handshake_state_t *state = NULL;
+  uint8_t msg[NTOR_ONIONSKIN_LEN];
+
+  char buf[1024];
+
+  N_ARGS(4);
+  BASE16(2, node_id, DIGEST_LEN);
+  BASE16(3, B.public_key, CURVE25519_PUBKEY_LEN);
+
+  if (onion_skin_ntor_create(node_id, &B, &state, msg)<0) {
+    fprintf(stderr, "handshake failed");
+    return 2;
+  }
+
+  base16_encode(buf, sizeof(buf), (const char*)msg, sizeof(msg));
+  printf("%s\n", buf);
+  /* The state is emitted as the in-memory bytes of the struct itself, so
+   * that a later "client2" run can decode it straight back into one. */
+  base16_encode(buf, sizeof(buf), (void*)state, sizeof(*state));
+  printf("%s\n", buf);
+  ntor_handshake_state_free(state);
+  return 0;
+}
+
+/** Implement the "server1" subcommand: answer a client's ntor onionskin.
+ *
+ * argv[2] is the hex-encoded curve25519 secret key b, argv[3] the
+ * hex-encoded node ID, argv[4] the hex-encoded onionskin, and argv[5] the
+ * number of key bytes to derive.  On success, print two lines to stdout --
+ * the hex-encoded reply, then the hex-encoded key material -- and return
+ * 0.  Return 1 if the arguments are missing or cannot be decoded, and 2 if
+ * the handshake fails.  Everything allocated here is released on both the
+ * success and the handshake-failure paths. */
+static int
+server1(int argc, char **argv)
+{
+  uint8_t msg_in[NTOR_ONIONSKIN_LEN];
+  curve25519_keypair_t kp;
+  di_digest256_map_t *keymap=NULL;
+  uint8_t node_id[DIGEST_LEN];
+  int keybytes;
+
+  uint8_t msg_out[NTOR_REPLY_LEN];
+  uint8_t *keys = NULL;
+  char *hexkeys = NULL;
+  int result = 0;
+
+  char buf[256];
+
+  /* server1: b nodeID msg N -> msg keys */
+  N_ARGS(6);
+  BASE16(2, kp.seckey.secret_key, CURVE25519_SECKEY_LEN);
+  BASE16(3, node_id, DIGEST_LEN);
+  BASE16(4, msg_in, NTOR_ONIONSKIN_LEN);
+  INT(5, keybytes);
+
+  /* Only the secret key comes from the command line; derive the public
+   * half and register the pair in the key map under its public key. */
+  curve25519_public_key_generate(&kp.pubkey, &kp.seckey);
+  dimap_add_entry(&keymap, kp.pubkey.public_key, &kp);
+
+  keys = tor_malloc(keybytes);
+  hexkeys = tor_malloc(keybytes*2+1);
+  if (onion_skin_ntor_server_handshake(
+                                msg_in, keymap, NULL, node_id, msg_out, keys,
+                                (size_t)keybytes)<0) {
+    fprintf(stderr, "handshake failed");
+    result = 2;
+    goto done;
+  }
+
+  base16_encode(buf, sizeof(buf), (const char*)msg_out, sizeof(msg_out));
+  printf("%s\n", buf);
+  base16_encode(hexkeys, keybytes*2+1, (const char*)keys, keybytes);
+  printf("%s\n", hexkeys);
+
+ done:
+  tor_free(keys);
+  tor_free(hexkeys);
+  /* No value-free function: the map's only value is kp, which lives on
+   * this function's stack. */
+  dimap_free(keymap, NULL);
+  return result;
+}
+
+/** Implement the "client2" subcommand: finish an ntor handshake.
+ *
+ * argv[2] is the hex dump of the client handshake state (as printed by
+ * "client1"), argv[3] the hex-encoded server reply, and argv[4] the number
+ * of key bytes to derive.  On success, print the hex-encoded key material
+ * to stdout and return 0.  Return 1 if the arguments are missing or cannot
+ * be decoded, and 2 if the handshake fails.  The key buffers are released
+ * on both the success and the handshake-failure paths. */
+static int
+client2(int argc, char **argv)
+{
+  struct ntor_handshake_state_t state;
+  uint8_t msg[NTOR_REPLY_LEN];
+  int keybytes;
+  uint8_t *keys = NULL;
+  char *hexkeys = NULL;
+  int result = 0;
+
+  N_ARGS(5);
+  /* The state argument is decoded directly over the struct's bytes. */
+  BASE16(2, (&state), sizeof(state));
+  BASE16(3, msg, sizeof(msg));
+  INT(4, keybytes);
+
+  keys = tor_malloc(keybytes);
+  hexkeys = tor_malloc(keybytes*2+1);
+  if (onion_skin_ntor_client_handshake(&state, msg, keys,
+                                       (size_t)keybytes)<0) {
+    fprintf(stderr, "handshake failed");
+    result = 2;
+    goto done;
+  }
+
+  base16_encode(hexkeys, keybytes*2+1, (const char*)keys, keybytes);
+  printf("%s\n", hexkeys);
+
+ done:
+  tor_free(keys);
+  tor_free(hexkeys);
+
+  return result;
+}
+
+/** Entry point: look up the subcommand named in argv[1] and hand the whole
+ * argument vector to its handler, returning whatever the handler returns.
+ * Return 1 when no subcommand is given or when it is not recognized.
+ *
+ * Subcommands and their arguments:
+ *   client1: nodeID B -> msg state
+ *   server1: b nodeID msg N -> msg keys
+ *   client2: state msg N -> keys
+ */
+int
+main(int argc, char **argv)
+{
+  static const struct {
+    const char *name;
+    int (*handler)(int, char **);
+  } subcommands[] = {
+    { "client1", client1 },
+    { "server1", server1 },
+    { "client2", client2 },
+  };
+  const size_t n_subcommands = sizeof(subcommands)/sizeof(subcommands[0]);
+  size_t i;
+
+  if (argc < 2) {
+    fprintf(stderr, "I need arguments. Read source for more info.\n");
+    return 1;
+  }
+
+  /* Table order matches the order in which the names were tested. */
+  for (i = 0; i < n_subcommands; ++i) {
+    if (!strcmp(argv[1], subcommands[i].name))
+      return subcommands[i].handler(argc, argv);
+  }
+
+  fprintf(stderr, "What's a %s?\n", argv[1]);
+  return 1;
+}
+

+ 10 - 0
src/test/test_util.c

@@ -2843,6 +2843,16 @@ test_util_di_ops(void)
     test_eq(neq1, !eq1);
   }
 
+  tt_int_op(1, ==, safe_mem_is_zero("", 0));
+  tt_int_op(1, ==, safe_mem_is_zero("", 1));
+  tt_int_op(0, ==, safe_mem_is_zero("a", 1));
+  tt_int_op(0, ==, safe_mem_is_zero("a", 2));
+  tt_int_op(0, ==, safe_mem_is_zero("\0a", 2));
+  tt_int_op(1, ==, safe_mem_is_zero("\0\0a", 2));
+  tt_int_op(1, ==, safe_mem_is_zero("\0\0\0\0\0\0\0\0", 8));
+  tt_int_op(1, ==, safe_mem_is_zero("\0\0\0\0\0\0\0\0a", 8));
+  tt_int_op(0, ==, safe_mem_is_zero("\0\0\0\0\0\0\0\0a", 9));
+
  done:
   ;
 }

+ 2 - 0
src/tools/include.am

@@ -8,12 +8,14 @@ src_tools_tor_resolve_LDADD = src/common/libor.a @TOR_LIB_MATH@ @TOR_LIB_WS32@
 src_tools_tor_gencert_SOURCES = src/tools/tor-gencert.c
 src_tools_tor_gencert_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@
 src_tools_tor_gencert_LDADD = src/common/libor.a src/common/libor-crypto.a \
+	$(LIBDONNA) \
         @TOR_LIB_MATH@ @TOR_ZLIB_LIBS@ @TOR_OPENSSL_LIBS@ \
         @TOR_LIB_WS32@ @TOR_LIB_GDI@
 
 src_tools_tor_checkkey_SOURCES = src/tools/tor-checkkey.c
 src_tools_tor_checkkey_LDFLAGS = @TOR_LDFLAGS_zlib@ @TOR_LDFLAGS_openssl@
 src_tools_tor_checkkey_LDADD = src/common/libor.a src/common/libor-crypto.a \
+	$(LIBDONNA) \
         @TOR_LIB_MATH@ @TOR_ZLIB_LIBS@ @TOR_OPENSSL_LIBS@ \
         @TOR_LIB_WS32@ @TOR_LIB_GDI@