Browse Source

Merge branch 'more_aes_hackery_rebased'

Conflicts:
	changes/aes_hackery
Nick Mathewson 14 years ago
parent
commit
25c9e3aab9
5 changed files with 186 additions and 70 deletions
  1. 10 0
      changes/aes_hackery
  2. 142 66
      src/common/aes.c
  3. 2 0
      src/common/aes.h
  4. 3 0
      src/common/crypto.c
  5. 29 4
      src/test/test_crypto.c

+ 10 - 0
changes/aes_hackery

@@ -0,0 +1,10 @@
+  o Major features (AES performance):
+    - When using OpenSSL 1.0.0 or later, use OpenSSL's counter mode
+      implementation; it makes AES_CTR about 7% faster than our old one
+      (which was about 10% faster than the one OpenSSL used to provide).
+      Resolves issue #4526.
+
+  o Major bugfixes (AES performance):
+    - Only use the EVP interface when AES acceleration is enabled,
+      to avoid a 5-7% performance regression.  Resolves issue #4525,
+      bugfix on 0.2.3.8-alpha.

+ 142 - 66
src/common/aes.c

@@ -14,34 +14,39 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+#include <openssl/engine.h>
+#if OPENSSL_VERSION_NUMBER >= 0x10000000L
+/* See comments about which counter mode implementation to use below. */
+#include <openssl/modes.h>
+#define USE_OPENSSL_CTR
+#endif
 #include "compat.h"
 #include "aes.h"
 #include "util.h"
 #include "torlog.h"
 
-/* We have 2 strategies for getting AES: Via OpenSSL's AES_encrypt function,
- * via OpenSSL's EVP_EncryptUpdate function. */
-
-/** Defined iff we're using OpenSSL's AES functions for AES. */
-#undef USE_OPENSSL_AES
-/** Defined iff we're using OpenSSL's EVP code for AES. */
-#undef USE_OPENSSL_EVP
-
-/* Here we pick which to use, if none is force-defined above */
-#if (!defined(USE_OPENSSL_AES) && \
-     !defined(USE_OPENSSL_EVP))
-
-#define USE_OPENSSL_EVP
-
+#ifdef ANDROID
+/* Android's OpenSSL seems to have removed all of its Engine support. */
+#define DISABLE_ENGINES
 #endif
 
-/* Include OpenSSL headers as needed. */
-#ifdef USE_OPENSSL_AES
-# include <openssl/aes.h>
-#endif
-#ifdef USE_OPENSSL_EVP
-# include <openssl/evp.h>
-#endif
+/* We have 2 strategies for getting AES: Via OpenSSL's AES_encrypt function,
+ * via OpenSSL's EVP_EncryptUpdate function.
+ *
+ * If there's any hardware acceleration in play, we want to be using EVP_* so
+ * we can get it.  Otherwise, we'll want AES_*, which seems to be about 5%
+ * faster than indirecting through the EVP layer.
+ */
+
+/* We have 2 strategies for counter mode: use our own, or use OpenSSL's.
+ *
+ * Here we have a counter mode that's faster than the one shipping with
+ * OpenSSL pre-1.0 (by about 10%!).  But OpenSSL 1.0.0 added a counter mode
+ * implementation faster than the one here (by about 7%).  So we pick which
+ * one to used based on the Openssl version above.
+ */
 
 /*======================================================================*/
 /* Interface to AES code, and counter implementation */
@@ -49,13 +54,12 @@
 /** Implements an AES counter-mode cipher. */
 struct aes_cnt_cipher {
 /** This next element (however it's defined) is the AES key. */
-#if defined(USE_OPENSSL_EVP)
-  EVP_CIPHER_CTX key;
-#elif defined(USE_OPENSSL_AES)
-  AES_KEY key;
-#endif
+  union {
+    EVP_CIPHER_CTX evp;
+    AES_KEY aes;
+  } key;
 
-#if !defined(WORDS_BIGENDIAN)
+#if !defined(WORDS_BIGENDIAN) && !defined(USE_OPENSSL_CTR)
 #define USING_COUNTER_VARS
   /** These four values, together, implement a 128-bit counter, with
    * counter0 as the low-order word and counter3 as the high-order word. */
@@ -77,9 +81,51 @@ struct aes_cnt_cipher {
   /** The encrypted value of ctr_buf. */
   uint8_t buf[16];
   /** Our current stream position within buf. */
+#ifdef USE_OPENSSL_CTR
+  unsigned int pos;
+#else
   uint8_t pos;
+#endif
+
+  /** True iff we're using the evp implementation of this cipher. */
+  uint8_t using_evp;
 };
 
+/** True if we should prefer the EVP implementation for AES, either because
+ * we're testing it or because we have hardware acceleration configured */
+static int should_use_EVP = 0;
+
+/** Check whether we should use the EVP interface for AES. If <b>force_val</b>
+ * is nonnegative, we use use EVP iff it is true.  Otherwise, we use EVP
+ * if there is an engine enabled for aes-ecb. */
+int
+evaluate_evp_for_aes(int force_val)
+{
+  ENGINE *e;
+
+  if (force_val >= 0) {
+    should_use_EVP = force_val;
+    return 0;
+  }
+#ifdef DISABLE_ENGINES
+  should_use_EVP = 0;
+#else
+  e = ENGINE_get_cipher_engine(NID_aes_128_ecb);
+
+  if (e) {
+    log_notice(LD_CRYPTO, "AES engine \"%s\" found; using EVP_* functions.",
+               ENGINE_get_name(e));
+    should_use_EVP = 1;
+  } else {
+    log_notice(LD_CRYPTO, "No AES engine found; using AES_* functions.");
+    should_use_EVP = 0;
+  }
+#endif
+
+  return 0;
+}
+
+#ifndef USE_OPENSSL_CTR
 #if !defined(USING_COUNTER_VARS)
 #define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)])
 #else
@@ -100,16 +146,15 @@ _aes_fill_buf(aes_cnt_cipher_t *cipher)
    * None of these issues are insurmountable in principle.
    */
 
-#if defined(USE_OPENSSL_EVP)
-  {
+  if (cipher->using_evp) {
     int outl=16, inl=16;
-    EVP_EncryptUpdate(&cipher->key, cipher->buf, &outl,
+    EVP_EncryptUpdate(&cipher->key.evp, cipher->buf, &outl,
                       cipher->ctr_buf.buf, inl);
+  } else {
+    AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key.aes);
   }
-#elif defined(USE_OPENSSL_AES)
-  AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key);
-#endif
 }
+#endif
 
 /**
  * Return a newly allocated counter-mode AES128 cipher implementation.
@@ -129,18 +174,21 @@ aes_new_cipher(void)
 void
 aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
 {
-#if defined(USE_OPENSSL_EVP)
-  const EVP_CIPHER *c;
-  switch (key_bits) {
-    case 128: c = EVP_aes_128_ecb(); break;
-    case 192: c = EVP_aes_192_ecb(); break;
-    case 256: c = EVP_aes_256_ecb(); break;
-    default: tor_assert(0);
+  if (should_use_EVP) {
+    const EVP_CIPHER *c;
+    switch (key_bits) {
+      case 128: c = EVP_aes_128_ecb(); break;
+      case 192: c = EVP_aes_192_ecb(); break;
+      case 256: c = EVP_aes_256_ecb(); break;
+      default: tor_assert(0);
+    }
+    EVP_EncryptInit(&cipher->key.evp, c, (const unsigned char*)key, NULL);
+    cipher->using_evp = 1;
+  } else {
+    AES_set_encrypt_key((const unsigned char *)key, key_bits, &cipher->key.aes);
+    cipher->using_evp = 0;
   }
-  EVP_EncryptInit(&cipher->key, c, (const unsigned char*)key, NULL);
-#elif defined(USE_OPENSSL_AES)
-  AES_set_encrypt_key((const unsigned char *)key, key_bits, &(cipher->key));
-#endif
+
 #ifdef USING_COUNTER_VARS
   cipher->counter0 = 0;
   cipher->counter1 = 0;
@@ -151,7 +199,12 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
   memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf));
 
   cipher->pos = 0;
+
+#ifdef USE_OPENSSL_CTR
+  memset(cipher->buf, 0, sizeof(cipher->buf));
+#else
   _aes_fill_buf(cipher);
+#endif
 }
 
 /** Release storage held by <b>cipher</b>
@@ -161,9 +214,9 @@ aes_free_cipher(aes_cnt_cipher_t *cipher)
 {
   if (!cipher)
     return;
-#ifdef USE_OPENSSL_EVP
-  EVP_CIPHER_CTX_cleanup(&cipher->key);
-#endif
+  if (cipher->using_evp) {
+    EVP_CIPHER_CTX_cleanup(&cipher->key.evp);
+  }
   memset(cipher, 0, sizeof(aes_cnt_cipher_t));
   tor_free(cipher);
 }
@@ -176,6 +229,18 @@ aes_free_cipher(aes_cnt_cipher_t *cipher)
 #define UPDATE_CTR_BUF(c, n)
 #endif
 
+#ifdef USE_OPENSSL_CTR
+/* Helper function to use EVP with openssl's counter-mode wrapper. */
+static void evp_block128_fn(const uint8_t in[16],
+                            uint8_t out[16],
+                            const void *key)
+{
+  EVP_CIPHER_CTX *ctx = (void*)key;
+  int inl=16, outl=16;
+  EVP_EncryptUpdate(ctx, out, &outl, in, inl);
+}
+#endif
+
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the result in
  * <b>output</b>.  Uses the key in <b>cipher</b>, and advances the counter
  * by <b>len</b> bytes as it encrypts.
@@ -184,20 +249,29 @@ void
 aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
           char *output)
 {
-  /* This function alone is up to 5% of our runtime in some profiles; anything
-   * we could do to make it faster would be great.
-   *
-   * Experimenting suggests that unrolling the inner loop into a switch
-   * statement doesn't help.  What does seem to help is making the input and
-   * output buffers word aligned, and never crypting anything besides an
-   * integer number of words at a time -- it shaves maybe 4-5% of the per-byte
-   * encryption time measured by bench_aes. We can't do that with the current
-   * Tor protocol, though: Tor really likes to crypt things in 509-byte
-   * chunks.
-   *
-   * If we were really ambitous, we'd force len to be a multiple of the block
-   * size, and shave maybe another 4-5% off.
-   */
+#ifdef USE_OPENSSL_CTR
+  if (cipher->using_evp) {
+    /* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h.  If
+     * it weren't disabled, it might be better just to use that.
+     */
+    CRYPTO_ctr128_encrypt((const unsigned char *)input,
+                          (unsigned char *)output,
+                          len,
+                          &cipher->key.evp,
+                          cipher->ctr_buf.buf,
+                          cipher->buf,
+                          &cipher->pos,
+                          evp_block128_fn);
+  } else {
+    AES_ctr128_encrypt((const unsigned char *)input,
+                       (unsigned char *)output,
+                       len,
+                       &cipher->key.aes,
+                       cipher->ctr_buf.buf,
+                       cipher->buf,
+                       &cipher->pos);
+  }
+#else
   int c = cipher->pos;
   if (PREDICT_UNLIKELY(!len)) return;
 
@@ -220,6 +294,7 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
     UPDATE_CTR_BUF(cipher, 0);
     _aes_fill_buf(cipher);
   }
+#endif
 }
 
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place.
@@ -229,11 +304,9 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
 void
 aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
 {
-
-  /* XXXX This function is up to 5% of our runtime in some profiles;
-   * we should look into unrolling some of the loops; taking advantage
-   * of alignment, using a bigger buffer, and so on. Not till after 0.1.2.x,
-   * though. */
+#ifdef USE_OPENSSL_CTR
+  aes_crypt(cipher, data, len, data);
+#else
   int c = cipher->pos;
   if (PREDICT_UNLIKELY(!len)) return;
 
@@ -256,6 +329,7 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
     UPDATE_CTR_BUF(cipher, 0);
     _aes_fill_buf(cipher);
   }
+#endif
 }
 
 /** Reset the 128-bit counter of <b>cipher</b> to the 16-bit big-endian value
@@ -272,6 +346,8 @@ aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv)
   cipher->pos = 0;
   memcpy(cipher->ctr_buf.buf, iv, 16);
 
+#ifndef USE_OPENSSL_CTR
   _aes_fill_buf(cipher);
+#endif
 }
 

+ 2 - 0
src/common/aes.h

@@ -24,5 +24,7 @@ void aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
 void aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len);
 void aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv);
 
+int evaluate_evp_for_aes(int force_value);
+
 #endif
 

+ 3 - 0
src/common/crypto.c

@@ -276,6 +276,9 @@ crypto_global_init(int useAccel, const char *accelName, const char *accelDir)
     } else {
       log_info(LD_CRYPTO, "NOT using OpenSSL engine support.");
     }
+
+    evaluate_evp_for_aes(-1);
+
     return crypto_seed_rng(1);
   }
   return 0;

+ 29 - 4
src/test/test_crypto.c

@@ -7,6 +7,7 @@
 #define CRYPTO_PRIVATE
 #include "or.h"
 #include "test.h"
+#include "aes.h"
 
 /** Run unit tests for Diffie-Hellman functionality. */
 static void
@@ -95,13 +96,16 @@ test_crypto_rng(void)
 
 /** Run unit tests for our AES functionality */
 static void
-test_crypto_aes(void)
+test_crypto_aes(void *arg)
 {
   char *data1 = NULL, *data2 = NULL, *data3 = NULL;
   crypto_cipher_env_t *env1 = NULL, *env2 = NULL;
   int i, j;
   char *mem_op_hex_tmp=NULL;
 
+  int use_evp = !strcmp(arg,"evp");
+  evaluate_evp_for_aes(use_evp);
+
   data1 = tor_malloc(1024);
   data2 = tor_malloc(1024);
   data3 = tor_malloc(1024);
@@ -670,7 +674,7 @@ test_crypto_s2k(void)
 
 /** Test AES-CTR encryption and decryption with IV. */
 static void
-test_crypto_aes_iv(void)
+test_crypto_aes_iv(void *arg)
 {
   crypto_cipher_env_t *cipher;
   char *plain, *encrypted1, *encrypted2, *decrypted1, *decrypted2;
@@ -678,6 +682,9 @@ test_crypto_aes_iv(void)
   char key1[16], key2[16];
   ssize_t encrypted_size, decrypted_size;
 
+  int use_evp = !strcmp(arg,"evp");
+  evaluate_evp_for_aes(use_evp);
+
   plain = tor_malloc(4095);
   encrypted1 = tor_malloc(4095 + 1 + 16);
   encrypted2 = tor_malloc(4095 + 1 + 16);
@@ -851,18 +858,36 @@ test_crypto_base32_decode(void)
   ;
 }
 
+static void *
+pass_data_setup_fn(const struct testcase_t *testcase)
+{
+  return testcase->setup_data;
+}
+static int
+pass_data_cleanup_fn(const struct testcase_t *testcase, void *ptr)
+{
+  (void)ptr;
+  (void)testcase;
+  return 1;
+}
+static const struct testcase_setup_t pass_data = {
+  pass_data_setup_fn, pass_data_cleanup_fn
+};
+
 #define CRYPTO_LEGACY(name)                                            \
   { #name, legacy_test_helper, 0, &legacy_setup, test_crypto_ ## name }
 
 struct testcase_t crypto_tests[] = {
   CRYPTO_LEGACY(formats),
   CRYPTO_LEGACY(rng),
-  CRYPTO_LEGACY(aes),
+  { "aes_AES", test_crypto_aes, TT_FORK, &pass_data, (void*)"aes" },
+  { "aes_EVP", test_crypto_aes, TT_FORK, &pass_data, (void*)"evp" },
   CRYPTO_LEGACY(sha),
   CRYPTO_LEGACY(pk),
   CRYPTO_LEGACY(dh),
   CRYPTO_LEGACY(s2k),
-  CRYPTO_LEGACY(aes_iv),
+  { "aes_iv_AES", test_crypto_aes_iv, TT_FORK, &pass_data, (void*)"aes" },
+  { "aes_iv_EVP", test_crypto_aes_iv, TT_FORK, &pass_data, (void*)"evp" },
   CRYPTO_LEGACY(base32_decode),
   END_OF_TESTCASES
 };