Browse Source

Use openssl's counter mode implementation when we have 1.0.0 or later

This shaves about 7% off our per-cell AES crypto time for me; the
effect for accelerated AES crypto should be even greater, since the AES
calculation itself will make up an even smaller portion of the
counter-mode cost.

(We don't want to do this for pre-1.0.0 OpenSSL, since our AES_CTR
implementation was actually faster than OpenSSL's there, by about
10%.)

Fixes issue #4526.
Nick Mathewson 14 years ago
parent
commit
9814019a54
2 changed files with 72 additions and 24 deletions
  1. 5 3
      changes/aes_hackery
  2. 67 21
      src/common/aes.c

+ 5 - 3
changes/aes_hackery

@@ -4,11 +4,13 @@
       relatively few servers should still be on any version of OpenSSL
       relatively few servers should still be on any version of OpenSSL
       that doesn't have good optimized assembly AES.
       that doesn't have good optimized assembly AES.
 
 
-  o Major features:
+  o Major features (AES performance):
     - Use OpenSSL's EVP interface for AES encryption, so that all
     - Use OpenSSL's EVP interface for AES encryption, so that all
       AES operations can use hardware acceleration (if present).
       AES operations can use hardware acceleration (if present).
       Resolves issue #4442.
       Resolves issue #4442.
     - But only use the EVP interface when AES acceleration is enabled,
     - But only use the EVP interface when AES acceleration is enabled,
       to avoid a performance regression.  Resolves issue #4525.
       to avoid a performance regression.  Resolves issue #4525.
-
-
+    - When using OpenSSL 1.0.0 or later, use OpenSSL's counter mode
+      implementation; it makes AES_CTR about 7% faster than our old one
+      (which was about 10% faster than the one OpenSSL used to provide).
+      Resolves issue #4526.

+ 67 - 21
src/common/aes.c

@@ -17,6 +17,11 @@
 #include <openssl/aes.h>
 #include <openssl/aes.h>
 #include <openssl/evp.h>
 #include <openssl/evp.h>
 #include <openssl/engine.h>
 #include <openssl/engine.h>
+#if OPENSSL_VERSION_NUMBER >= 0x10000000L
+/* See comments about which counter mode implementation to use below. */
+#include <openssl/modes.h>
+#define USE_OPENSSL_CTR
+#endif
 #include "compat.h"
 #include "compat.h"
 #include "aes.h"
 #include "aes.h"
 #include "util.h"
 #include "util.h"
@@ -35,7 +40,13 @@
  * faster than indirecting through the EVP layer.
  * faster than indirecting through the EVP layer.
  */
  */
 
 
-/* Include OpenSSL headers as needed. */
+/* We have 2 strategies for counter mode: use our own, or use OpenSSL's.
+ *
+ * Here we have a counter mode that's faster than the one shipping with
+ * OpenSSL pre-1.0 (by about 10%!).  But OpenSSL 1.0.0 added a counter mode
+ * implementation faster than the one here (by about 7%).  So we pick which
+ * one to use based on the OpenSSL version above.
+ */
 
 
 /*======================================================================*/
 /*======================================================================*/
 /* Interface to AES code, and counter implementation */
 /* Interface to AES code, and counter implementation */
@@ -48,7 +59,7 @@ struct aes_cnt_cipher {
     AES_KEY aes;
     AES_KEY aes;
   } key;
   } key;
 
 
-#if !defined(WORDS_BIGENDIAN)
+#if !defined(WORDS_BIGENDIAN) && !defined(USE_OPENSSL_CTR)
 #define USING_COUNTER_VARS
 #define USING_COUNTER_VARS
   /** These four values, together, implement a 128-bit counter, with
   /** These four values, together, implement a 128-bit counter, with
    * counter0 as the low-order word and counter3 as the high-order word. */
    * counter0 as the low-order word and counter3 as the high-order word. */
@@ -70,7 +81,11 @@ struct aes_cnt_cipher {
   /** The encrypted value of ctr_buf. */
   /** The encrypted value of ctr_buf. */
   uint8_t buf[16];
   uint8_t buf[16];
   /** Our current stream position within buf. */
   /** Our current stream position within buf. */
+#ifdef USE_OPENSSL_CTR
+  unsigned int pos;
+#else
   uint8_t pos;
   uint8_t pos;
+#endif
 
 
   /** True iff we're using the evp implementation of this cipher. */
   /** True iff we're using the evp implementation of this cipher. */
   uint8_t using_evp;
   uint8_t using_evp;
@@ -110,6 +125,7 @@ evaluate_evp_for_aes(int force_val)
   return 0;
   return 0;
 }
 }
 
 
+#ifndef USE_OPENSSL_CTR
 #if !defined(USING_COUNTER_VARS)
 #if !defined(USING_COUNTER_VARS)
 #define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)])
 #define COUNTER(c, n) ((c)->ctr_buf.buf32[3-(n)])
 #else
 #else
@@ -138,6 +154,7 @@ _aes_fill_buf(aes_cnt_cipher_t *cipher)
     AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key.aes);
     AES_encrypt(cipher->ctr_buf.buf, cipher->buf, &cipher->key.aes);
   }
   }
 }
 }
+#endif
 
 
 /**
 /**
  * Return a newly allocated counter-mode AES128 cipher implementation.
  * Return a newly allocated counter-mode AES128 cipher implementation.
@@ -171,6 +188,7 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
     AES_set_encrypt_key((const unsigned char *)key, key_bits, &cipher->key.aes);
     AES_set_encrypt_key((const unsigned char *)key, key_bits, &cipher->key.aes);
     cipher->using_evp = 0;
     cipher->using_evp = 0;
   }
   }
+
 #ifdef USING_COUNTER_VARS
 #ifdef USING_COUNTER_VARS
   cipher->counter0 = 0;
   cipher->counter0 = 0;
   cipher->counter1 = 0;
   cipher->counter1 = 0;
@@ -181,7 +199,12 @@ aes_set_key(aes_cnt_cipher_t *cipher, const char *key, int key_bits)
   memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf));
   memset(cipher->ctr_buf.buf, 0, sizeof(cipher->ctr_buf.buf));
 
 
   cipher->pos = 0;
   cipher->pos = 0;
+
+#ifdef USE_OPENSSL_CTR
+  memset(cipher->buf, 0, sizeof(cipher->buf));
+#else
   _aes_fill_buf(cipher);
   _aes_fill_buf(cipher);
+#endif
 }
 }
 
 
 /** Release storage held by <b>cipher</b>
 /** Release storage held by <b>cipher</b>
@@ -206,6 +229,18 @@ aes_free_cipher(aes_cnt_cipher_t *cipher)
 #define UPDATE_CTR_BUF(c, n)
 #define UPDATE_CTR_BUF(c, n)
 #endif
 #endif
 
 
+#ifdef USE_OPENSSL_CTR
+/* Helper function to use EVP with openssl's counter-mode wrapper.
+ *
+ * Encrypts the 16-byte block <b>in</b> into <b>out</b> by calling
+ * EVP_EncryptUpdate() on <b>key</b>, which must really point to an
+ * EVP_CIPHER_CTX even though it is declared as a const void pointer.
+ * aes_crypt() hands this function, together with &cipher->key.evp, to
+ * CRYPTO_ctr128_encrypt() as its per-block encryption callback. */
+static void evp_block128_fn(const uint8_t in[16],
+                            uint8_t out[16],
+                            const void *key)
+{
+  EVP_CIPHER_CTX *ctx = (void*)key; /* Cast drops the const qualifier. */
+  int inl=16, outl=16;
+  EVP_EncryptUpdate(ctx, out, &outl, in, inl); /* Result and outl unchecked. */
+}
+#endif
+
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the result in
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the result in
  * <b>output</b>.  Uses the key in <b>cipher</b>, and advances the counter
  * <b>output</b>.  Uses the key in <b>cipher</b>, and advances the counter
  * by <b>len</b> bytes as it encrypts.
  * by <b>len</b> bytes as it encrypts.
@@ -214,20 +249,29 @@ void
 aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
 aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
           char *output)
           char *output)
 {
 {
-  /* This function alone is up to 5% of our runtime in some profiles; anything
-   * we could do to make it faster would be great.
-   *
-   * Experimenting suggests that unrolling the inner loop into a switch
-   * statement doesn't help.  What does seem to help is making the input and
-   * output buffers word aligned, and never crypting anything besides an
-   * integer number of words at a time -- it shaves maybe 4-5% of the per-byte
-   * encryption time measured by bench_aes. We can't do that with the current
-   * Tor protocol, though: Tor really likes to crypt things in 509-byte
-   * chunks.
-   *
-   * If we were really ambitous, we'd force len to be a multiple of the block
-   * size, and shave maybe another 4-5% off.
-   */
+#ifdef USE_OPENSSL_CTR
+  if (cipher->using_evp) {
+    /* In openssl 1.0.0, there's an if'd out EVP_aes_128_ctr in evp.h.  If
+     * it weren't disabled, it might be better just to use that.
+     */
+    CRYPTO_ctr128_encrypt((const unsigned char *)input,
+                          (unsigned char *)output,
+                          len,
+                          &cipher->key.evp,
+                          cipher->ctr_buf.buf,
+                          cipher->buf,
+                          &cipher->pos,
+                          evp_block128_fn);
+  } else {
+    AES_ctr128_encrypt((const unsigned char *)input,
+                       (unsigned char *)output,
+                       len,
+                       &cipher->key.aes,
+                       cipher->ctr_buf.buf,
+                       cipher->buf,
+                       &cipher->pos);
+  }
+#else
   int c = cipher->pos;
   int c = cipher->pos;
   if (PREDICT_UNLIKELY(!len)) return;
   if (PREDICT_UNLIKELY(!len)) return;
 
 
@@ -250,6 +294,7 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
     UPDATE_CTR_BUF(cipher, 0);
     UPDATE_CTR_BUF(cipher, 0);
     _aes_fill_buf(cipher);
     _aes_fill_buf(cipher);
   }
   }
+#endif
 }
 }
 
 
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place.
 /** Encrypt <b>len</b> bytes from <b>input</b>, storing the results in place.
@@ -259,11 +304,9 @@ aes_crypt(aes_cnt_cipher_t *cipher, const char *input, size_t len,
 void
 void
 aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
 aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
 {
 {
-
-  /* XXXX This function is up to 5% of our runtime in some profiles;
-   * we should look into unrolling some of the loops; taking advantage
-   * of alignment, using a bigger buffer, and so on. Not till after 0.1.2.x,
-   * though. */
+#ifdef USE_OPENSSL_CTR
+  aes_crypt(cipher, data, len, data);
+#else
   int c = cipher->pos;
   int c = cipher->pos;
   if (PREDICT_UNLIKELY(!len)) return;
   if (PREDICT_UNLIKELY(!len)) return;
 
 
@@ -286,6 +329,7 @@ aes_crypt_inplace(aes_cnt_cipher_t *cipher, char *data, size_t len)
     UPDATE_CTR_BUF(cipher, 0);
     UPDATE_CTR_BUF(cipher, 0);
     _aes_fill_buf(cipher);
     _aes_fill_buf(cipher);
   }
   }
+#endif
 }
 }
 
 
 /** Reset the 128-bit counter of <b>cipher</b> to the 16-byte big-endian value
 /** Reset the 128-bit counter of <b>cipher</b> to the 16-byte big-endian value
@@ -302,6 +346,8 @@ aes_set_iv(aes_cnt_cipher_t *cipher, const char *iv)
   cipher->pos = 0;
   cipher->pos = 0;
   memcpy(cipher->ctr_buf.buf, iv, 16);
   memcpy(cipher->ctr_buf.buf, iv, 16);
 
 
+#ifndef USE_OPENSSL_CTR
   _aes_fill_buf(cipher);
   _aes_fill_buf(cipher);
+#endif
 }
 }