Browse Source

Improve keccak-tiny performance by 15% on LE intel

The 64-bit load and store code was generating pretty bad output with
my compiler, so I extracted the code from csiphash and used that instead.

Close ticket 21737
Nick Mathewson 7 years ago
parent
commit
9014dc111a
5 changed files with 79 additions and 48 deletions
  1. 4 0
      changes/faster-keccak
  2. 67 0
      src/ext/byteorder.h
  3. 1 35
      src/ext/csiphash.c
  4. 1 0
      src/ext/include.am
  5. 6 13
      src/ext/keccak-tiny/keccak-tiny-unrolled.c

+ 4 - 0
changes/faster-keccak

@@ -0,0 +1,4 @@
+  o Minor features (performance):
+    - The minimal keccak implementation we include now accesses memory
+      more efficiently, especially on little-endian systems.
+      Closes ticket 21737.

+ 67 - 0
src/ext/byteorder.h

@@ -0,0 +1,67 @@
+/* <MIT License>
+ Copyright (c) 2013-2014  Marek Majkowski <marek@popcount.org>
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+ </MIT License>
+
+ Original location:
+    https://github.com/majek/csiphash/
+
+ Solution inspired by code from:
+    Samuel Neves (supercop/crypto_auth/siphash24/little)
+    djb (supercop/crypto_auth/siphash24/little2)
+    Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
+*/
+
+/* This code is extracted from csiphash.c */
+
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) &&      \
+	__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#  define _le64toh(x) ((uint64_t)(x))
+#elif defined(_WIN32)
+/* Windows is always little endian, unless you're on xbox360
+   http://msdn.microsoft.com/en-us/library/b0084kay(v=vs.80).aspx */
+#  define _le64toh(x) ((uint64_t)(x))
+#elif defined(__APPLE__)
+#  include <libkern/OSByteOrder.h>
+#  define _le64toh(x) OSSwapLittleToHostInt64(x)
+#elif defined(sun) || defined(__sun)
+#  include <sys/byteorder.h>
+#  define _le64toh(x) LE_64(x)
+
+#else
+
+/* See: http://sourceforge.net/p/predef/wiki/Endianness/ */
+#  if defined(__FreeBSD__) || defined(__NetBSD__) || defined(OpenBSD)
+#    include <sys/endian.h>
+#  else
+#    include <endian.h>
+#  endif
+#  if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
+	__BYTE_ORDER == __LITTLE_ENDIAN
+#    define _le64toh(x) ((uint64_t)(x))
+#  else
+#    if defined(OpenBSD)
+#      define _le64toh(x) letoh64(x)
+#    else
+#      define _le64toh(x) le64toh(x)
+#    endif
+#  endif
+
+#endif

+ 1 - 35
src/ext/csiphash.c

@@ -35,41 +35,7 @@
 #include "util.h"
 /* for memcpy */
 #include <string.h>
-
-#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
-	__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
-#  define _le64toh(x) ((uint64_t)(x))
-#elif defined(_WIN32)
-/* Windows is always little endian, unless you're on xbox360
-   http://msdn.microsoft.com/en-us/library/b0084kay(v=vs.80).aspx */
-#  define _le64toh(x) ((uint64_t)(x))
-#elif defined(__APPLE__)
-#  include <libkern/OSByteOrder.h>
-#  define _le64toh(x) OSSwapLittleToHostInt64(x)
-#elif defined(sun) || defined(__sun)
-#  include <sys/byteorder.h>
-#  define _le64toh(x) LE_64(x)
-
-#else
-
-/* See: http://sourceforge.net/p/predef/wiki/Endianness/ */
-#  if defined(__FreeBSD__) || defined(__NetBSD__) || defined(OpenBSD)
-#    include <sys/endian.h>
-#  else
-#    include <endian.h>
-#  endif
-#  if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
-	__BYTE_ORDER == __LITTLE_ENDIAN
-#    define _le64toh(x) ((uint64_t)(x))
-#  else
-#    if defined(OpenBSD)
-#      define _le64toh(x) letoh64(x)
-#    else
-#      define _le64toh(x) le64toh(x)
-#    endif
-#  endif
-
-#endif
+#include "byteorder.h"
 
 #define ROTATE(x, b) (uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) )
 

+ 1 - 0
src/ext/include.am

@@ -5,6 +5,7 @@ EXTRA_DIST += src/ext/README
 
 EXTHEADERS = \
   src/ext/ht.h		\
+  src/ext/byteorder.h   \
   src/ext/tinytest.h	\
   src/ext/tor_readpassphrase.h \
   src/ext/strlcat.c	\

+ 6 - 13
src/ext/keccak-tiny/keccak-tiny-unrolled.c

@@ -10,28 +10,21 @@
 
 #include <string.h>
 #include "crypto.h"
+#include "byteorder.h"
 
 /******** Endianness conversion helpers ********/
 
 static inline uint64_t
 loadu64le(const unsigned char *x) {
-  uint64_t r = 0;
-  size_t i;
-
-  for (i = 0; i < 8; ++i) {
-    r |= (uint64_t)x[i] << 8 * i;
-  }
-  return r;
+  uint64_t r;
+  memcpy(&r, x, sizeof(r));
+  return _le64toh(r);
 }
 
 static inline void
 storeu64le(uint8_t *x, uint64_t u) {
-  size_t i;
-
-  for(i=0; i<8; ++i) {
-    x[i] = u;
-    u >>= 8;
-  }
+  uint64_t val = _le64toh(u);
+  memcpy(x, &val, sizeof(u));
 }
 
 /******** The Keccak-f[1600] permutation ********/