/* ed25519-donna-portable.h — portability shims (compiler/platform detection,
 * 128-bit arithmetic macros, endian helpers) for the ed25519-donna code. */
  1. #include "ed25519-donna-portable-identify.h"
  2. #define mul32x32_64(a,b) (((uint64_t)(a))*(b))
  3. /* platform */
  4. #if defined(COMPILER_MSVC)
  5. #include <intrin.h>
  6. #if !defined(_DEBUG)
  7. #undef mul32x32_64
  8. #define mul32x32_64(a,b) __emulu(a,b)
  9. #endif
  10. #undef inline
  11. #define inline __forceinline
  12. #define DONNA_INLINE __forceinline
  13. #define DONNA_NOINLINE __declspec(noinline)
  14. #define ALIGN(x) __declspec(align(x))
  15. #define ROTL32(a,b) _rotl(a,b)
  16. #define ROTR32(a,b) _rotr(a,b)
  17. #else
  18. #include <sys/param.h>
  19. #define DONNA_INLINE inline __attribute__((always_inline))
  20. #define DONNA_NOINLINE __attribute__((noinline))
  21. /* Tor: OSX pollutes the global namespace with an ALIGN macro. */
  22. #undef ALIGN
  23. #define ALIGN(x) __attribute__((aligned(x)))
  24. #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
  25. #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
  26. #endif
  27. /* uint128_t */
  28. #if defined(CPU_64BITS) && !defined(ED25519_FORCE_32BIT)
  29. #if defined(COMPILER_CLANG) && (COMPILER_CLANG >= 30100)
  30. #define HAVE_NATIVE_UINT128
  31. typedef unsigned __int128 uint128_t;
  32. #elif defined(COMPILER_MSVC)
  33. #define HAVE_UINT128
  34. typedef struct uint128_t {
  35. uint64_t lo, hi;
  36. } uint128_t;
  37. #define mul64x64_128(out,a,b) out.lo = _umul128(a,b,&out.hi);
  38. #define shr128_pair(out,hi,lo,shift) out = __shiftright128(lo, hi, shift);
  39. #define shl128_pair(out,hi,lo,shift) out = __shiftleft128(lo, hi, shift);
  40. #define shr128(out,in,shift) shr128_pair(out, in.hi, in.lo, shift)
  41. #define shl128(out,in,shift) shl128_pair(out, in.hi, in.lo, shift)
  42. #define add128(a,b) { uint64_t p = a.lo; a.lo += b.lo; a.hi += b.hi + (a.lo < p); }
  43. #define add128_64(a,b) { uint64_t p = a.lo; a.lo += b; a.hi += (a.lo < p); }
  44. #define lo128(a) (a.lo)
  45. #define hi128(a) (a.hi)
  46. #elif defined(COMPILER_GCC) && !defined(HAVE_NATIVE_UINT128)
  47. #if defined(__SIZEOF_INT128__)
  48. #define HAVE_NATIVE_UINT128
  49. typedef unsigned __int128 uint128_t;
  50. #elif (COMPILER_GCC >= 40400)
  51. #define HAVE_NATIVE_UINT128
  52. typedef unsigned uint128_t __attribute__((mode(TI)));
  53. #elif defined(CPU_X86_64)
  54. #define HAVE_UINT128
  55. typedef struct uint128_t {
  56. uint64_t lo, hi;
  57. } uint128_t;
  58. #define mul64x64_128(out,a,b) __asm__ ("mulq %3" : "=a" (out.lo), "=d" (out.hi) : "a" (a), "rm" (b));
  59. #define shr128_pair(out,hi,lo,shift) __asm__ ("shrdq %2,%1,%0" : "+r" (lo) : "r" (hi), "J" (shift)); out = lo;
  60. #define shl128_pair(out,hi,lo,shift) __asm__ ("shldq %2,%1,%0" : "+r" (hi) : "r" (lo), "J" (shift)); out = hi;
  61. #define shr128(out,in,shift) shr128_pair(out,in.hi, in.lo, shift)
  62. #define shl128(out,in,shift) shl128_pair(out,in.hi, in.lo, shift)
  63. #define add128(a,b) __asm__ ("addq %4,%2; adcq %5,%3" : "=r" (a.hi), "=r" (a.lo) : "1" (a.lo), "0" (a.hi), "rm" (b.lo), "rm" (b.hi) : "cc");
  64. #define add128_64(a,b) __asm__ ("addq %4,%2; adcq $0,%3" : "=r" (a.hi), "=r" (a.lo) : "1" (a.lo), "0" (a.hi), "rm" (b) : "cc");
  65. #define lo128(a) (a.lo)
  66. #define hi128(a) (a.hi)
  67. #endif
  68. #endif
  69. #if defined(HAVE_NATIVE_UINT128)
  70. #define HAVE_UINT128
  71. #define mul64x64_128(out,a,b) out = (uint128_t)a * b;
  72. #define shr128_pair(out,hi,lo,shift) out = (uint64_t)((((uint128_t)hi << 64) | lo) >> (shift));
  73. #define shl128_pair(out,hi,lo,shift) out = (uint64_t)(((((uint128_t)hi << 64) | lo) << (shift)) >> 64);
  74. #define shr128(out,in,shift) out = (uint64_t)(in >> (shift));
  75. #define shl128(out,in,shift) out = (uint64_t)((in << shift) >> 64);
  76. #define add128(a,b) a += b;
  77. #define add128_64(a,b) a += (uint64_t)b;
  78. #define lo128(a) ((uint64_t)a)
  79. #define hi128(a) ((uint64_t)(a >> 64))
  80. #endif
  81. #if !defined(HAVE_UINT128)
  82. #error Need a uint128_t implementation!
  83. #endif
  84. #endif
  85. /* endian */
  86. #if !defined(ED25519_OPENSSLRNG)
  87. static inline void U32TO8_LE(unsigned char *p, const uint32_t v) {
  88. p[0] = (unsigned char)(v );
  89. p[1] = (unsigned char)(v >> 8);
  90. p[2] = (unsigned char)(v >> 16);
  91. p[3] = (unsigned char)(v >> 24);
  92. }
  93. #endif
  94. #if !defined(HAVE_UINT128)
  95. static inline uint32_t U8TO32_LE(const unsigned char *p) {
  96. return
  97. (((uint32_t)(p[0]) ) |
  98. ((uint32_t)(p[1]) << 8) |
  99. ((uint32_t)(p[2]) << 16) |
  100. ((uint32_t)(p[3]) << 24));
  101. }
  102. #else
  103. static inline uint64_t U8TO64_LE(const unsigned char *p) {
  104. return
  105. (((uint64_t)(p[0]) ) |
  106. ((uint64_t)(p[1]) << 8) |
  107. ((uint64_t)(p[2]) << 16) |
  108. ((uint64_t)(p[3]) << 24) |
  109. ((uint64_t)(p[4]) << 32) |
  110. ((uint64_t)(p[5]) << 40) |
  111. ((uint64_t)(p[6]) << 48) |
  112. ((uint64_t)(p[7]) << 56));
  113. }
  114. static inline void U64TO8_LE(unsigned char *p, const uint64_t v) {
  115. p[0] = (unsigned char)(v );
  116. p[1] = (unsigned char)(v >> 8);
  117. p[2] = (unsigned char)(v >> 16);
  118. p[3] = (unsigned char)(v >> 24);
  119. p[4] = (unsigned char)(v >> 32);
  120. p[5] = (unsigned char)(v >> 40);
  121. p[6] = (unsigned char)(v >> 48);
  122. p[7] = (unsigned char)(v >> 56);
  123. }
  124. #endif
  125. /* Tor: Detect and disable inline assembly when clang's AddressSanitizer
  126. * is present, due to compilation failing because it runs out of registers.
  127. *
  128. * The alternative is to annotate `ge25519_scalarmult_base_choose_niels`
  129. * and selectively disable AddressSanitizer insturmentation, however doing
  130. * things this way results in a "more sanitized" binary.
  131. */
  132. #if defined(__has_feature)
  133. #if __has_feature(address_sanitizer)
  134. #define ED25519_NO_INLINE_ASM
  135. #endif
  136. #endif
  137. /* Tor: Force enable SSE2 on 32 bit x86 systems if the compile target
  138. * architecture supports it. This is not done on x86-64 as the non-SSE2
  139. * code benchmarks better, at least on Haswell.
  140. */
  141. #if defined(__SSE2__) && !defined(CPU_X86_64)
  142. /* undef in case it's manually specified... */
  143. #undef ED25519_SSE2
  144. #define ED25519_SSE2
  145. #endif
  146. /* Tor: GCC's Stack Protector freaks out and produces variable length
  147. * buffer warnings when alignment is requested that is greater than
  148. * STACK_BOUNDARY (x86 has special code to deal with this for SSE2).
  149. *
  150. * Since the only reason things are 16 byte aligned in the first place
  151. * is for SSE2, only request variable alignment for SSE2 builds.
  152. *
  153. * See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59674
  154. */
  155. #if !defined(ED25519_SSE2)
  156. #undef ALIGN
  157. #define ALIGN(x)
  158. #endif
  159. #include <stdlib.h>
  160. #include <string.h>