/* ed25519-donna-64bit-x86.h (13 KB) */
  1. #if defined(ED25519_GCC_64BIT_X86_CHOOSE)
  2. #define HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS
  3. #ifdef __clang__
  4. #pragma clang diagnostic push
  5. #pragma clang diagnostic ignored "-Woverlength-strings"
  6. #endif
  7. DONNA_NOINLINE static void
  8. ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const uint8_t table[256][96], uint32_t pos, signed char b) {
  9. int64_t breg = (int64_t)b;
  10. uint64_t sign = (uint64_t)breg >> 63;
  11. uint64_t mask = ~(sign - 1);
  12. uint64_t u = (breg + mask) ^ mask;
  13. __asm__ __volatile__ (
  14. /* ysubx+xaddy+t2d */
  15. "movq %0, %%rax ;\n"
  16. "movd %%rax, %%xmm14 ;\n"
  17. "pshufd $0x00, %%xmm14, %%xmm14 ;\n"
  18. "pxor %%xmm0, %%xmm0 ;\n"
  19. "pxor %%xmm1, %%xmm1 ;\n"
  20. "pxor %%xmm2, %%xmm2 ;\n"
  21. "pxor %%xmm3, %%xmm3 ;\n"
  22. "pxor %%xmm4, %%xmm4 ;\n"
  23. "pxor %%xmm5, %%xmm5 ;\n"
  24. /* 0 */
  25. "movq $0, %%rax ;\n"
  26. "movd %%rax, %%xmm15 ;\n"
  27. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  28. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  29. "movq $1, %%rax ;\n"
  30. "movd %%rax, %%xmm6 ;\n"
  31. "pxor %%xmm7, %%xmm7 ;\n"
  32. "pand %%xmm15, %%xmm6 ;\n"
  33. "pand %%xmm15, %%xmm7 ;\n"
  34. "por %%xmm6, %%xmm0 ;\n"
  35. "por %%xmm7, %%xmm1 ;\n"
  36. "por %%xmm6, %%xmm2 ;\n"
  37. "por %%xmm7, %%xmm3 ;\n"
  38. /* 1 */
  39. "movq $1, %%rax ;\n"
  40. "movd %%rax, %%xmm15 ;\n"
  41. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  42. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  43. "movdqa 0(%1), %%xmm6 ;\n"
  44. "movdqa 16(%1), %%xmm7 ;\n"
  45. "movdqa 32(%1), %%xmm8 ;\n"
  46. "movdqa 48(%1), %%xmm9 ;\n"
  47. "movdqa 64(%1), %%xmm10 ;\n"
  48. "movdqa 80(%1), %%xmm11 ;\n"
  49. "pand %%xmm15, %%xmm6 ;\n"
  50. "pand %%xmm15, %%xmm7 ;\n"
  51. "pand %%xmm15, %%xmm8 ;\n"
  52. "pand %%xmm15, %%xmm9 ;\n"
  53. "pand %%xmm15, %%xmm10 ;\n"
  54. "pand %%xmm15, %%xmm11 ;\n"
  55. "por %%xmm6, %%xmm0 ;\n"
  56. "por %%xmm7, %%xmm1 ;\n"
  57. "por %%xmm8, %%xmm2 ;\n"
  58. "por %%xmm9, %%xmm3 ;\n"
  59. "por %%xmm10, %%xmm4 ;\n"
  60. "por %%xmm11, %%xmm5 ;\n"
  61. /* 2 */
  62. "movq $2, %%rax ;\n"
  63. "movd %%rax, %%xmm15 ;\n"
  64. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  65. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  66. "movdqa 96(%1), %%xmm6 ;\n"
  67. "movdqa 112(%1), %%xmm7 ;\n"
  68. "movdqa 128(%1), %%xmm8 ;\n"
  69. "movdqa 144(%1), %%xmm9 ;\n"
  70. "movdqa 160(%1), %%xmm10 ;\n"
  71. "movdqa 176(%1), %%xmm11 ;\n"
  72. "pand %%xmm15, %%xmm6 ;\n"
  73. "pand %%xmm15, %%xmm7 ;\n"
  74. "pand %%xmm15, %%xmm8 ;\n"
  75. "pand %%xmm15, %%xmm9 ;\n"
  76. "pand %%xmm15, %%xmm10 ;\n"
  77. "pand %%xmm15, %%xmm11 ;\n"
  78. "por %%xmm6, %%xmm0 ;\n"
  79. "por %%xmm7, %%xmm1 ;\n"
  80. "por %%xmm8, %%xmm2 ;\n"
  81. "por %%xmm9, %%xmm3 ;\n"
  82. "por %%xmm10, %%xmm4 ;\n"
  83. "por %%xmm11, %%xmm5 ;\n"
  84. /* 3 */
  85. "movq $3, %%rax ;\n"
  86. "movd %%rax, %%xmm15 ;\n"
  87. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  88. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  89. "movdqa 192(%1), %%xmm6 ;\n"
  90. "movdqa 208(%1), %%xmm7 ;\n"
  91. "movdqa 224(%1), %%xmm8 ;\n"
  92. "movdqa 240(%1), %%xmm9 ;\n"
  93. "movdqa 256(%1), %%xmm10 ;\n"
  94. "movdqa 272(%1), %%xmm11 ;\n"
  95. "pand %%xmm15, %%xmm6 ;\n"
  96. "pand %%xmm15, %%xmm7 ;\n"
  97. "pand %%xmm15, %%xmm8 ;\n"
  98. "pand %%xmm15, %%xmm9 ;\n"
  99. "pand %%xmm15, %%xmm10 ;\n"
  100. "pand %%xmm15, %%xmm11 ;\n"
  101. "por %%xmm6, %%xmm0 ;\n"
  102. "por %%xmm7, %%xmm1 ;\n"
  103. "por %%xmm8, %%xmm2 ;\n"
  104. "por %%xmm9, %%xmm3 ;\n"
  105. "por %%xmm10, %%xmm4 ;\n"
  106. "por %%xmm11, %%xmm5 ;\n"
  107. /* 4 */
  108. "movq $4, %%rax ;\n"
  109. "movd %%rax, %%xmm15 ;\n"
  110. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  111. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  112. "movdqa 288(%1), %%xmm6 ;\n"
  113. "movdqa 304(%1), %%xmm7 ;\n"
  114. "movdqa 320(%1), %%xmm8 ;\n"
  115. "movdqa 336(%1), %%xmm9 ;\n"
  116. "movdqa 352(%1), %%xmm10 ;\n"
  117. "movdqa 368(%1), %%xmm11 ;\n"
  118. "pand %%xmm15, %%xmm6 ;\n"
  119. "pand %%xmm15, %%xmm7 ;\n"
  120. "pand %%xmm15, %%xmm8 ;\n"
  121. "pand %%xmm15, %%xmm9 ;\n"
  122. "pand %%xmm15, %%xmm10 ;\n"
  123. "pand %%xmm15, %%xmm11 ;\n"
  124. "por %%xmm6, %%xmm0 ;\n"
  125. "por %%xmm7, %%xmm1 ;\n"
  126. "por %%xmm8, %%xmm2 ;\n"
  127. "por %%xmm9, %%xmm3 ;\n"
  128. "por %%xmm10, %%xmm4 ;\n"
  129. "por %%xmm11, %%xmm5 ;\n"
  130. /* 5 */
  131. "movq $5, %%rax ;\n"
  132. "movd %%rax, %%xmm15 ;\n"
  133. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  134. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  135. "movdqa 384(%1), %%xmm6 ;\n"
  136. "movdqa 400(%1), %%xmm7 ;\n"
  137. "movdqa 416(%1), %%xmm8 ;\n"
  138. "movdqa 432(%1), %%xmm9 ;\n"
  139. "movdqa 448(%1), %%xmm10 ;\n"
  140. "movdqa 464(%1), %%xmm11 ;\n"
  141. "pand %%xmm15, %%xmm6 ;\n"
  142. "pand %%xmm15, %%xmm7 ;\n"
  143. "pand %%xmm15, %%xmm8 ;\n"
  144. "pand %%xmm15, %%xmm9 ;\n"
  145. "pand %%xmm15, %%xmm10 ;\n"
  146. "pand %%xmm15, %%xmm11 ;\n"
  147. "por %%xmm6, %%xmm0 ;\n"
  148. "por %%xmm7, %%xmm1 ;\n"
  149. "por %%xmm8, %%xmm2 ;\n"
  150. "por %%xmm9, %%xmm3 ;\n"
  151. "por %%xmm10, %%xmm4 ;\n"
  152. "por %%xmm11, %%xmm5 ;\n"
  153. /* 6 */
  154. "movq $6, %%rax ;\n"
  155. "movd %%rax, %%xmm15 ;\n"
  156. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  157. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  158. "movdqa 480(%1), %%xmm6 ;\n"
  159. "movdqa 496(%1), %%xmm7 ;\n"
  160. "movdqa 512(%1), %%xmm8 ;\n"
  161. "movdqa 528(%1), %%xmm9 ;\n"
  162. "movdqa 544(%1), %%xmm10 ;\n"
  163. "movdqa 560(%1), %%xmm11 ;\n"
  164. "pand %%xmm15, %%xmm6 ;\n"
  165. "pand %%xmm15, %%xmm7 ;\n"
  166. "pand %%xmm15, %%xmm8 ;\n"
  167. "pand %%xmm15, %%xmm9 ;\n"
  168. "pand %%xmm15, %%xmm10 ;\n"
  169. "pand %%xmm15, %%xmm11 ;\n"
  170. "por %%xmm6, %%xmm0 ;\n"
  171. "por %%xmm7, %%xmm1 ;\n"
  172. "por %%xmm8, %%xmm2 ;\n"
  173. "por %%xmm9, %%xmm3 ;\n"
  174. "por %%xmm10, %%xmm4 ;\n"
  175. "por %%xmm11, %%xmm5 ;\n"
  176. /* 7 */
  177. "movq $7, %%rax ;\n"
  178. "movd %%rax, %%xmm15 ;\n"
  179. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  180. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  181. "movdqa 576(%1), %%xmm6 ;\n"
  182. "movdqa 592(%1), %%xmm7 ;\n"
  183. "movdqa 608(%1), %%xmm8 ;\n"
  184. "movdqa 624(%1), %%xmm9 ;\n"
  185. "movdqa 640(%1), %%xmm10 ;\n"
  186. "movdqa 656(%1), %%xmm11 ;\n"
  187. "pand %%xmm15, %%xmm6 ;\n"
  188. "pand %%xmm15, %%xmm7 ;\n"
  189. "pand %%xmm15, %%xmm8 ;\n"
  190. "pand %%xmm15, %%xmm9 ;\n"
  191. "pand %%xmm15, %%xmm10 ;\n"
  192. "pand %%xmm15, %%xmm11 ;\n"
  193. "por %%xmm6, %%xmm0 ;\n"
  194. "por %%xmm7, %%xmm1 ;\n"
  195. "por %%xmm8, %%xmm2 ;\n"
  196. "por %%xmm9, %%xmm3 ;\n"
  197. "por %%xmm10, %%xmm4 ;\n"
  198. "por %%xmm11, %%xmm5 ;\n"
  199. /* 8 */
  200. "movq $8, %%rax ;\n"
  201. "movd %%rax, %%xmm15 ;\n"
  202. "pshufd $0x00, %%xmm15, %%xmm15 ;\n"
  203. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  204. "movdqa 672(%1), %%xmm6 ;\n"
  205. "movdqa 688(%1), %%xmm7 ;\n"
  206. "movdqa 704(%1), %%xmm8 ;\n"
  207. "movdqa 720(%1), %%xmm9 ;\n"
  208. "movdqa 736(%1), %%xmm10 ;\n"
  209. "movdqa 752(%1), %%xmm11 ;\n"
  210. "pand %%xmm15, %%xmm6 ;\n"
  211. "pand %%xmm15, %%xmm7 ;\n"
  212. "pand %%xmm15, %%xmm8 ;\n"
  213. "pand %%xmm15, %%xmm9 ;\n"
  214. "pand %%xmm15, %%xmm10 ;\n"
  215. "pand %%xmm15, %%xmm11 ;\n"
  216. "por %%xmm6, %%xmm0 ;\n"
  217. "por %%xmm7, %%xmm1 ;\n"
  218. "por %%xmm8, %%xmm2 ;\n"
  219. "por %%xmm9, %%xmm3 ;\n"
  220. "por %%xmm10, %%xmm4 ;\n"
  221. "por %%xmm11, %%xmm5 ;\n"
  222. /* conditionally swap ysubx and xaddy */
  223. "movq %3, %%rax ;\n"
  224. "xorq $1, %%rax ;\n"
  225. "movd %%rax, %%xmm14 ;\n"
  226. "pxor %%xmm15, %%xmm15 ;\n"
  227. "pshufd $0x00, %%xmm14, %%xmm14 ;\n"
  228. "pxor %%xmm0, %%xmm2 ;\n"
  229. "pxor %%xmm1, %%xmm3 ;\n"
  230. "pcmpeqd %%xmm14, %%xmm15 ;\n"
  231. "movdqa %%xmm2, %%xmm6 ;\n"
  232. "movdqa %%xmm3, %%xmm7 ;\n"
  233. "pand %%xmm15, %%xmm6 ;\n"
  234. "pand %%xmm15, %%xmm7 ;\n"
  235. "pxor %%xmm6, %%xmm0 ;\n"
  236. "pxor %%xmm7, %%xmm1 ;\n"
  237. "pxor %%xmm0, %%xmm2 ;\n"
  238. "pxor %%xmm1, %%xmm3 ;\n"
  239. /* store ysubx */
  240. "movq $0x7ffffffffffff, %%rax ;\n"
  241. "movd %%xmm0, %%rcx ;\n"
  242. "movd %%xmm0, %%r8 ;\n"
  243. "movd %%xmm1, %%rsi ;\n"
  244. "pshufd $0xee, %%xmm0, %%xmm0 ;\n"
  245. "pshufd $0xee, %%xmm1, %%xmm1 ;\n"
  246. "movd %%xmm0, %%rdx ;\n"
  247. "movd %%xmm1, %%rdi ;\n"
  248. "shrdq $51, %%rdx, %%r8 ;\n"
  249. "shrdq $38, %%rsi, %%rdx ;\n"
  250. "shrdq $25, %%rdi, %%rsi ;\n"
  251. "shrq $12, %%rdi ;\n"
  252. "andq %%rax, %%rcx ;\n"
  253. "andq %%rax, %%r8 ;\n"
  254. "andq %%rax, %%rdx ;\n"
  255. "andq %%rax, %%rsi ;\n"
  256. "andq %%rax, %%rdi ;\n"
  257. "movq %%rcx, 0(%2) ;\n"
  258. "movq %%r8, 8(%2) ;\n"
  259. "movq %%rdx, 16(%2) ;\n"
  260. "movq %%rsi, 24(%2) ;\n"
  261. "movq %%rdi, 32(%2) ;\n"
  262. /* store xaddy */
  263. "movq $0x7ffffffffffff, %%rax ;\n"
  264. "movd %%xmm2, %%rcx ;\n"
  265. "movd %%xmm2, %%r8 ;\n"
  266. "movd %%xmm3, %%rsi ;\n"
  267. "pshufd $0xee, %%xmm2, %%xmm2 ;\n"
  268. "pshufd $0xee, %%xmm3, %%xmm3 ;\n"
  269. "movd %%xmm2, %%rdx ;\n"
  270. "movd %%xmm3, %%rdi ;\n"
  271. "shrdq $51, %%rdx, %%r8 ;\n"
  272. "shrdq $38, %%rsi, %%rdx ;\n"
  273. "shrdq $25, %%rdi, %%rsi ;\n"
  274. "shrq $12, %%rdi ;\n"
  275. "andq %%rax, %%rcx ;\n"
  276. "andq %%rax, %%r8 ;\n"
  277. "andq %%rax, %%rdx ;\n"
  278. "andq %%rax, %%rsi ;\n"
  279. "andq %%rax, %%rdi ;\n"
  280. "movq %%rcx, 40(%2) ;\n"
  281. "movq %%r8, 48(%2) ;\n"
  282. "movq %%rdx, 56(%2) ;\n"
  283. "movq %%rsi, 64(%2) ;\n"
  284. "movq %%rdi, 72(%2) ;\n"
  285. /* extract t2d */
  286. "movq $0x7ffffffffffff, %%rax ;\n"
  287. "movd %%xmm4, %%rcx ;\n"
  288. "movd %%xmm4, %%r8 ;\n"
  289. "movd %%xmm5, %%rsi ;\n"
  290. "pshufd $0xee, %%xmm4, %%xmm4 ;\n"
  291. "pshufd $0xee, %%xmm5, %%xmm5 ;\n"
  292. "movd %%xmm4, %%rdx ;\n"
  293. "movd %%xmm5, %%rdi ;\n"
  294. "shrdq $51, %%rdx, %%r8 ;\n"
  295. "shrdq $38, %%rsi, %%rdx ;\n"
  296. "shrdq $25, %%rdi, %%rsi ;\n"
  297. "shrq $12, %%rdi ;\n"
  298. "andq %%rax, %%rcx ;\n"
  299. "andq %%rax, %%r8 ;\n"
  300. "andq %%rax, %%rdx ;\n"
  301. "andq %%rax, %%rsi ;\n"
  302. "andq %%rax, %%rdi ;\n"
  303. /* conditionally negate t2d */
  304. "movq %3, %%rax ;\n"
  305. "movq $0xfffffffffffda, %%r9 ;\n"
  306. "movq $0xffffffffffffe, %%r10 ;\n"
  307. "movq %%r10, %%r11 ;\n"
  308. "movq %%r10, %%r12 ;\n"
  309. "movq %%r10, %%r13 ;\n"
  310. "subq %%rcx, %%r9 ;\n"
  311. "subq %%r8, %%r10 ;\n"
  312. "subq %%rdx, %%r11 ;\n"
  313. "subq %%rsi, %%r12 ;\n"
  314. "subq %%rdi, %%r13 ;\n"
  315. "cmpq $1, %%rax ;\n"
  316. "cmove %%r9, %%rcx ;\n"
  317. "cmove %%r10, %%r8 ;\n"
  318. "cmove %%r11, %%rdx ;\n"
  319. "cmove %%r12, %%rsi ;\n"
  320. "cmove %%r13, %%rdi ;\n"
  321. /* store t2d */
  322. "movq %%rcx, 80(%2) ;\n"
  323. "movq %%r8, 88(%2) ;\n"
  324. "movq %%rdx, 96(%2) ;\n"
  325. "movq %%rsi, 104(%2) ;\n"
  326. "movq %%rdi, 112(%2) ;\n"
  327. :
  328. : "m"(u), "r"(&table[pos * 8]), "r"(t), "m"(sign) /* %0 = u, %1 = table, %2 = t, %3 = sign */
  329. :
  330. "%rax", "%rcx", "%rdx", "%rdi", "%rsi", "%r8", "%r9", "%r10", "%r11", "%r12", "%r13",
  331. "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm14", "%xmm14",
  332. "cc", "memory"
  333. );
  334. }
  335. #ifdef __clang__
  336. #pragma clang diagnostic pop
  337. #endif
  338. #endif /* defined(ED25519_GCC_64BIT_X86_CHOOSE) */