/* pcl_ghash-x86_64.s */
/*
* Copyright (C) 2011-2017 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
/* ====================================================================
* Copyright (c) 1998-2017 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com).
*
*/
/*
* Content from openssl-1.1.0e/crypto/modes/ghash-x86_64.s
* which is auto-generated by openssl-1.1.0e/crypto/modes/asm/ghash-x86_64.pl
*/
  88. .text
  89. .globl pcl_gcm_init_clmul
  90. .type pcl_gcm_init_clmul,@function
  91. .align 16
  92. pcl_gcm_init_clmul:
  93. .L_init_clmul:
  94. movdqu (%rsi),%xmm2
  95. pshufd $78,%xmm2,%xmm2
  96. pshufd $255,%xmm2,%xmm4
  97. movdqa %xmm2,%xmm3
  98. psllq $1,%xmm2
  99. pxor %xmm5,%xmm5
  100. psrlq $63,%xmm3
  101. pcmpgtd %xmm4,%xmm5
  102. pslldq $8,%xmm3
  103. por %xmm3,%xmm2
  104. pand .L0x1c2_polynomial(%rip),%xmm5
  105. pxor %xmm5,%xmm2
  106. pshufd $78,%xmm2,%xmm6
  107. movdqa %xmm2,%xmm0
  108. pxor %xmm2,%xmm6
  109. movdqa %xmm0,%xmm1
  110. pshufd $78,%xmm0,%xmm3
  111. pxor %xmm0,%xmm3
  112. .byte 102,15,58,68,194,0
  113. .byte 102,15,58,68,202,17
  114. .byte 102,15,58,68,222,0
  115. pxor %xmm0,%xmm3
  116. pxor %xmm1,%xmm3
  117. movdqa %xmm3,%xmm4
  118. psrldq $8,%xmm3
  119. pslldq $8,%xmm4
  120. pxor %xmm3,%xmm1
  121. pxor %xmm4,%xmm0
  122. movdqa %xmm0,%xmm4
  123. movdqa %xmm0,%xmm3
  124. psllq $5,%xmm0
  125. pxor %xmm0,%xmm3
  126. psllq $1,%xmm0
  127. pxor %xmm3,%xmm0
  128. psllq $57,%xmm0
  129. movdqa %xmm0,%xmm3
  130. pslldq $8,%xmm0
  131. psrldq $8,%xmm3
  132. pxor %xmm4,%xmm0
  133. pxor %xmm3,%xmm1
  134. movdqa %xmm0,%xmm4
  135. psrlq $1,%xmm0
  136. pxor %xmm4,%xmm1
  137. pxor %xmm0,%xmm4
  138. psrlq $5,%xmm0
  139. pxor %xmm4,%xmm0
  140. psrlq $1,%xmm0
  141. pxor %xmm1,%xmm0
  142. pshufd $78,%xmm2,%xmm3
  143. pshufd $78,%xmm0,%xmm4
  144. pxor %xmm2,%xmm3
  145. movdqu %xmm2,0(%rdi)
  146. pxor %xmm0,%xmm4
  147. movdqu %xmm0,16(%rdi)
  148. .byte 102,15,58,15,227,8
  149. movdqu %xmm4,32(%rdi)
  150. movdqa %xmm0,%xmm1
  151. pshufd $78,%xmm0,%xmm3
  152. pxor %xmm0,%xmm3
  153. .byte 102,15,58,68,194,0
  154. .byte 102,15,58,68,202,17
  155. .byte 102,15,58,68,222,0
  156. pxor %xmm0,%xmm3
  157. pxor %xmm1,%xmm3
  158. movdqa %xmm3,%xmm4
  159. psrldq $8,%xmm3
  160. pslldq $8,%xmm4
  161. pxor %xmm3,%xmm1
  162. pxor %xmm4,%xmm0
  163. movdqa %xmm0,%xmm4
  164. movdqa %xmm0,%xmm3
  165. psllq $5,%xmm0
  166. pxor %xmm0,%xmm3
  167. psllq $1,%xmm0
  168. pxor %xmm3,%xmm0
  169. psllq $57,%xmm0
  170. movdqa %xmm0,%xmm3
  171. pslldq $8,%xmm0
  172. psrldq $8,%xmm3
  173. pxor %xmm4,%xmm0
  174. pxor %xmm3,%xmm1
  175. movdqa %xmm0,%xmm4
  176. psrlq $1,%xmm0
  177. pxor %xmm4,%xmm1
  178. pxor %xmm0,%xmm4
  179. psrlq $5,%xmm0
  180. pxor %xmm4,%xmm0
  181. psrlq $1,%xmm0
  182. pxor %xmm1,%xmm0
  183. movdqa %xmm0,%xmm5
  184. movdqa %xmm0,%xmm1
  185. pshufd $78,%xmm0,%xmm3
  186. pxor %xmm0,%xmm3
  187. .byte 102,15,58,68,194,0
  188. .byte 102,15,58,68,202,17
  189. .byte 102,15,58,68,222,0
  190. pxor %xmm0,%xmm3
  191. pxor %xmm1,%xmm3
  192. movdqa %xmm3,%xmm4
  193. psrldq $8,%xmm3
  194. pslldq $8,%xmm4
  195. pxor %xmm3,%xmm1
  196. pxor %xmm4,%xmm0
  197. movdqa %xmm0,%xmm4
  198. movdqa %xmm0,%xmm3
  199. psllq $5,%xmm0
  200. pxor %xmm0,%xmm3
  201. psllq $1,%xmm0
  202. pxor %xmm3,%xmm0
  203. psllq $57,%xmm0
  204. movdqa %xmm0,%xmm3
  205. pslldq $8,%xmm0
  206. psrldq $8,%xmm3
  207. pxor %xmm4,%xmm0
  208. pxor %xmm3,%xmm1
  209. movdqa %xmm0,%xmm4
  210. psrlq $1,%xmm0
  211. pxor %xmm4,%xmm1
  212. pxor %xmm0,%xmm4
  213. psrlq $5,%xmm0
  214. pxor %xmm4,%xmm0
  215. psrlq $1,%xmm0
  216. pxor %xmm1,%xmm0
  217. pshufd $78,%xmm5,%xmm3
  218. pshufd $78,%xmm0,%xmm4
  219. pxor %xmm5,%xmm3
  220. movdqu %xmm5,48(%rdi)
  221. pxor %xmm0,%xmm4
  222. movdqu %xmm0,64(%rdi)
  223. .byte 102,15,58,15,227,8
  224. movdqu %xmm4,80(%rdi)
  225. .byte 0xf3,0xc3
  226. .size pcl_gcm_init_clmul,.-pcl_gcm_init_clmul
  227. .globl pcl_gcm_gmult_clmul
  228. .type pcl_gcm_gmult_clmul,@function
  229. .align 16
  230. pcl_gcm_gmult_clmul:
  231. .L_gmult_clmul:
  232. movdqu (%rdi),%xmm0
  233. movdqa .Lbswap_mask(%rip),%xmm5
  234. movdqu (%rsi),%xmm2
  235. movdqu 32(%rsi),%xmm4
  236. .byte 102,15,56,0,197
  237. movdqa %xmm0,%xmm1
  238. pshufd $78,%xmm0,%xmm3
  239. pxor %xmm0,%xmm3
  240. .byte 102,15,58,68,194,0
  241. .byte 102,15,58,68,202,17
  242. .byte 102,15,58,68,220,0
  243. pxor %xmm0,%xmm3
  244. pxor %xmm1,%xmm3
  245. movdqa %xmm3,%xmm4
  246. psrldq $8,%xmm3
  247. pslldq $8,%xmm4
  248. pxor %xmm3,%xmm1
  249. pxor %xmm4,%xmm0
  250. movdqa %xmm0,%xmm4
  251. movdqa %xmm0,%xmm3
  252. psllq $5,%xmm0
  253. pxor %xmm0,%xmm3
  254. psllq $1,%xmm0
  255. pxor %xmm3,%xmm0
  256. psllq $57,%xmm0
  257. movdqa %xmm0,%xmm3
  258. pslldq $8,%xmm0
  259. psrldq $8,%xmm3
  260. pxor %xmm4,%xmm0
  261. pxor %xmm3,%xmm1
  262. movdqa %xmm0,%xmm4
  263. psrlq $1,%xmm0
  264. pxor %xmm4,%xmm1
  265. pxor %xmm0,%xmm4
  266. psrlq $5,%xmm0
  267. pxor %xmm4,%xmm0
  268. psrlq $1,%xmm0
  269. pxor %xmm1,%xmm0
  270. .byte 102,15,56,0,197
  271. movdqu %xmm0,(%rdi)
  272. .byte 0xf3,0xc3
  273. .size pcl_gcm_gmult_clmul,.-pcl_gcm_gmult_clmul
  274. .globl pcl_gcm_ghash_clmul
  275. .type pcl_gcm_ghash_clmul,@function
  276. .align 32
  277. pcl_gcm_ghash_clmul:
  278. .L_ghash_clmul:
  279. movdqa .Lbswap_mask(%rip),%xmm10
  280. movdqu (%rdi),%xmm0
  281. movdqu (%rsi),%xmm2
  282. movdqu 32(%rsi),%xmm7
  283. .byte 102,65,15,56,0,194
  284. subq $0x10,%rcx
  285. jz .Lodd_tail
  286. movdqu 16(%rsi),%xmm6
  287. # Commenting out Silvermont optimizations: movl OPENSSL_ia32cap_P+4(%rip),%eax
  288. cmpq $0x30,%rcx
  289. jb .Lskip4x
  290. # Commenting out Silvermont optimizations: andl $71303168,%eax
  291. # Commenting out Silvermont optimizations: cmpl $4194304,%eax
  292. # Commenting out Silvermont optimizations: je .Lskip4x
  293. subq $0x30,%rcx
  294. movq $0xA040608020C0E000,%rax
  295. movdqu 48(%rsi),%xmm14
  296. movdqu 64(%rsi),%xmm15
  297. movdqu 48(%rdx),%xmm3
  298. movdqu 32(%rdx),%xmm11
  299. .byte 102,65,15,56,0,218
  300. .byte 102,69,15,56,0,218
  301. movdqa %xmm3,%xmm5
  302. pshufd $78,%xmm3,%xmm4
  303. pxor %xmm3,%xmm4
  304. .byte 102,15,58,68,218,0
  305. .byte 102,15,58,68,234,17
  306. .byte 102,15,58,68,231,0
  307. movdqa %xmm11,%xmm13
  308. pshufd $78,%xmm11,%xmm12
  309. pxor %xmm11,%xmm12
  310. .byte 102,68,15,58,68,222,0
  311. .byte 102,68,15,58,68,238,17
  312. .byte 102,68,15,58,68,231,16
  313. xorps %xmm11,%xmm3
  314. xorps %xmm13,%xmm5
  315. movups 80(%rsi),%xmm7
  316. xorps %xmm12,%xmm4
  317. movdqu 16(%rdx),%xmm11
  318. movdqu 0(%rdx),%xmm8
  319. .byte 102,69,15,56,0,218
  320. .byte 102,69,15,56,0,194
  321. movdqa %xmm11,%xmm13
  322. pshufd $78,%xmm11,%xmm12
  323. pxor %xmm8,%xmm0
  324. pxor %xmm11,%xmm12
  325. .byte 102,69,15,58,68,222,0
  326. movdqa %xmm0,%xmm1
  327. pshufd $78,%xmm0,%xmm8
  328. pxor %xmm0,%xmm8
  329. .byte 102,69,15,58,68,238,17
  330. .byte 102,68,15,58,68,231,0
  331. xorps %xmm11,%xmm3
  332. xorps %xmm13,%xmm5
  333. leaq 64(%rdx),%rdx
  334. subq $0x40,%rcx
  335. jc .Ltail4x
  336. jmp .Lmod4_loop
  337. .align 32
  338. .Lmod4_loop:
  339. .byte 102,65,15,58,68,199,0
  340. xorps %xmm12,%xmm4
  341. movdqu 48(%rdx),%xmm11
  342. .byte 102,69,15,56,0,218
  343. .byte 102,65,15,58,68,207,17
  344. xorps %xmm3,%xmm0
  345. movdqu 32(%rdx),%xmm3
  346. movdqa %xmm11,%xmm13
  347. .byte 102,68,15,58,68,199,16
  348. pshufd $78,%xmm11,%xmm12
  349. xorps %xmm5,%xmm1
  350. pxor %xmm11,%xmm12
  351. .byte 102,65,15,56,0,218
  352. movups 32(%rsi),%xmm7
  353. xorps %xmm4,%xmm8
  354. .byte 102,68,15,58,68,218,0
  355. pshufd $78,%xmm3,%xmm4
  356. pxor %xmm0,%xmm8
  357. movdqa %xmm3,%xmm5
  358. pxor %xmm1,%xmm8
  359. pxor %xmm3,%xmm4
  360. movdqa %xmm8,%xmm9
  361. .byte 102,68,15,58,68,234,17
  362. pslldq $8,%xmm8
  363. psrldq $8,%xmm9
  364. pxor %xmm8,%xmm0
  365. movdqa .L7_mask(%rip),%xmm8
  366. pxor %xmm9,%xmm1
  367. .byte 102,76,15,110,200
  368. pand %xmm0,%xmm8
  369. .byte 102,69,15,56,0,200
  370. pxor %xmm0,%xmm9
  371. .byte 102,68,15,58,68,231,0
  372. psllq $57,%xmm9
  373. movdqa %xmm9,%xmm8
  374. pslldq $8,%xmm9
  375. .byte 102,15,58,68,222,0
  376. psrldq $8,%xmm8
  377. pxor %xmm9,%xmm0
  378. pxor %xmm8,%xmm1
  379. movdqu 0(%rdx),%xmm8
  380. movdqa %xmm0,%xmm9
  381. psrlq $1,%xmm0
  382. .byte 102,15,58,68,238,17
  383. xorps %xmm11,%xmm3
  384. movdqu 16(%rdx),%xmm11
  385. .byte 102,69,15,56,0,218
  386. .byte 102,15,58,68,231,16
  387. xorps %xmm13,%xmm5
  388. movups 80(%rsi),%xmm7
  389. .byte 102,69,15,56,0,194
  390. pxor %xmm9,%xmm1
  391. pxor %xmm0,%xmm9
  392. psrlq $5,%xmm0
  393. movdqa %xmm11,%xmm13
  394. pxor %xmm12,%xmm4
  395. pshufd $78,%xmm11,%xmm12
  396. pxor %xmm9,%xmm0
  397. pxor %xmm8,%xmm1
  398. pxor %xmm11,%xmm12
  399. .byte 102,69,15,58,68,222,0
  400. psrlq $1,%xmm0
  401. pxor %xmm1,%xmm0
  402. movdqa %xmm0,%xmm1
  403. .byte 102,69,15,58,68,238,17
  404. xorps %xmm11,%xmm3
  405. pshufd $78,%xmm0,%xmm8
  406. pxor %xmm0,%xmm8
  407. .byte 102,68,15,58,68,231,0
  408. xorps %xmm13,%xmm5
  409. leaq 64(%rdx),%rdx
  410. subq $0x40,%rcx
  411. jnc .Lmod4_loop
  412. .Ltail4x:
  413. .byte 102,65,15,58,68,199,0
  414. .byte 102,65,15,58,68,207,17
  415. .byte 102,68,15,58,68,199,16
  416. xorps %xmm12,%xmm4
  417. xorps %xmm3,%xmm0
  418. xorps %xmm5,%xmm1
  419. pxor %xmm0,%xmm1
  420. pxor %xmm4,%xmm8
  421. pxor %xmm1,%xmm8
  422. pxor %xmm0,%xmm1
  423. movdqa %xmm8,%xmm9
  424. psrldq $8,%xmm8
  425. pslldq $8,%xmm9
  426. pxor %xmm8,%xmm1
  427. pxor %xmm9,%xmm0
  428. movdqa %xmm0,%xmm4
  429. movdqa %xmm0,%xmm3
  430. psllq $5,%xmm0
  431. pxor %xmm0,%xmm3
  432. psllq $1,%xmm0
  433. pxor %xmm3,%xmm0
  434. psllq $57,%xmm0
  435. movdqa %xmm0,%xmm3
  436. pslldq $8,%xmm0
  437. psrldq $8,%xmm3
  438. pxor %xmm4,%xmm0
  439. pxor %xmm3,%xmm1
  440. movdqa %xmm0,%xmm4
  441. psrlq $1,%xmm0
  442. pxor %xmm4,%xmm1
  443. pxor %xmm0,%xmm4
  444. psrlq $5,%xmm0
  445. pxor %xmm4,%xmm0
  446. psrlq $1,%xmm0
  447. pxor %xmm1,%xmm0
  448. addq $0x40,%rcx
  449. jz .Ldone
  450. movdqu 32(%rsi),%xmm7
  451. subq $0x10,%rcx
  452. jz .Lodd_tail
  453. .Lskip4x:
  454. movdqu (%rdx),%xmm8
  455. movdqu 16(%rdx),%xmm3
  456. .byte 102,69,15,56,0,194
  457. .byte 102,65,15,56,0,218
  458. pxor %xmm8,%xmm0
  459. movdqa %xmm3,%xmm5
  460. pshufd $78,%xmm3,%xmm4
  461. pxor %xmm3,%xmm4
  462. .byte 102,15,58,68,218,0
  463. .byte 102,15,58,68,234,17
  464. .byte 102,15,58,68,231,0
  465. leaq 32(%rdx),%rdx
  466. nop
  467. subq $0x20,%rcx
  468. jbe .Leven_tail
  469. nop
  470. jmp .Lmod_loop
  471. .align 32
  472. .Lmod_loop:
  473. movdqa %xmm0,%xmm1
  474. movdqa %xmm4,%xmm8
  475. pshufd $78,%xmm0,%xmm4
  476. pxor %xmm0,%xmm4
  477. .byte 102,15,58,68,198,0
  478. .byte 102,15,58,68,206,17
  479. .byte 102,15,58,68,231,16
  480. pxor %xmm3,%xmm0
  481. pxor %xmm5,%xmm1
  482. movdqu (%rdx),%xmm9
  483. pxor %xmm0,%xmm8
  484. .byte 102,69,15,56,0,202
  485. movdqu 16(%rdx),%xmm3
  486. pxor %xmm1,%xmm8
  487. pxor %xmm9,%xmm1
  488. pxor %xmm8,%xmm4
  489. .byte 102,65,15,56,0,218
  490. movdqa %xmm4,%xmm8
  491. psrldq $8,%xmm8
  492. pslldq $8,%xmm4
  493. pxor %xmm8,%xmm1
  494. pxor %xmm4,%xmm0
  495. movdqa %xmm3,%xmm5
  496. movdqa %xmm0,%xmm9
  497. movdqa %xmm0,%xmm8
  498. psllq $5,%xmm0
  499. pxor %xmm0,%xmm8
  500. .byte 102,15,58,68,218,0
  501. psllq $1,%xmm0
  502. pxor %xmm8,%xmm0
  503. psllq $57,%xmm0
  504. movdqa %xmm0,%xmm8
  505. pslldq $8,%xmm0
  506. psrldq $8,%xmm8
  507. pxor %xmm9,%xmm0
  508. pshufd $78,%xmm5,%xmm4
  509. pxor %xmm8,%xmm1
  510. pxor %xmm5,%xmm4
  511. movdqa %xmm0,%xmm9
  512. psrlq $1,%xmm0
  513. .byte 102,15,58,68,234,17
  514. pxor %xmm9,%xmm1
  515. pxor %xmm0,%xmm9
  516. psrlq $5,%xmm0
  517. pxor %xmm9,%xmm0
  518. leaq 32(%rdx),%rdx
  519. psrlq $1,%xmm0
  520. .byte 102,15,58,68,231,0
  521. pxor %xmm1,%xmm0
  522. subq $0x20,%rcx
  523. ja .Lmod_loop
  524. .Leven_tail:
  525. movdqa %xmm0,%xmm1
  526. movdqa %xmm4,%xmm8
  527. pshufd $78,%xmm0,%xmm4
  528. pxor %xmm0,%xmm4
  529. .byte 102,15,58,68,198,0
  530. .byte 102,15,58,68,206,17
  531. .byte 102,15,58,68,231,16
  532. pxor %xmm3,%xmm0
  533. pxor %xmm5,%xmm1
  534. pxor %xmm0,%xmm8
  535. pxor %xmm1,%xmm8
  536. pxor %xmm8,%xmm4
  537. movdqa %xmm4,%xmm8
  538. psrldq $8,%xmm8
  539. pslldq $8,%xmm4
  540. pxor %xmm8,%xmm1
  541. pxor %xmm4,%xmm0
  542. movdqa %xmm0,%xmm4
  543. movdqa %xmm0,%xmm3
  544. psllq $5,%xmm0
  545. pxor %xmm0,%xmm3
  546. psllq $1,%xmm0
  547. pxor %xmm3,%xmm0
  548. psllq $57,%xmm0
  549. movdqa %xmm0,%xmm3
  550. pslldq $8,%xmm0
  551. psrldq $8,%xmm3
  552. pxor %xmm4,%xmm0
  553. pxor %xmm3,%xmm1
  554. movdqa %xmm0,%xmm4
  555. psrlq $1,%xmm0
  556. pxor %xmm4,%xmm1
  557. pxor %xmm0,%xmm4
  558. psrlq $5,%xmm0
  559. pxor %xmm4,%xmm0
  560. psrlq $1,%xmm0
  561. pxor %xmm1,%xmm0
  562. testq %rcx,%rcx
  563. jnz .Ldone
  564. .Lodd_tail:
  565. movdqu (%rdx),%xmm8
  566. .byte 102,69,15,56,0,194
  567. pxor %xmm8,%xmm0
  568. movdqa %xmm0,%xmm1
  569. pshufd $78,%xmm0,%xmm3
  570. pxor %xmm0,%xmm3
  571. .byte 102,15,58,68,194,0
  572. .byte 102,15,58,68,202,17
  573. .byte 102,15,58,68,223,0
  574. pxor %xmm0,%xmm3
  575. pxor %xmm1,%xmm3
  576. movdqa %xmm3,%xmm4
  577. psrldq $8,%xmm3
  578. pslldq $8,%xmm4
  579. pxor %xmm3,%xmm1
  580. pxor %xmm4,%xmm0
  581. movdqa %xmm0,%xmm4
  582. movdqa %xmm0,%xmm3
  583. psllq $5,%xmm0
  584. pxor %xmm0,%xmm3
  585. psllq $1,%xmm0
  586. pxor %xmm3,%xmm0
  587. psllq $57,%xmm0
  588. movdqa %xmm0,%xmm3
  589. pslldq $8,%xmm0
  590. psrldq $8,%xmm3
  591. pxor %xmm4,%xmm0
  592. pxor %xmm3,%xmm1
  593. movdqa %xmm0,%xmm4
  594. psrlq $1,%xmm0
  595. pxor %xmm4,%xmm1
  596. pxor %xmm0,%xmm4
  597. psrlq $5,%xmm0
  598. pxor %xmm4,%xmm0
  599. psrlq $1,%xmm0
  600. pxor %xmm1,%xmm0
  601. .Ldone:
  602. .byte 102,65,15,56,0,194
  603. movdqu %xmm0,(%rdi)
  604. .byte 0xf3,0xc3
  605. .size pcl_gcm_ghash_clmul,.-pcl_gcm_ghash_clmul
  606. .align 64
  607. .Lbswap_mask:
  608. .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
  609. .L0x1c2_polynomial:
  610. .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
  611. .L7_mask:
  612. .long 7,0,7,0
  613. .L7_mask_poly:
  614. .long 7,0,450,0