/* * Copyright (C) 2011-2017 Intel Corporation. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* ==================================================================== * Copyright (c) 1998-2017 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" * * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to * endorse or promote products derived from this software without * prior written permission. For written permission, please contact * openssl-core@openssl.org. * * 5. Products derived from this software may not be called "OpenSSL" * nor may "OpenSSL" appear in their names without prior written * permission of the OpenSSL Project. * * 6. Redistributions of any form whatsoever must retain the following * acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit (http://www.openssl.org/)" * * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== * * This product includes cryptographic software written by Eric Young * (eay@cryptsoft.com). This product includes software written by Tim * Hudson (tjh@cryptsoft.com). * */ /* * Content from openssl-1.1.0e/crypto/modes/ghash-x86_64.s * which is auto-generated by openssl-1.1.0e/crypto/modes/asm/ghash-x86_64.pl */ .text .globl pcl_gcm_init_clmul .type pcl_gcm_init_clmul,@function .align 16 pcl_gcm_init_clmul: .L_init_clmul: movdqu (%rsi),%xmm2 pshufd $78,%xmm2,%xmm2 pshufd $255,%xmm2,%xmm4 movdqa %xmm2,%xmm3 psllq $1,%xmm2 pxor %xmm5,%xmm5 psrlq $63,%xmm3 pcmpgtd %xmm4,%xmm5 pslldq $8,%xmm3 por %xmm3,%xmm2 pand .L0x1c2_polynomial(%rip),%xmm5 pxor %xmm5,%xmm2 pshufd $78,%xmm2,%xmm6 movdqa %xmm2,%xmm0 pxor %xmm2,%xmm6 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 pshufd $78,%xmm2,%xmm3 pshufd $78,%xmm0,%xmm4 pxor %xmm2,%xmm3 movdqu %xmm2,0(%rdi) pxor %xmm0,%xmm4 movdqu %xmm0,16(%rdi) .byte 102,15,58,15,227,8 movdqu %xmm4,32(%rdi) movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 movdqa %xmm0,%xmm5 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,222,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 pshufd $78,%xmm5,%xmm3 pshufd $78,%xmm0,%xmm4 pxor %xmm5,%xmm3 movdqu %xmm5,48(%rdi) pxor %xmm0,%xmm4 movdqu %xmm0,64(%rdi) .byte 102,15,58,15,227,8 movdqu %xmm4,80(%rdi) .byte 0xf3,0xc3 .size pcl_gcm_init_clmul,.-pcl_gcm_init_clmul .globl pcl_gcm_gmult_clmul .type pcl_gcm_gmult_clmul,@function .align 16 pcl_gcm_gmult_clmul: .L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 movdqu (%rsi),%xmm2 movdqu 32(%rsi),%xmm4 .byte 102,15,56,0,197 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,220,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .size pcl_gcm_gmult_clmul,.-pcl_gcm_gmult_clmul .globl pcl_gcm_ghash_clmul .type pcl_gcm_ghash_clmul,@function .align 32 pcl_gcm_ghash_clmul: .L_ghash_clmul: movdqa .Lbswap_mask(%rip),%xmm10 movdqu (%rdi),%xmm0 movdqu (%rsi),%xmm2 movdqu 32(%rsi),%xmm7 .byte 102,65,15,56,0,194 subq $0x10,%rcx jz .Lodd_tail movdqu 16(%rsi),%xmm6 # Commenting out Silvermont optimizations: movl OPENSSL_ia32cap_P+4(%rip),%eax cmpq $0x30,%rcx jb .Lskip4x # Commenting out Silvermont optimizations: andl $71303168,%eax # Commenting out Silvermont optimizations: cmpl $4194304,%eax # Commenting out Silvermont optimizations: je .Lskip4x subq $0x30,%rcx movq $0xA040608020C0E000,%rax movdqu 48(%rsi),%xmm14 movdqu 64(%rsi),%xmm15 movdqu 48(%rdx),%xmm3 movdqu 32(%rdx),%xmm11 .byte 102,65,15,56,0,218 .byte 102,69,15,56,0,218 movdqa %xmm3,%xmm5 pshufd $78,%xmm3,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,68,218,0 .byte 102,15,58,68,234,17 .byte 102,15,58,68,231,0 movdqa %xmm11,%xmm13 pshufd $78,%xmm11,%xmm12 pxor %xmm11,%xmm12 .byte 102,68,15,58,68,222,0 .byte 102,68,15,58,68,238,17 .byte 102,68,15,58,68,231,16 xorps %xmm11,%xmm3 xorps %xmm13,%xmm5 movups 80(%rsi),%xmm7 xorps %xmm12,%xmm4 movdqu 16(%rdx),%xmm11 movdqu 0(%rdx),%xmm8 .byte 102,69,15,56,0,218 .byte 102,69,15,56,0,194 movdqa %xmm11,%xmm13 pshufd $78,%xmm11,%xmm12 pxor %xmm8,%xmm0 pxor %xmm11,%xmm12 .byte 102,69,15,58,68,222,0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm8 pxor %xmm0,%xmm8 .byte 102,69,15,58,68,238,17 .byte 102,68,15,58,68,231,0 xorps %xmm11,%xmm3 xorps %xmm13,%xmm5 leaq 64(%rdx),%rdx subq $0x40,%rcx jc .Ltail4x jmp .Lmod4_loop .align 32 .Lmod4_loop: .byte 102,65,15,58,68,199,0 xorps %xmm12,%xmm4 movdqu 48(%rdx),%xmm11 .byte 102,69,15,56,0,218 .byte 102,65,15,58,68,207,17 xorps %xmm3,%xmm0 movdqu 32(%rdx),%xmm3 movdqa %xmm11,%xmm13 .byte 102,68,15,58,68,199,16 pshufd $78,%xmm11,%xmm12 xorps %xmm5,%xmm1 pxor %xmm11,%xmm12 .byte 102,65,15,56,0,218 movups 32(%rsi),%xmm7 xorps %xmm4,%xmm8 .byte 102,68,15,58,68,218,0 pshufd $78,%xmm3,%xmm4 pxor %xmm0,%xmm8 movdqa %xmm3,%xmm5 pxor %xmm1,%xmm8 pxor %xmm3,%xmm4 movdqa %xmm8,%xmm9 .byte 102,68,15,58,68,234,17 pslldq $8,%xmm8 psrldq $8,%xmm9 pxor %xmm8,%xmm0 movdqa .L7_mask(%rip),%xmm8 pxor %xmm9,%xmm1 .byte 102,76,15,110,200 pand %xmm0,%xmm8 .byte 102,69,15,56,0,200 pxor %xmm0,%xmm9 .byte 102,68,15,58,68,231,0 psllq $57,%xmm9 movdqa %xmm9,%xmm8 pslldq $8,%xmm9 .byte 102,15,58,68,222,0 psrldq $8,%xmm8 pxor %xmm9,%xmm0 pxor %xmm8,%xmm1 movdqu 0(%rdx),%xmm8 movdqa %xmm0,%xmm9 psrlq $1,%xmm0 .byte 102,15,58,68,238,17 xorps %xmm11,%xmm3 movdqu 16(%rdx),%xmm11 .byte 102,69,15,56,0,218 .byte 102,15,58,68,231,16 xorps %xmm13,%xmm5 movups 80(%rsi),%xmm7 .byte 102,69,15,56,0,194 pxor %xmm9,%xmm1 pxor %xmm0,%xmm9 psrlq $5,%xmm0 movdqa %xmm11,%xmm13 pxor %xmm12,%xmm4 pshufd $78,%xmm11,%xmm12 pxor %xmm9,%xmm0 pxor %xmm8,%xmm1 pxor %xmm11,%xmm12 .byte 102,69,15,58,68,222,0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 movdqa %xmm0,%xmm1 .byte 102,69,15,58,68,238,17 xorps %xmm11,%xmm3 pshufd $78,%xmm0,%xmm8 pxor %xmm0,%xmm8 .byte 102,68,15,58,68,231,0 xorps %xmm13,%xmm5 leaq 64(%rdx),%rdx subq $0x40,%rcx jnc .Lmod4_loop .Ltail4x: .byte 102,65,15,58,68,199,0 .byte 102,65,15,58,68,207,17 .byte 102,68,15,58,68,199,16 xorps %xmm12,%xmm4 xorps %xmm3,%xmm0 xorps %xmm5,%xmm1 pxor %xmm0,%xmm1 pxor %xmm4,%xmm8 pxor %xmm1,%xmm8 pxor %xmm0,%xmm1 movdqa %xmm8,%xmm9 psrldq $8,%xmm8 pslldq $8,%xmm9 pxor %xmm8,%xmm1 pxor %xmm9,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 addq $0x40,%rcx jz .Ldone movdqu 32(%rsi),%xmm7 subq $0x10,%rcx jz .Lodd_tail .Lskip4x: movdqu (%rdx),%xmm8 movdqu 16(%rdx),%xmm3 .byte 102,69,15,56,0,194 .byte 102,65,15,56,0,218 pxor %xmm8,%xmm0 movdqa %xmm3,%xmm5 pshufd $78,%xmm3,%xmm4 pxor %xmm3,%xmm4 .byte 102,15,58,68,218,0 .byte 102,15,58,68,234,17 .byte 102,15,58,68,231,0 leaq 32(%rdx),%rdx nop subq $0x20,%rcx jbe .Leven_tail nop jmp .Lmod_loop .align 32 .Lmod_loop: movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm8 pshufd $78,%xmm0,%xmm4 pxor %xmm0,%xmm4 .byte 102,15,58,68,198,0 .byte 102,15,58,68,206,17 .byte 102,15,58,68,231,16 pxor %xmm3,%xmm0 pxor %xmm5,%xmm1 movdqu (%rdx),%xmm9 pxor %xmm0,%xmm8 .byte 102,69,15,56,0,202 movdqu 16(%rdx),%xmm3 pxor %xmm1,%xmm8 pxor %xmm9,%xmm1 pxor %xmm8,%xmm4 .byte 102,65,15,56,0,218 movdqa %xmm4,%xmm8 psrldq $8,%xmm8 pslldq $8,%xmm4 pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm3,%xmm5 movdqa %xmm0,%xmm9 movdqa %xmm0,%xmm8 psllq $5,%xmm0 pxor %xmm0,%xmm8 .byte 102,15,58,68,218,0 psllq $1,%xmm0 pxor %xmm8,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm8 pslldq $8,%xmm0 psrldq $8,%xmm8 pxor %xmm9,%xmm0 pshufd $78,%xmm5,%xmm4 pxor %xmm8,%xmm1 pxor %xmm5,%xmm4 movdqa %xmm0,%xmm9 psrlq $1,%xmm0 .byte 102,15,58,68,234,17 pxor %xmm9,%xmm1 pxor %xmm0,%xmm9 psrlq $5,%xmm0 pxor %xmm9,%xmm0 leaq 32(%rdx),%rdx psrlq $1,%xmm0 .byte 102,15,58,68,231,0 pxor %xmm1,%xmm0 subq $0x20,%rcx ja .Lmod_loop .Leven_tail: movdqa %xmm0,%xmm1 movdqa %xmm4,%xmm8 pshufd $78,%xmm0,%xmm4 pxor %xmm0,%xmm4 .byte 102,15,58,68,198,0 .byte 102,15,58,68,206,17 .byte 102,15,58,68,231,16 pxor %xmm3,%xmm0 pxor %xmm5,%xmm1 pxor %xmm0,%xmm8 pxor %xmm1,%xmm8 pxor %xmm8,%xmm4 movdqa %xmm4,%xmm8 psrldq $8,%xmm8 pslldq $8,%xmm4 pxor %xmm8,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 testq %rcx,%rcx jnz .Ldone .Lodd_tail: movdqu (%rdx),%xmm8 .byte 102,69,15,56,0,194 pxor %xmm8,%xmm0 movdqa %xmm0,%xmm1 pshufd $78,%xmm0,%xmm3 pxor %xmm0,%xmm3 .byte 102,15,58,68,194,0 .byte 102,15,58,68,202,17 .byte 102,15,58,68,223,0 pxor %xmm0,%xmm3 pxor %xmm1,%xmm3 movdqa %xmm3,%xmm4 psrldq $8,%xmm3 pslldq $8,%xmm4 pxor %xmm3,%xmm1 pxor %xmm4,%xmm0 movdqa %xmm0,%xmm4 movdqa %xmm0,%xmm3 psllq $5,%xmm0 pxor %xmm0,%xmm3 psllq $1,%xmm0 pxor %xmm3,%xmm0 psllq $57,%xmm0 movdqa %xmm0,%xmm3 pslldq $8,%xmm0 psrldq $8,%xmm3 pxor %xmm4,%xmm0 pxor %xmm3,%xmm1 movdqa %xmm0,%xmm4 psrlq $1,%xmm0 pxor %xmm4,%xmm1 pxor %xmm0,%xmm4 psrlq $5,%xmm0 pxor %xmm4,%xmm0 psrlq $1,%xmm0 pxor %xmm1,%xmm0 .Ldone: .byte 102,65,15,56,0,194 movdqu %xmm0,(%rdi) .byte 0xf3,0xc3 .size pcl_gcm_ghash_clmul,.-pcl_gcm_ghash_clmul .align 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 .L7_mask: .long 7,0,7,0 .L7_mask_poly: .long 7,0,450,0