| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386 | 
							- /*
 
- * Math library
 
- *
 
- * Copyright (C) 2016 Intel Corporation. All rights reserved.
 
- *
 
- * Redistribution and use in source and binary forms, with or without
 
- * modification, are permitted provided that the following conditions
 
- * are met:
 
- *
 
- *   * Redistributions of source code must retain the above copyright
 
- *     notice, this list of conditions and the following disclaimer.
 
- *   * Redistributions in binary form must reproduce the above copyright
 
- *     notice, this list of conditions and the following disclaimer in
 
- *     the documentation and/or other materials provided with the
 
- *     distribution.
 
- *   * Neither the name of Intel Corporation nor the names of its
 
- *     contributors may be used to endorse or promote products derived
 
- *     from this software without specific prior written permission.
 
- *
 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
- *
 
- *
 
- * Author Name <jingwei.zhang@intel.com>
 
- *   History:
 
- *   03-14-2016 Initial version. numerics svn rev. 12864
 
- */
 
- 	.file "csqrtf.c"
 
- 	.text
 
- ..TXTST0:
 
- # -- Begin  csqrtf
 
- /*
 
- * csqrtf: square root of a single-precision complex argument.
 
- *
 
- * ABI:  System V AMD64 (the argument and the result travel in %xmm0).
 
- * In:   %xmm0 lanes 0 and 1 hold the two floats of the argument. The
 
- *       comments below call them re and im (taken to be the real and
 
- *       imaginary parts, as the routine name indicates).
 
- * Out:  %xmm0 low 64 bits = result re (lane 0) and result im (lane 1).
 
- * Uses: %rax, %rcx, %rdx, %r8, %xmm0-%xmm7, flags, a 120-byte frame.
 
- *       %rbx is spilled and reloaded but never written in between.
 
- *
 
- * Method for normal finite inputs (fast path):
 
- *   t = (sqrt(re*re + im*im) + |re|) / 2
 
- *   a = sqrt(t)          b = |im| / (2 * sqrt(t))
 
- *   re >= 0: result = (a, b with the sign of im)
 
- *   re <  0: result = (b, a with the sign of im)
 
- * Intermediate values are doubles and are converted to float once.
 
- * Zero, subnormal, infinite and NaN inputs go to .L_2TAG_PACKET_0.0.1.
 
- *
 
- * Unwind info: the CFA is rsp+128 while the frame is live. Each epilogue
 
- * reloads %rsp from 24(%rsp), so it is bracketed by .cfi_remember_state
 
- * and .cfi_restore_state and declares CFA offset 8 for its ret. Without
 
- * that, the CFA recorded for every ret instruction is off by 120 bytes.
 
- */
 
- 	.text
 
-        .align    16,0x90
 
- 	.globl csqrtf
 
- csqrtf:
 
- # parameter 1: %xmm0
 
- ..B1.1:
 
- 	.cfi_startproc
 
- ..___tag_value_csqrtf.1:
 
- ..L2:
 
- ..B1.2:
 
-         movq      %rsp, %rax                  # rax = rsp at entry
 
-         subq      $120, %rsp                  # allocate the 120-byte frame
 
- 	.cfi_def_cfa_offset 128
 
-         movq      %rax, 24(%rsp)              # save entry rsp; every epilogue reloads it from here
 
-         movss     %xmm0, (%rsp)               # spill re
 
-         pshufd    $1, %xmm0, %xmm1            # xmm1 lane 0 = im
 
-         movss     %xmm1, 8(%rsp)              # spill im
 
-         movq      %rbx, 32(%rsp)              # spill rbx (not written anywhere below)
 
-         movd      (%rsp), %xmm0               # xmm0 = (re, 0, 0, 0)
 
-         movd      8(%rsp), %xmm1              # xmm1 = (im, 0, 0, 0)
 
-         movl      (%rsp), %eax                # eax = bits of re
 
-         movl      8(%rsp), %ecx               # ecx = bits of im
 
-         unpcklps  %xmm1, %xmm0                # xmm0 = (re, im, 0, 0)
 
-         movl      %ecx, %edx                  # edx = bits of im, kept for the sign and special cases
 
-         lea       csqrtf_table(%rip), %r8     # r8 = base of the constant table
 
-         andl      $2139095040, %eax           # keep the exponent field (mask 0x7f800000)
 
-         andl      $2139095040, %ecx
 
-         subl      $8388608, %eax              # exponent - 1; an all-zero exponent wraps around
 
-         subl      $8388608, %ecx
 
-         andl      $2139095040, %eax           # drop the borrow, keep exponent bits only
 
-         andl      $2139095040, %ecx
 
-         subl      $2130706432, %eax           # minus 0x7f000000: negative only for exponents 1..254
 
-         subl      $2130706432, %ecx
 
-         testl     %ecx, %eax                  # SF is set only when both inputs are normal numbers
 
-         jns       .L_2TAG_PACKET_0.0.1        # anything else takes the slow path
 
- # -- Fast path: re and im are both normal, finite and nonzero.
 
-         cvtps2pd  %xmm0, %xmm0                # xmm0 = (re, im) as doubles
 
-         pxor      %xmm4, %xmm4
 
-         movl      $16, %eax
 
-         movapd    %xmm0, %xmm1
 
-         unpckhpd  %xmm1, %xmm1                # xmm1 = (im, im)
 
-         movapd    %xmm0, %xmm7                # xmm7 = (re, im)
 
-         mulsd     %xmm0, %xmm0                # re*re
 
-         movapd    %xmm7, %xmm6                # xmm6 keeps signed re; its sign is tested at PACKET_1
 
-         mulsd     %xmm1, %xmm1                # im*im
 
-         pinsrw    $3, %eax, %xmm4             # xmm4 low qword = 0x0010000000000000 (one exponent step)
 
-         addsd     %xmm1, %xmm0                # re*re + im*im
 
-         andpd     (%r8), %xmm7                # xmm7 = (|re|, |im|)
 
-         sqrtsd    %xmm0, %xmm0                # |z|
 
-         addsd     %xmm7, %xmm0                # |z| + |re|
 
-         psubd     %xmm4, %xmm0                # exponent - 1: t = (|z| + |re|) / 2
 
-         movsd     %xmm0, %xmm7                # xmm7 = (t, |im|)
 
- # -- r = 1/sqrt(t): rsqrtss seed on a rescaled copy of t, then one polynomial correction.
 
-         movdqa    %xmm0, %xmm1
 
-         pand      16(%r8), %xmm0              # keep the mantissa and the lowest exponent bit of t
 
-         movdqa    %xmm1, %xmm2                # xmm2 = t
 
-         paddd     32(%r8), %xmm0              # set exponent to 126 or 127 in float terms
 
-         psrld     $1, %xmm1                   # halve the exponent field of t
 
-         psrlq     $29, %xmm0                  # move the bits into single-precision layout
 
-         pand      48(%r8), %xmm1              # keep the halved exponent only
 
-         rsqrtss   %xmm0, %xmm0                # approximate reciprocal square root of the seed
 
-         psubd     64(%r8), %xmm1              # minus 0x57f00000: combined bias correction
 
-         psllq     $29, %xmm0                  # back to double-precision layout
 
-         movapd    80(%r8), %xmm3              # c2, about 0.375
 
-         psubd     %xmm1, %xmm0                # restore the exponent: r0 ~= 1/sqrt(t)
 
-         movapd    96(%r8), %xmm1              # c1, about -0.5
 
-         mulsd     %xmm0, %xmm2                # t*r0
 
-         movapd    48(%r8), %xmm4              # 1.0
 
-         mulsd     %xmm0, %xmm2                # t*r0*r0
 
-         subsd     %xmm4, %xmm2                # e = t*r0*r0 - 1
 
-         mulsd     %xmm2, %xmm3                # c2*e
 
-         addsd     %xmm1, %xmm3                # c1 + c2*e
 
-         mulsd     %xmm2, %xmm3                # e*(c1 + c2*e)
 
-         mulsd     %xmm0, %xmm3                # r0*e*(c1 + c2*e)
 
-         addsd     %xmm3, %xmm0                # r = r0 + r0*e*(c1 + c2*e)
 
-         mulpd     112(%r8), %xmm7             # xmm7 = (t, |im|/2)
 
-         unpcklpd  %xmm0, %xmm0                # xmm0 = (r, r)
 
- # -- Common tail. In: xmm0 = (r, r), xmm7 = (t, |im|/2), edx = bits of im,
 
- # -- word 3 of xmm6 has the sign of re in bit 15.
 
- .L_2TAG_PACKET_1.0.1:
 
-         pextrw    $3, %xmm6, %eax             # eax bit 15 = sign of re
 
-         mulpd     %xmm7, %xmm0                # xmm0 = (a, b) = (t*r, |im|/2*r)
 
-         andl      $-2147483648, %edx          # edx = sign bit of im
 
-         cvtpd2ps  %xmm0, %xmm1                # xmm1 = (a, b, 0, 0) as floats
 
-         testl     $32768, %eax                # is re negative
 
-         pshufd    $17, %xmm1, %xmm2           # xmm2 = (b, a, b, a)
 
-         je        .L_2TAG_PACKET_2.0.1        # re >= 0
 
-         movd      %xmm1, %ecx                 # re < 0: ecx = a
 
-         movd      %xmm2, %eax                 # result re = b
 
-         orl       %ecx, %edx                  # result im = a with the sign of im
 
-         testl     $2139095040, %eax           # ZF when b has a zero exponent field
 
-         jmp       .L_2TAG_PACKET_3.0.1
 
- .L_2TAG_PACKET_2.0.1:
 
-         movd      %xmm2, %ecx                 # ecx = b
 
-         movd      %xmm1, %eax                 # result re = a
 
-         orl       %ecx, %edx                  # result im = b with the sign of im
 
-         testl     $2139095040, %ecx           # ZF when b has a zero exponent field
 
- .L_2TAG_PACKET_3.0.1:
 
-         je        .L_2TAG_PACKET_4.0.1        # b is zero or subnormal (flags from the testl above)
 
-         shlq      $32, %rdx                   # rdx = result im << 32
 
-         orq       %rdx, %rax                  # rax = (im << 32) | re
 
-         shrq      $32, %rdx                   # rdx is not read again before ret
 
-         movd      %rax, %xmm0                 # return both floats in the low 64 bits of xmm0
 
-         movq      32(%rsp), %rbx              # reload rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp              # rsp = entry rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- .L_2TAG_PACKET_4.0.1:
 
-         testl     $2147483647, 8(%rsp)        # is the input im exactly zero
 
-         jne       .L_2TAG_PACKET_5.0.1        # no: b lost precision in the float conversion
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax                  # rax = (im << 32) | re
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- .L_2TAG_PACKET_5.0.1:
 
-         pshufd    $14, %xmm0, %xmm0           # low qword = double b
 
-         mulsd     %xmm0, %xmm0                # b*b
 
-         cvtsd2ss  %xmm0, %xmm0                # value is overwritten below; presumably executed only to
 
-                                               # raise the underflow and inexact flags (TODO confirm)
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax                  # rax = (im << 32) | re
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- # -- Slow path: re or im is zero, subnormal, infinite or NaN. xmm0 = (re, im, 0, 0) floats.
 
- .L_2TAG_PACKET_0.0.1:
 
-         movdqa    %xmm0, %xmm2
 
-         movdqa    128(%r8), %xmm4             # (0, 0x7f800000, 0, 0x7f800000)
 
-         pshufd    $80, %xmm0, %xmm0           # xmm0 = (re, re, im, im)
 
-         pxor      %xmm5, %xmm5                # xmm5 = 0
 
-         movdqa    %xmm2, %xmm3                # xmm3 = (re, im, 0, 0), read by the special cases
 
-         pand      144(%r8), %xmm0             # (|re| bits, exponent of re, |im| bits, exponent of im)
 
-         pshufd    $115, %xmm2, %xmm2          # xmm2 = (0, re, 0, im): float bits in the high dwords
 
-         pcmpeqd   %xmm4, %xmm0                # lanes: re is zero, re is inf or NaN, im is zero, im is inf or NaN
 
-         movdqa    %xmm2, %xmm6                # word 3 of xmm6 = top half of re (sign test at PACKET_1)
 
-         movmskps  %xmm0, %eax                 # eax bits 0..3 = the four lane results
 
-         pand      %xmm2, %xmm4                # xmm4 = (0, exponent of re, 0, exponent of im)
 
-         testl     %eax, %eax
 
-         jne       .L_2TAG_PACKET_6.0.1        # some operand is zero, infinite or NaN
 
-         pxor      %xmm0, %xmm0                # no zero operand: empty lane mask
 
- # -- Convert (re, im) to doubles by hand so that subnormal floats are normalized.
 
- # -- In: xmm0 = all-ones in each qword lane whose operand is zero.
 
- .L_2TAG_PACKET_7.0.1:
 
-         pand      (%r8), %xmm2                # clear the sign bits
 
-         pcmpeqd   %xmm5, %xmm4                # high dwords all-ones where the exponent field is zero
 
-         movdqa    %xmm4, %xmm3
 
-         pand      160(%r8), %xmm4             # 0x07f00000 in the zero-exponent lanes
 
-         psrlq     $3, %xmm2                   # move float exponent and mantissa to double positions
 
-         pand      176(%r8), %xmm3             # 0xf8200000 in the zero-exponent lanes
 
-         por       %xmm4, %xmm2                # give zero-exponent lanes an explicit leading one
 
-         paddd     192(%r8), %xmm3             # rebias: 0x38000000 normal, 0x30200000 zero-exponent lanes
 
-         subpd     %xmm4, %xmm2                # remove the leading one again, normalizing subnormals
 
-         paddd     %xmm3, %xmm2                # xmm2 = (|re|, |im|) as doubles
 
-         pandn     %xmm2, %xmm0                # xmm0 = xmm2 with the zero-operand lanes forced to 0.0
 
-         pxor      %xmm4, %xmm4
 
-         movl      $16, %eax
 
-         movapd    %xmm0, %xmm1
 
-         unpckhpd  %xmm1, %xmm1                # xmm1 = (|im|, |im|)
 
-         movapd    %xmm0, %xmm7                # xmm7 = (|re|, |im|)
 
-         mulsd     %xmm0, %xmm0                # re*re
 
-         mulsd     %xmm1, %xmm1                # im*im
 
-         pinsrw    $3, %eax, %xmm4             # one exponent step, as in the fast path
 
-         addsd     %xmm1, %xmm0
 
-         sqrtsd    %xmm0, %xmm0                # |z|
 
-         addsd     %xmm7, %xmm0                # |z| + |re|
 
-         psubd     %xmm4, %xmm0                # t = (|z| + |re|) / 2
 
-         movsd     %xmm0, %xmm7                # xmm7 = (t, |im|)
 
-         sqrtsd    %xmm0, %xmm1                # sqrt(t)
 
-         movapd    48(%r8), %xmm0              # 1.0
 
-         divsd     %xmm1, %xmm0                # r = 1/sqrt(t), exact divide instead of the rsqrtss seed
 
-         mulpd     112(%r8), %xmm7             # xmm7 = (t, |im|/2)
 
-         unpcklpd  %xmm0, %xmm0                # xmm0 = (r, r)
 
-         jmp       .L_2TAG_PACKET_1.0.1
 
- # -- eax: bit 0 re is zero, bit 1 re is inf or NaN, bit 2 im is zero, bit 3 im is inf or NaN.
 
- .L_2TAG_PACKET_6.0.1:
 
-         cmpl      $5, %eax                    # both operands are zero
 
-         je        .L_2TAG_PACKET_8.0.1
 
-         testl     $10, %eax                   # any operand infinite or NaN
 
-         jne       .L_2TAG_PACKET_9.0.1
 
-         pshufd    $160, %xmm0, %xmm0          # widen the two is-zero lanes to qword masks
 
-         jmp       .L_2TAG_PACKET_7.0.1
 
- # -- re and im are both zero: return (+0, zero with the sign of im).
 
- .L_2TAG_PACKET_8.0.1:
 
-         xorl      %eax, %eax                  # result re = +0
 
-         andl      $-2147483648, %edx          # result im = sign of im only
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- # -- At least one operand is infinite or NaN. edx = bits of im, xmm3 lane 0 = re.
 
- .L_2TAG_PACKET_9.0.1:
 
-         movl      %edx, %ecx
 
-         andl      $2147483647, %ecx           # ecx = |im| bits
 
-         movd      %xmm3, %eax                 # eax = bits of re
 
-         cmpl      $2139095040, %ecx           # compare |im| with the infinity pattern
 
-         je        .L_2TAG_PACKET_10.0.1       # im is infinite
 
-         ja        .L_2TAG_PACKET_11.0.1       # im is NaN (same flags)
 
-         andl      $-2147483648, %edx          # im is finite: keep its sign only
 
-         cmpl      $-8388608, %eax             # re == -infinity (0xff800000)
 
-         je        .L_2TAG_PACKET_12.0.1
 
-         cmpl      $2139095040, %eax           # re == +infinity
 
-         jne       .L_2TAG_PACKET_13.0.1       # otherwise re is NaN
 
-         shlq      $32, %rdx                   # re = +infinity, im finite: return (+infinity, signed zero)
 
-         orq       %rdx, %rax
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- # -- re = -infinity, im finite: return (+0, infinity with the sign of im).
 
- .L_2TAG_PACKET_12.0.1:
 
-         xorl      %eax, %eax                  # result re = +0
 
-         orl       $2139095040, %edx           # result im = sign of im | infinity
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- # -- im is NaN: the result depends on whether re is an infinity.
 
- .L_2TAG_PACKET_11.0.1:
 
-         cmpl      $2139095040, %eax           # re == +infinity
 
-         je        .L_2TAG_PACKET_14.0.1
 
-         cmpl      $-8388608, %eax             # re == -infinity
 
-         je        .L_2TAG_PACKET_15.0.1
 
- # -- NaN operand with no infinity involved: both result parts are the product re*im.
 
- .L_2TAG_PACKET_13.0.1:
 
-         mulss     %xmm3, %xmm1                # xmm1 = im * re
 
-         movss     (%rsp), %xmm0
 
-         mulss     8(%rsp), %xmm0              # value is overwritten below; presumably kept for its
 
-                                               # effect on the FP status flags (TODO confirm)
 
-         movd      %xmm1, %eax                 # result re = product bits
 
-         movl      %eax, %edx                  # result im = same bits
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- # -- re = +infinity, im NaN: return (+infinity, im with bit 22 set).
 
- .L_2TAG_PACKET_14.0.1:
 
-         movss     8(%rsp), %xmm0
 
-         mulss     %xmm0, %xmm0                # value is overwritten below (flag side effect only, TODO confirm)
 
-         orl       $4194304, %edx              # set bit 22, the quiet bit of a float NaN
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax                  # eax still holds the +infinity bits of re
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- # -- re = -infinity, im NaN: return (im with bit 22 set, +infinity).
 
- .L_2TAG_PACKET_15.0.1:
 
-         movss     8(%rsp), %xmm0
 
-         mulss     %xmm0, %xmm0                # value is overwritten below (flag side effect only, TODO confirm)
 
-         movl      %edx, %eax                  # result re = bits of im
 
-         movl      $2139095040, %edx           # result im = +infinity
 
-         orl       $4194304, %eax              # set bit 22, the quiet bit of a float NaN
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
- 	.cfi_remember_state
 
-         movq      24(%rsp), %rsp
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- 	.cfi_restore_state
 
- # -- im is infinite: return (+infinity, im) whatever re is.
 
- .L_2TAG_PACKET_10.0.1:
 
-         movss     (%rsp), %xmm0
 
-         mulss     208(%r8), %xmm0             # re * 1.0f; value is overwritten below (flag side effect only, TODO confirm)
 
-         movl      $2139095040, %eax           # result re = +infinity; edx still holds the bits of im
 
-         shlq      $32, %rdx
 
-         orq       %rdx, %rax
 
-         shrq      $32, %rdx
 
-         movd      %rax, %xmm0
 
-         movq      32(%rsp), %rbx
 
-         movq      24(%rsp), %rsp              # last epilogue: no later code needs the frame state
 
- 	.cfi_def_cfa_offset 8
 
-         ret
 
- ..B1.3:
 
-         .align    16,0x90
 
- 	.cfi_endproc
 
- 	.type	csqrtf,@function
 
- 	.size	csqrtf,.-csqrtf
 
- 	.data
 
- # -- End  csqrtf
 
- 	.section .rodata, "a"
 
- 	.align 16
 
- 	.align 16
 
- csqrtf_table:	# 224 bytes of constants; csqrtf reads 16-byte rows at fixed offsets from %r8
 
- 	.long	4294967295	# +0: 0xffffffff, low dword of 0x7fffffffffffffff (mask that clears bit 63 of each qword)
 
- 	.long	2147483647	# 0x7fffffff
 
- 	.long	4294967295	# 0xffffffff (second lane, same mask)
 
- 	.long	2147483647	# 0x7fffffff
 
- 	.long	4294967295	# +16: 0xffffffff, low dword of 0x001fffffffffffff (double mantissa plus lowest exponent bit)
 
- 	.long	2097151	# 0x001fffff
 
- 	.long	4294967295	# 0xffffffff (second lane)
 
- 	.long	2097151	# 0x001fffff
 
- 	.long	0	# +32: added to the high dword when building the rsqrtss seed
 
- 	.long	132120576	# 0x07e00000
 
- 	.long	0
 
- 	.long	132120576	# 0x07e00000
 
- 	.long	0	# +48: double 1.0 in both lanes; also applied with pand as an exponent-bit mask
 
- 	.long	1072693248	# 0x3ff00000
 
- 	.long	0
 
- 	.long	1072693248	# 0x3ff00000
 
- 	.long	0	# +64: exponent correction subtracted (psubd) after the seed
 
- 	.long	1475346432	# 0x57f00000
 
- 	.long	0
 
- 	.long	1475346432	# 0x57f00000
 
- 	.long	2148429837	# +80: 0x800e700d, low dword of double 0x3fd80011800e700d (about 0.375)
 
- 	.long	1071120401	# 0x3fd80011
 
- 	.long	2148429837	# 0x800e700d (second lane)
 
- 	.long	1071120401	# 0x3fd80011
 
- 	.long	195330	# +96: 0x0002fb02, low dword of double 0xbfe000050002fb02 (about -0.5)
 
- 	.long	3219128325	# 0xbfe00005
 
- 	.long	195330	# 0x0002fb02 (second lane)
 
- 	.long	3219128325	# 0xbfe00005
 
- 	.long	0	# +112: doubles (1.0, 0.5), multiplied into (t, |im|) with mulpd
 
- 	.long	1072693248	# 0x3ff00000: 1.0
 
- 	.long	0
 
- 	.long	1071644672	# 0x3fe00000: 0.5
 
- 	.long	0	# +128: float exponent mask 0x7f800000 in the odd dwords
 
- 	.long	2139095040	# 0x7f800000
 
- 	.long	0
 
- 	.long	2139095040	# 0x7f800000
 
- 	.long	2147483647	# +144: 0x7fffffff (all bits except the sign) in the even dwords
 
- 	.long	2139095040	# 0x7f800000 (exponent field) in the odd dwords
 
- 	.long	2147483647	# 0x7fffffff
 
- 	.long	2139095040	# 0x7f800000
 
- 	.long	0	# +160: leading-one pattern OR-ed into zero-exponent lanes during the manual float to double conversion
 
- 	.long	133169152	# 0x07f00000
 
- 	.long	0
 
- 	.long	133169152	# 0x07f00000
 
- 	.long	0	# +176: extra exponent adjustment selected for zero-exponent lanes
 
- 	.long	4162846720	# 0xf8200000
 
- 	.long	0
 
- 	.long	4162846720	# 0xf8200000
 
- 	.long	0	# +192: exponent rebias added to every lane (0x380 = 1023 - 127)
 
- 	.long	939524096	# 0x38000000
 
- 	.long	0
 
- 	.long	939524096	# 0x38000000
 
- 	.long	1065353216	# +208: float 1.0 (0x3f800000) in all four lanes
 
- 	.long	1065353216	# 0x3f800000
 
- 	.long	1065353216	# 0x3f800000
 
- 	.long	1065353216	# 0x3f800000
 
- 	.type	csqrtf_table,@object
 
- 	.size	csqrtf_table,224	# 56 .long entries of 4 bytes each
 
- 	.data
 
- 	.section .note.GNU-stack, ""
 
- // -- Begin DWARF2 SEGMENT .eh_frame
 
- 	.section .eh_frame,"a",@progbits	# switch to the allocatable .eh_frame section
 
- .eh_frame_seg:	# label only; no data is emitted after it in the lines visible here
 
- 	.align 1
 
- # End
 
 
  |