/*
* Math library
*
* Copyright (C) 2016 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
*   * Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*   * Redistributions in binary form must reproduce the above copyright
*     notice, this list of conditions and the following disclaimer in
*     the documentation and/or other materials provided with the
*     distribution.
*   * Neither the name of Intel Corporation nor the names of its
*     contributors may be used to endorse or promote products derived
*     from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*
* Author Name
* History:
* 03-14-2016 Initial version. numerics svn rev. 12864
*/

# ----------------------------------------------------------------------
# File overview
#
# Syntax: AT&T (GNU as), x86-64, ELF.  The listing is compiler output for
# dpml_ux_sqrt.c (see the .file directive).  Arguments arrive in
# %rdi / %rsi / %rdx or %xmm0 as stated by the per-function parameter
# comments.
#
# Entry points defined here:
#   __dpml_ux_sqrt_evaluation__   shared evaluation core
#   __sqrtq                       unpack -> core (flag word 7) -> pack
#   __rsqrtq                      unpack -> core (flag word 0) -> pack
#   __dpml_ux_hypot__             multiply, multiply, addsub, core
#
# External symbols reached through the PLT or GOT and not defined in this
# listing: fegetenv, fesetenv, __stack_chk_fail, __sqrt_t_table,
# __dpml_multiply__, __dpml_addsub__, __dpml_extended_multiply__,
# __dpml_ffs_and_shift__, __dpml_unpack_x_or_y__, __dpml_pack__.
#
# The code accesses its working records as a 32-bit field at +0, a 32-bit
# field at +4 and 64-bit fields at +8 and +16 (24 bytes in total).
# NOTE(review): these look like sign, exponent, high fraction and low
# fraction of an unpacked extended float - confirm against the DPML
# headers before relying on that reading.
# ----------------------------------------------------------------------

        .file "dpml_ux_sqrt.c"
        .text
..TXTST0:
# -- Begin __dpml_ux_sqrt_evaluation__
#
# In:   %rdi  pointer to the input record            (kept in %rbx)
#       %rsi  flag word, tested with masks 1, 2, 4   (kept in %rbp)
#       %rdx  pointer to the output record           (kept in %r12)
# Out:  %rax  copy of %r15.  %r15 is 0 on every path except ..B1.18,
#             which sets it to 1 when the quadword at 72(%rsp) is 8192.
#
# Flag word as used below:
#   mask 1  selects the record at 56(%rsp) instead of 32(%rsp) as the
#           second operand of the last multiply (..B1.5)
#   mask 2  enables the path starting at ..B1.11
#   mask 4  enables ..B1.17 / ..B1.18 (only reachable inside that path)
#
# Stack frame after the prologue (104 bytes of locals, six pushes):
#   rsp+0    buffer handed to fegetenv and later to fesetenv
#   rsp+32   24-byte record built in ..B1.2
#   rsp+56   24-byte temporary record
#   rsp+80   8-byte scratch used to reload an integer pattern as a double
#   rsp+88   stack-protector cookie (%fs:40 xor %rsp)
#
# NOTE(review): from the call pattern the result looks like one
# Newton-Raphson style correction, r * (3 - x*y*y) / 2 with r = x*y when
# mask 1 is set and r = y otherwise, where y is the estimate built in
# ..B1.2.  This depends on the semantics of __dpml_multiply__ and
# __dpml_addsub__, which are not visible here - confirm.
        .text
        .align    16,0x90
        .globl __dpml_ux_sqrt_evaluation__
__dpml_ux_sqrt_evaluation__:
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
..B1.1:
        .cfi_startproc
..___tag_value___dpml_ux_sqrt_evaluation__.1:
..L2:
        pushq     %r12
        .cfi_def_cfa_offset 16
        .cfi_offset 12, -16
        pushq     %r13
        .cfi_def_cfa_offset 24
        .cfi_offset 13, -24
        pushq     %r14
        .cfi_def_cfa_offset 32
        .cfi_offset 14, -32
        pushq     %r15
        .cfi_def_cfa_offset 40
        .cfi_offset 15, -40
        pushq     %rbx
        .cfi_def_cfa_offset 48
        .cfi_offset 3, -48
        pushq     %rbp
        .cfi_def_cfa_offset 56
        .cfi_offset 6, -56
        subq      $104, %rsp
        .cfi_def_cfa_offset 160
        movq      %rdi, %rbx                    # rbx = input record pointer
        movq      %fs:40, %rax
        lea       (%rsp), %rdi                  # rdi = buffer at rsp+0 for fegetenv
        xorq      %rsp, %rax
        movq      %rdx, %r12                    # r12 = output record pointer
        movq      %rax, 88(%rdi)                # store cookie at rsp+88
        movq      %rsi, %rbp                    # rbp = flag word
..___tag_value___dpml_ux_sqrt_evaluation__.16:
        call      fegetenv@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.17:
# ..B1.2 builds the record at 32(%rsp) from the input record using scalar
# double arithmetic, a lookup in __sqrt_t_table and constants from
# __sqrt_x_table.  Integer and SSE instructions are interleaved by the
# compiler, so the comments describe individual values rather than a
# linear recipe.
..B1.2:
        movq      8(%rbx), %r8                  # r8  = input quadword at +8
        movq      %r8, %r10
        shrq      $11, %r10
        movq      $0x3fd0000000000000, %r9
        movl      4(%rbx), %eax                 # eax = input 32-bit field at +4
        addq      %r9, %r10                     # r10 = (r8 >> 11) + 0x3fd0000000000000
        movl      %eax, %r9d
        movq      %r8, %r13
        andl      $1, %r9d                      # r9d = low bit of the +4 field
        movq      %r8, %r15
        addl      %r9d, %eax                    # eax = +4 field rounded up to even
        pxor      %xmm2, %xmm2
        shrq      $56, %r8                      # r8 = top 8 bits of the +8 quadword
        pxor      %xmm4, %xmm4
        movq      %r10, 80(%rsp)                # spill the pattern to reload it as a double
        pxor      %xmm7, %xmm7
        movq      __sqrt_t_table@GOTPCREL(%rip), %r10   # r10 = address of __sqrt_t_table
        lea       40(%r9), %r14d                # r14d = shift count, 40 or 41
        shll      $7, %r9d
        movl      %r14d, %ecx
        xorq      %r9, %r8                      # flip index bit 7 when the low bit was set
        negl      %ecx
        shlq      $4, %r8                       # index * 16: table entries are 16 bytes apart
        pxor      %xmm6, %xmm6
        shlq      %cl, %r13                     # r13 = +8 quadword << (64 - shift count)
        movl      %r14d, %ecx
        movsd     80(%rsp), %xmm1               # xmm1 = t, the spilled pattern read as a double
        lea       56(%rsp), %rdx                # rdx = temporary record, stays live until the call
        movq      16(%rbx), %r11                # r11 = input quadword at +16
        movaps    %xmm1, %xmm0
        cvtss2sd  4(%r10,%r8), %xmm2            # c1 = float at entry+4
        cvtss2sd  (%r10,%r8), %xmm4             # c0 = float at entry+0
        mulsd     %xmm1, %xmm0                  # t*t
        mulsd     %xmm1, %xmm2                  # c1*t
        mulsd     %xmm0, %xmm4                  # c0*t*t
        addsd     8(%r10,%r8), %xmm2            # c1*t + c2, c2 = double at entry+8
        shrq      %cl, %r11
        movl      %r14d, %ecx
        shrq      %cl, %r15                     # r15 = +8 quadword >> shift count
        orq       %r11, %r13                    # r13 = low 64 bits of the (+8:+16) pair >> shift count
        shrq      $11, %r13
        lea       128+__sqrt_x_table(%rip), %r11
        cvtsi2sdq %r15, %xmm7
        cvtsi2sdq %r13, %xmm6
        addsd     %xmm2, %xmm4                  # xmm4 = c0*t*t + c1*t + c2
        mulsd     (%r11), %xmm7                 # scale by table double at +128 (2^-24)
        lea       136+__sqrt_x_table(%rip), %r13
        lea       96+__sqrt_x_table(%rip), %r8
        movaps    %xmm7, %xmm8
        lea       152+__sqrt_x_table(%rip), %r15
        lea       104+__sqrt_x_table(%rip), %r14
        lea       32(%rsp), %rdi                # first argument of the multiply below
        movsd     .L_2il0floatpacket.21(%rip), %xmm12   # xmm12 = 2^63
        movq      %rbx, %rsi                    # second argument: the input record
        mulsd     (%r13), %xmm6                 # scale by table double at +136 (2^-77)
        mulsd     (%r8), %xmm4                  # scale by table double at +96
        addsd     %xmm6, %xmm8
        movaps    %xmm8, %xmm3
        lea       144+__sqrt_x_table(%rip), %r8
        mulsd     %xmm4, %xmm3
        cvtsd2ss  %xmm4, %xmm4                  # round to single precision ...
        cvtsd2ss  %xmm3, %xmm3
        cvtss2sd  %xmm4, %xmm4                  # ... and widen back to double
        cvtss2sd  %xmm3, %xmm3
        mulsd     %xmm4, %xmm7
        mulsd     %xmm4, %xmm6
        subsd     %xmm3, %xmm7
        movsd     (%r15), %xmm10                # table double at +152
        movaps    %xmm4, %xmm9
        mulsd     %xmm8, %xmm10
        movaps    %xmm3, %xmm5
        addsd     %xmm6, %xmm7
        mulsd     %xmm4, %xmm9
        mulsd     %xmm4, %xmm5
        mulsd     %xmm4, %xmm7
        mulsd     %xmm9, %xmm10
        movsd     (%r14), %xmm11                # table double at +104
        movsd     (%r8), %xmm15                 # table double at +144
        lea       112+__sqrt_x_table(%rip), %r8
        sarl      $1, %eax                      # eax = even field / 2 (arithmetic shift)
        subsd     %xmm5, %xmm11
        subsd     %xmm10, %xmm15
        subsd     %xmm7, %xmm11
        mulsd     %xmm4, %xmm15
        movsd     (%r8), %xmm13                 # table double at +112
        lea       120+__sqrt_x_table(%rip), %r8
        mulsd     %xmm4, %xmm13
        negl      %eax
        mulsd     %xmm11, %xmm15
        comisd    %xmm12, %xmm13                # compare xmm13 with 2^63, flags consumed by jae
        mulsd     (%r8), %xmm15                 # scale by table double at +120
        movaps    %xmm13, %xmm14
        cvttsd2si %xmm15, %r9                   # r9 = truncated integer correction term
        subsd     %xmm12, %xmm14
        jae       ..L18                         # xmm13 >= 2^63: keep xmm13 - 2^63
        movaps    %xmm13, %xmm14                # otherwise convert xmm13 unchanged
..L18:
        cvttsd2si %xmm14, %r10
        movq      %r9, %r8
        incl      %eax                          # eax = 1 - (even field / 2)
        shlq      $39, %r10
        sarq      $12, %r8
        sarq      $11, %r9
        addq      %r8, %r10
        andq      $1, %r9
        movq      $0x4000000000000000, %r8
        addq      %r9, %r10                     # r10 = (r10 << 39) + (r9 >> 12) + bit 11 of r9
        andq      %r10, %r8                     # r8 = bit 62 of that sum
        testq     %r10, %r10                    # SF = bit 63 of the sum, consumed by cmovns
        movl      $0, -24(%rdx)                 # record at 32(%rsp): +0 field = 0
        movl      %eax, -20(%rdx)               # record at 32(%rsp): +4 field
        movq      $0, -8(%rdx)                  # record at 32(%rsp): +16 quadword = 0
        lea       -1(,%r8,2), %r8               # r8 = 2*r8 - 1, lea leaves the flags intact
        cmovns    %r8, %r10                     # bit 63 clear: replace the sum with that value
        movq      %r10, -16(%rdx)               # record at 32(%rsp): +8 quadword
# Call with rdi = 32(%rsp), rsi = input record, rdx = 56(%rsp).
# NOTE(review): presumably stores estimate * input into 56(%rsp) - confirm
# the argument order of __dpml_multiply__.
..___tag_value___dpml_ux_sqrt_evaluation__.19:
        call      __dpml_multiply__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.20:
# Call with rdi = 32(%rsp), rsi = 56(%rsp), rdx = output record.
..B1.3:
        movq      %r12, %rdx
        lea       32(%rsp), %rdi
        lea       56(%rsp), %rsi
..___tag_value___dpml_ux_sqrt_evaluation__.21:
        call      __dpml_multiply__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.22:
# Call with rdi = record at __sqrt_x_table+160, rsi = output record,
# edx = 9, rcx = output record.
# NOTE(review): the table record presumably encodes the constant 3 and
# flag value 9 presumably selects a subtraction - confirm.
..B1.4:
        lea       160+__sqrt_x_table(%rip), %rdi
        movq      %r12, %rsi
        movl      $9, %edx
        movq      %r12, %rcx
..___tag_value___dpml_ux_sqrt_evaluation__.23:
        call      __dpml_addsub__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.24:
# Call with rdi = output record, rdx = output record and
# rsi = 56(%rsp) when flag mask 1 is set, 32(%rsp) otherwise.
..B1.5:
        testq     $1, %rbp
        lea       32(%rsp), %rsi
        movq      %r12, %rdi
        lea       56(%rsp), %rax
        cmovne    %rax, %rsi
        movq      %r12, %rdx
..___tag_value___dpml_ux_sqrt_evaluation__.25:
        call      __dpml_multiply__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.26:
..B1.6:
        decl      4(%r12)                       # output +4 field -= 1
        lea       (%rsp), %rdi                  # hand the saved buffer at rsp+0 to fesetenv
..___tag_value___dpml_ux_sqrt_evaluation__.27:
        call      fesetenv@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.28:
..B1.7:
        xorl      %r15d, %r15d                  # default return value 0
        testq     $2, %rbp
        jne       ..B1.11                       # flag mask 2 set: run the extra path
# Common exit: verify the stack-protector cookie, return %r15 in %rax.
..B1.8:
        movq      88(%rsp), %rdx
        xorq      %rsp, %rdx
        cmpq      %fs:40, %rdx
        jne       ..B1.10
..B1.9:
        movq      %r15, %rax
        addq      $104, %rsp
        .cfi_def_cfa_offset 56
        .cfi_restore 6
        popq      %rbp
        .cfi_def_cfa_offset 48
        .cfi_restore 3
        popq      %rbx
        .cfi_def_cfa_offset 40
        .cfi_restore 15
        popq      %r15
        .cfi_def_cfa_offset 32
        .cfi_restore 14
        popq      %r14
        .cfi_def_cfa_offset 24
        .cfi_restore 13
        popq      %r13
        .cfi_def_cfa_offset 16
        .cfi_restore 12
        popq      %r12
        .cfi_def_cfa_offset 8
        ret
        .cfi_def_cfa_offset 160
        .cfi_offset 3, -48
        .cfi_offset 6, -56
        .cfi_offset 12, -16
        .cfi_offset 13, -24
        .cfi_offset 14, -32
        .cfi_offset 15, -40
..B1.10:
        call      __stack_chk_fail@PLT
# Extra path for flag mask 2.  Call with rdi = output record, esi = 0.
..B1.11:
        movq      %r12, %rdi
        xorl      %esi, %esi
..___tag_value___dpml_ux_sqrt_evaluation__.49:
        call      __dpml_ffs_and_shift__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.50:
# Continue only when bits 4..13 of (output +16 quadword + 8) are all zero
# (16368 = 0x3ff0).  Otherwise leave through ..B1.8 with %r15 = 0.
..B1.12:
        movq      16(%r12), %r14
        lea       8(%r14), %r8
        testq     $16368, %r8
        jne       ..B1.8
# r14 = +16 quadword with its low 15 bits cleared, r13 = r14 + 16384.
# r13 is stored as a trial value, then the output record is passed twice
# to __dpml_extended_multiply__ with 56(%rsp) and 32(%rsp) in rdx / rcx.
..B1.13:
        andq      $-32768, %r14
        movq      %r12, %rdi
        movq      %r12, %rsi
        lea       56(%rsp), %rdx
        lea       32(%rsp), %rcx
        lea       16384(%r14), %r13
        movq      %r13, 16(%r12)
..___tag_value___dpml_ux_sqrt_evaluation__.51:
        call      __dpml_extended_multiply__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.52:
# Call with rdi = input record, rsi = rcx = 56(%rsp), edx = 1.
..B1.14:
        movq      %rbx, %rdi
        lea       56(%rsp), %rsi
        movl      $1, %edx
        movq      %rsi, %rcx
..___tag_value___dpml_ux_sqrt_evaluation__.53:
        call      __dpml_addsub__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.54:
# Call with rdi = rcx = 56(%rsp), rsi = 32(%rsp), edx = 1.
..B1.15:
        movl      $1, %edx
        lea       56(%rsp), %rdi
        movq      %rdi, %rcx
        lea       32(%rsp), %rsi
..___tag_value___dpml_ux_sqrt_evaluation__.55:
        call      __dpml_addsub__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.56:
# Pick the final +16 quadword: r13 when the +0 field of the record at
# 56(%rsp) is zero, r14 otherwise.  Leave unless flag mask 4 is set.
..B1.16:
        movl      56(%rsp), %r8d                # r8d = +0 field of the temporary record
        testl     %r8d, %r8d
        cmove     %r13, %r14
        testq     $4, %rbp
        movq      %r14, 16(%r12)                # movq does not alter the flags used by je
        je        ..B1.8
# Flag mask 4: adjust the +4 field of the record at 56(%rsp) by
# (output +4 field - 113), then call with rdi = rcx = 56(%rsp),
# rsi = output record, edx = 8 | (r8d == 0 ? 1 : 0).
# %ebx is reused as the constant 1 here, the input pointer is dead.
..B1.17:
        movl      4(%r12), %ebx
        xorl      %edx, %edx
        addl      $-113, %ebx
        lea       56(%rsp), %rdi
        subl      %ebx, 4(%rdi)
        movl      $1, %ebx
        testl     %r8d, %r8d
        movq      %r12, %rsi
        cmove     %ebx, %edx
        movq      %rdi, %rcx
        orl       $8, %edx
..___tag_value___dpml_ux_sqrt_evaluation__.57:
        call      __dpml_addsub__@PLT
..___tag_value___dpml_ux_sqrt_evaluation__.58:
# Return 1 when the +16 quadword of the record at 56(%rsp) equals 8192,
# 0 otherwise.
..B1.18:
        xorl      %r15d, %r15d
        cmpq      $8192, 72(%rsp)
        cmove     %ebx, %r15d
        jmp       ..B1.8
        .align    16,0x90
        .cfi_endproc
        .type __dpml_ux_sqrt_evaluation__,@function
        .size __dpml_ux_sqrt_evaluation__,.-__dpml_ux_sqrt_evaluation__
        .data
# -- End __dpml_ux_sqrt_evaluation__
        .text
# -- Begin __sqrtq
#
# In:   %xmm0  16-byte argument, copied to 48(%rsp)
# Out:  %xmm0  16 bytes loaded from 112(%rsp)
#
# Flow: __dpml_unpack_x_or_y__, then (only when its return value is not
# negative) __dpml_ux_sqrt_evaluation__ with flag word 7, then
# __dpml_pack__.
#
# Stack frame (136 bytes):
#   rsp+0    two quadwords, passed to unpack in %r9 and to pack in %r8.
#            Quadword 0 is set to 0.  Quadword 1 starts as 1 and is
#            replaced by (evaluation return value == 0 ? 1 : 0).
#   rsp+48   copy of the argument
#   rsp+64   record receiving the evaluation result, input to pack
#   rsp+88   record passed to unpack in %rdx, then input to evaluation
#   rsp+112  16-byte result area, passed to unpack in %r8 and pack in %rsi
#   rsp+128  stack-protector cookie
        .text
        .align    16,0x90
        .globl __sqrtq
__sqrtq:
# parameter 1: %xmm0
..B2.1:
        .cfi_startproc
..___tag_value___sqrtq.60:
..L61:
        subq      $136, %rsp
        .cfi_def_cfa_offset 144
        xorl      %esi, %esi                    # second argument of unpack = 0
        lea       __sqrt_x_table(%rip), %rcx    # table base +0
        lea       48(%rsp), %rdi
        movq      %fs:40, %rax
        lea       88(%rsp), %rdx
        xorq      %rsp, %rax
        lea       112(%rsp), %r8
        movaps    %xmm0, -40(%rdx)              # argument copy at rsp+48
        lea       (%rsp), %r9
        movq      %rax, 40(%rdx)                # cookie at rsp+128
        movq      $1, -80(%rdx)                 # quadword at rsp+8 = 1
        movq      $0, (%r9)                     # quadword at rsp+0 = 0
..___tag_value___sqrtq.63:
        call      __dpml_unpack_x_or_y__@PLT
..___tag_value___sqrtq.64:
..B2.2:
        testq     %rax, %rax
        jl        ..B2.5                        # negative return: skip evaluation and pack
..B2.3:
        movl      $7, %esi                      # flag word 7 = masks 1, 2 and 4
        lea       88(%rsp), %rdi
        lea       64(%rsp), %rdx
..___tag_value___sqrtq.65:
        call      __dpml_ux_sqrt_evaluation__@PLT
..___tag_value___sqrtq.66:
..B2.4:
        movl      $1, %edx
        xorl      %ecx, %ecx
        testq     %rax, %rax
        lea       64(%rsp), %rdi
        cmove     %edx, %ecx                    # ecx = 1 when the evaluation returned 0
        xorl      %edx, %edx                    # third argument of pack = 0
        lea       112(%rsp), %rsi
        movq      %rcx, -104(%rsi)              # quadword at rsp+8
        xorl      %ecx, %ecx                    # fourth argument of pack = 0
        lea       (%rsp), %r8
..___tag_value___sqrtq.67:
        call      __dpml_pack__@PLT
..___tag_value___sqrtq.68:
..B2.5:
        movq      128(%rsp), %rax
        xorq      %rsp, %rax
        movaps    112(%rsp), %xmm0              # load the result before the cookie check
        cmpq      %fs:40, %rax
        jne       ..B2.7
..B2.6:
        addq      $136, %rsp
        .cfi_def_cfa_offset 8
        ret
        .cfi_def_cfa_offset 144
..B2.7:
        call      __stack_chk_fail@PLT
        .align    16,0x90
        .cfi_endproc
        .type __sqrtq,@function
        .size __sqrtq,.-__sqrtq
        .data
# -- End __sqrtq
        .text
# -- Begin __rsqrtq
#
# In:   %xmm0  16-byte argument, copied to 48(%rsp)
# Out:  %xmm0  16 bytes loaded from 112(%rsp)
#
# Same structure and stack frame as __sqrtq.  The two differences are the
# table address passed to unpack (__sqrt_x_table + 8) and the flag word
# passed to the evaluation (0, so none of masks 1, 2, 4 is set).
        .text
        .align    16,0x90
        .globl __rsqrtq
__rsqrtq:
# parameter 1: %xmm0
..B3.1:
        .cfi_startproc
..___tag_value___rsqrtq.72:
..L73:
        subq      $136, %rsp
        .cfi_def_cfa_offset 144
        xorl      %esi, %esi                    # second argument of unpack = 0
        lea       8+__sqrt_x_table(%rip), %rcx  # table base +8
        lea       48(%rsp), %rdi
        movq      %fs:40, %rax
        lea       88(%rsp), %rdx
        xorq      %rsp, %rax
        lea       112(%rsp), %r8
        movaps    %xmm0, -40(%rdx)              # argument copy at rsp+48
        lea       (%rsp), %r9
        movq      %rax, 40(%rdx)                # cookie at rsp+128
        movq      $1, -80(%rdx)                 # quadword at rsp+8 = 1
        movq      $0, (%r9)                     # quadword at rsp+0 = 0
..___tag_value___rsqrtq.75:
        call      __dpml_unpack_x_or_y__@PLT
..___tag_value___rsqrtq.76:
..B3.2:
        testq     %rax, %rax
        jl        ..B3.5                        # negative return: skip evaluation and pack
..B3.3:
        xorl      %esi, %esi                    # flag word 0
        lea       88(%rsp), %rdi
        lea       64(%rsp), %rdx
..___tag_value___rsqrtq.77:
        call      __dpml_ux_sqrt_evaluation__@PLT
..___tag_value___rsqrtq.78:
..B3.4:
        movl      $1, %edx
        xorl      %ecx, %ecx
        testq     %rax, %rax
        lea       64(%rsp), %rdi
        cmove     %edx, %ecx                    # ecx = 1 when the evaluation returned 0
        xorl      %edx, %edx                    # third argument of pack = 0
        lea       112(%rsp), %rsi
        movq      %rcx, -104(%rsi)              # quadword at rsp+8
        xorl      %ecx, %ecx                    # fourth argument of pack = 0
        lea       (%rsp), %r8
..___tag_value___rsqrtq.79:
        call      __dpml_pack__@PLT
..___tag_value___rsqrtq.80:
..B3.5:
        movq      128(%rsp), %rax
        xorq      %rsp, %rax
        movaps    112(%rsp), %xmm0              # load the result before the cookie check
        cmpq      %fs:40, %rax
        jne       ..B3.7
..B3.6:
        addq      $136, %rsp
        .cfi_def_cfa_offset 8
        ret
        .cfi_def_cfa_offset 144
..B3.7:
        call      __stack_chk_fail@PLT
        .align    16,0x90
        .cfi_endproc
        .type __rsqrtq,@function
        .size __rsqrtq,.-__rsqrtq
        .data
# -- End __rsqrtq
        .text
# -- Begin __dpml_ux_hypot__
#
# In:   %rdi  pointer to the first input record
#       %rsi  pointer to the second input record   (kept in %rbp)
#       %rdx  pointer to the output record         (kept in %r12)
# Out:  %rax  0 or 1:
#               0 when the evaluation returns 0,
#               0 when the +4 fields of the two temporaries differ by
#                 more than 128,
#               0 when an inspected quadword is non-zero,
#               otherwise the value computed in ..B4.16.
#
# Stack frame (88 bytes of locals, two pushes):
#   rsp+0    destination of multiply(first, first)
#   rsp+24   destination of multiply(second, second)
#   rsp+48   destination of addsub(rsp+0, rsp+24, 0), input to evaluation
#   rsp+72   stack-protector cookie
        .text
        .align    16,0x90
        .globl __dpml_ux_hypot__
__dpml_ux_hypot__:
# parameter 1: %rdi
# parameter 2: %rsi
# parameter 3: %rdx
..B4.1:
        .cfi_startproc
..___tag_value___dpml_ux_hypot__.84:
..L85:
        pushq     %r12
        .cfi_def_cfa_offset 16
        .cfi_offset 12, -16
        pushq     %rbp
        .cfi_def_cfa_offset 24
        .cfi_offset 6, -24
        subq      $88, %rsp
        .cfi_def_cfa_offset 112
        movq      %rdx, %r12                    # r12 = output record pointer
        movq      %rsi, %rbp                    # rbp = second input pointer
        movq      %rdi, %rsi                    # multiply the first input by itself
        movq      %fs:40, %rax
        lea       (%rsp), %rdx
        xorq      %rsp, %rax
        movq      %rax, 72(%rdx)                # cookie at rsp+72
..___tag_value___dpml_ux_hypot__.91:
        call      __dpml_multiply__@PLT
..___tag_value___dpml_ux_hypot__.92:
..B4.2:
        movq      %rbp, %rdi                    # multiply the second input by itself
        movq      %rbp, %rsi
        lea       24(%rsp), %rdx
..___tag_value___dpml_ux_hypot__.93:
        call      __dpml_multiply__@PLT
..___tag_value___dpml_ux_hypot__.94:
# Call with rdi = rsp+0, rsi = rsp+24, edx = 0, rcx = rsp+48.
..B4.3:
        xorl      %edx, %edx
        lea       (%rsp), %rdi
        lea       24(%rsp), %rsi
        lea       48(%rsp), %rcx
..___tag_value___dpml_ux_hypot__.95:
        call      __dpml_addsub__@PLT
..___tag_value___dpml_ux_hypot__.96:
..B4.4:
        xorl      %esi, %esi
        lea       48(%rsp), %rdi
..___tag_value___dpml_ux_hypot__.97:
        call      __dpml_ffs_and_shift__@PLT
..___tag_value___dpml_ux_hypot__.98:
..B4.5:
        movl      $7, %esi                      # flag word 7 = masks 1, 2 and 4
        lea       48(%rsp), %rdi
        movq      %r12, %rdx
..___tag_value___dpml_ux_hypot__.99:
        call      __dpml_ux_sqrt_evaluation__@PLT
..___tag_value___dpml_ux_hypot__.100:
..B4.6:
        testq     %rax, %rax
        je        ..B4.18                       # evaluation returned 0: return 0
# rcx = (+4 field of rsp+0) - (+4 field of rsp+24), sign-extended.
# rbp selects the record whose quadwords are inspected below: rsp+0 when
# the difference is not negative, rsp+24 otherwise.
# NOTE(review): this selects the record with the larger +4 field - confirm
# that this is the intended operand.
..B4.7:
        movl      4(%rsp), %ecx
        lea       (%rsp), %rbp
        subl      28(%rbp), %ecx
        movslq    %ecx, %rcx
        testq     %rcx, %rcx
        jns       ..B4.9
..B4.8:
        negq      %rcx                          # rcx = absolute difference
        lea       24(%rsp), %rbp
..B4.9:
        cmpq      $128, %rcx
        jle       ..B4.11                       # signed compare: continue when rcx <= 128
..B4.10:
        xorl      %eax, %eax                    # return 0
        jmp       ..B4.18
# Walk the quadwords at +16 (rdx = 1) and then +8 (rdx = 0) of the
# selected record, consuming 64 from rcx per step.
..B4.11:
        movl      $1, %edx
..B4.12:
        movq      8(%rbp,%rdx,8), %rax
        testq     %rax, %rax
        jne       ..B4.10                       # non-zero quadword: return 0
..B4.13:
        cmpq      $64, %rcx
        jl        ..B4.16
..B4.14:
        addq      $-64, %rcx
        decq      %rdx
        jns       ..B4.12
# esi = 1 when clearing the low cl bits of %rax leaves it unchanged.
# NOTE(review): both ways of reaching this block (jl in ..B4.13 and the
# fall-through from ..B4.14) follow a not-taken jne in ..B4.12, so %rax is
# 0 here and the comparison always yields 1.  Confirm against the C source
# whether the zero test was meant to come after the cmpq $64 check.
..B4.16:
        movq      %rax, %rdx
        movl      $1, %ebp
        shrq      %cl, %rdx
        xorl      %esi, %esi
        shlq      %cl, %rdx
        cmpq      %rdx, %rax
        cmove     %ebp, %esi
        movl      %esi, %eax
..B4.18:
        movq      72(%rsp), %rdx
        xorq      %rsp, %rdx
        cmpq      %fs:40, %rdx
        jne       ..B4.20
..B4.19:
        addq      $88, %rsp
        .cfi_def_cfa_offset 24
        .cfi_restore 6
        popq      %rbp
        .cfi_def_cfa_offset 16
        .cfi_restore 12
        popq      %r12
        .cfi_def_cfa_offset 8
        ret
        .cfi_def_cfa_offset 112
        .cfi_offset 6, -24
        .cfi_offset 12, -16
..B4.20:
        call      __stack_chk_fail@PLT
        .align    16,0x90
        .cfi_endproc
        .type __dpml_ux_hypot__,@function
        .size __dpml_ux_hypot__,.-__dpml_ux_hypot__
        .data
# -- End __dpml_ux_hypot__
        .section .rodata, "a"
        .align 16
        .align 16
# __sqrt_x_table: 192 bytes of read-only constants, written as pairs of
# 32-bit words (low word first).
#
# Bytes +0 .. +95 are only referenced by address: base +0 is passed in
# %rcx to __dpml_unpack_x_or_y__ by __sqrtq and base +8 by __rsqrtq.
# Code in this file does not interpret their contents.
__sqrt_x_table:
        .long 1088750600                        # table +0
        .long 605086734
        .long 1088766984                        # table +8
        .long 519758862
        .long 112                               # table +16
        .long 0
        .long 113                               # table +24
        .long 0
        .long 114                               # table +32
        .long 0
        .long 0                                 # table +40
        .long 0
        .long 0                                 # table +48
        .long 268435456
        .long 858997845                         # table +56
        .long 34
        .long 1636176969                        # table +64
        .long 409044504
        .long 1099306057                        # table +72
        .long 404751376
        .long 545392672                         # table +80
        .long 404783624
        .long 142938632                         # table +88
        .long 302522498
        .long 1719614413                        # table +96: double 0x3ff6a09e667f3bcd (about 1.41421356)
        .long 1073127582
        .long 0                                 # table +104: double 1.0
        .long 1072693248
        .long 0                                 # table +112: double 2^24
        .long 1097859072
        .long 0                                 # table +120: double 2^75
        .long 1151336448
        .long 0                                 # table +128: double 2^-24
        .long 1047527424
        .long 0                                 # table +136: double 2^-77
        .long 991952896
        .long 0                                 # table +144: double 0.875
        .long 1072431104
        .long 0                                 # table +152: double 0.375
        .long 1071120384
        .long 0                                 # table +160: 24-byte record, +0 field = 0
        .long 2                                 #   +4 field = 2
        .long 0                                 #   +8 quadword = 0xc000000000000000
        .long 3221225472
        .long 0                                 #   +16 quadword = 0
        .long 0
        .long 0                                 # table +184: zero quadword
        .long 0
        .type __sqrt_x_table,@object
        .size __sqrt_x_table,192
        .align 8
# Double 2^63 (0x43e0000000000000), compared against in ..B1.2.
.L_2il0floatpacket.21:
        .long 0x00000000,0x43e00000
        .type .L_2il0floatpacket.21,@object
        .size .L_2il0floatpacket.21,8
        .align 4
# Single-precision 2^63 (0x5f000000).  Not referenced by code in this file.
.L_2il0floatpacket.20:
        .long 0x5f000000
        .type .L_2il0floatpacket.20,@object
        .size .L_2il0floatpacket.20,4
        .data
        .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
# End