123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425 |
- /*
- * Math library
- *
- * Copyright (C) 2016 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * Author Name <jingwei.zhang@intel.com>
- * History:
- * 03-14-2016 Initial version. numerics svn rev. 12864
- */
- .file "fmaf_wmt.c"
- .text
- ..TXTST0:
- # -- Begin static_func: position-independent helper; returns the run-time address of static_const_table in %eax
- .text
- .align 16,0x90
- static_func:                                             # in: nothing; out: %eax; only %eax is modified (lea leaves the flags untouched)
- ..B1.1:
- ..L1:
- call ..L2                                                # call the next instruction: pushes the run-time address of ..L2
- ..L2:
- popl %eax                                                # %eax = run-time address of ..L2 (stack is balanced again)
- lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax         # %eax = run-time address of the global offset table
- lea static_const_table@GOTOFF(%eax), %eax                # %eax = GOT address + GOT-relative offset of static_const_table
- ret                                                      # return the table address in %eax
- .align 16,0x90
- .type static_func,@function
- .size static_func,.-static_func
- .data
- # -- End static_func
- .text
- # -- Begin fmaf
- #
- # fmaf: single-precision x*y + z for 32-bit x86 (GNU as, AT&T syntax).
- #
- # In:    three single-precision values on the stack (see the "parameter"
- #        notes below): x, y (the factors) and z (the addend).
- # Out:   the result is left on the x87 stack in st(0).
- # Uses:  %eax, %ecx, %edx, %xmm0-%xmm5 and the x87 stack.
- #        %ebx holds the constant-table base and is saved/restored.
- #
- # Outline:
- #   1. NaN, +-0, +-1.0 and infinite operands are handled with plain x87
- #      arithmetic (.L_2TAG_PACKET_0/1/2/3/4/5/6).
- #   2. Otherwise x, y and z are widened to double (subnormals by hand,
- #      see .L_2TAG_PACKET_7) and the product is formed with mulsd.
- #        - If the exponents of the product and of z are close, the two
- #          are simply added with addsd.
- #        - Otherwise the two 53-bit significands are aligned and combined
- #          as 64-bit integers.  Bits that fall off during alignment are
- #          recorded in the low-order bits of the resulting double (a
- #          "sticky" marker) so that the final narrowing can still tell an
- #          exact halfway case from a value slightly off it.
- #   3. The double is stored and reloaded as a single to round it
- #      (.L_2TAG_PACKET_14).
- #
- # Offsets into static_const_table used below (base in %ebx):
- #     0  0x7fffffff in the low dword   (clears the sign of a single)
- #    16  the double 1.0
- #    32  the double 2^-97
- #    48  0x80000000 in the low dword   (sign bit of a single)
- #    64  low 52 bits set               (mantissa field of a double)
- #    80  high 12 bits set              (sign + exponent field of a double)
- #    96  bit 52 set                    (implicit leading 1 of a double)
- #   112  integer 1 in the low quadword
- #
- .text
- .align 16,0x90
- .globl fmaf
- fmaf:
- # parameter 1: 8 + %ebp   (x, first factor)
- # parameter 2: 12 + %ebp  (y, second factor)
- # parameter 3: 16 + %ebp  (z, addend)
- ..B2.1:
- ..L3:
- ..B2.2:
- pushl %ebp
- movl %esp, %ebp
- subl $136, %esp                      # scratch frame; the parameters are now at 144/148/152(%esp)
- movl %ebx, 80(%esp)                  # save %ebx, restored at .L_2TAG_PACKET_15.0.2
- call static_func                     # %eax = address of static_const_table
- movl %eax, %ebx                      # %ebx = constant-table base for the rest of the function
- movss 144(%esp), %xmm0               # xmm0 = x
- movss 148(%esp), %xmm1               # xmm1 = y
- movss 152(%esp), %xmm2               # xmm2 = z
- movss %xmm0, 8(%esp)                 # spill x, y, z so that their raw bits can be read
- movss %xmm1, 16(%esp)                # as integers and so that the x87 paths can load them
- movss %xmm2, 24(%esp)
- movl 8(%esp), %eax                   # eax = bit pattern of x
- ucomiss %xmm1, %xmm0                 # parity flag set if x or y is a NaN (unordered)
- movl 16(%esp), %ecx                  # ecx = bit pattern of y (mov leaves the flags alone)
- jp .L_2TAG_PACKET_0.0.2              # x or y is NaN
- movl 24(%esp), %edx                  # edx = bit pattern of z
- ucomiss %xmm2, %xmm2                 # parity flag set if z is a NaN
- jp .L_2TAG_PACKET_1.0.2              # z is NaN
- andl $2147483647, %eax               # eax = |x| bits (mask 0x7fffffff)
- je .L_2TAG_PACKET_2.0.2              # x is +-0
- cmpl $1065353216, %eax               # 0x3f800000: |x| == 1.0
- je .L_2TAG_PACKET_3.0.2
- cmpl $2139095040, %eax               # 0x7f800000: |x| is infinite
- je .L_2TAG_PACKET_4.0.2
- andl $2147483647, %ecx               # same three tests for y
- je .L_2TAG_PACKET_2.0.2
- cmpl $1065353216, %ecx
- je .L_2TAG_PACKET_3.0.2
- cmpl $2139095040, %ecx
- je .L_2TAG_PACKET_4.0.2
- andl $2147483647, %edx               # edx = |z| bits
- je .L_2TAG_PACKET_5.0.2              # z is +-0
- cmpl $2139095040, %edx
- je .L_2TAG_PACKET_6.0.2              # z is infinite while x and y are finite
- cmpl $8388608, %eax                  # 0x00800000: below this |x| is subnormal
- jl .L_2TAG_PACKET_7.0.2
- cvtps2pd %xmm0, %xmm3                # xmm3 = (double) x
- .L_2TAG_PACKET_8.0.2:
- cmpl $8388608, %ecx                  # is y subnormal?
- jl .L_2TAG_PACKET_9.0.2
- cvtps2pd %xmm1, %xmm4                # xmm4 = (double) y
- .L_2TAG_PACKET_10.0.2:
- cmpl $8388608, %edx                  # is z subnormal?
- jl .L_2TAG_PACKET_11.0.2
- cvtps2pd %xmm2, %xmm0                # xmm0 = (double) z; xmm2 keeps the single z
- .L_2TAG_PACKET_12.0.2:
- mulsd %xmm4, %xmm3                   # xmm3 = x*y in double (two 24-bit significands give at most 48 bits)
- pextrw $3, %xmm3, %edx               # top 16 bits of the product: sign, exponent, 4 mantissa bits
- andl $32752, %edx                    # 0x7ff0: exponent field, still shifted left by 4
- movl $96, %eax                       # bias of 6 exponent steps (6 << 4)
- pextrw $3, %xmm0, %ecx               # same extraction for z
- andl $32752, %ecx
- addl %edx, %eax
- subl %ecx, %eax                      # eax = 16 * (exp(x*y) - exp(z) + 6)
- cmpl $560, %eax                      # 560 = 35 << 4
- jae .L_2TAG_PACKET_13.0.2            # unsigned test: taken unless -6 <= exp(x*y) - exp(z) <= 28
- addsd %xmm3, %xmm0                   # exponents close: ordinary double addition
- jmp .L_2TAG_PACKET_14.0.2
- .L_2TAG_PACKET_0.0.2:                # x or y is NaN
- .L_2TAG_PACKET_2.0.2:                # x or y is +-0
- .L_2TAG_PACKET_3.0.2:                # |x| or |y| is 1.0
- .L_2TAG_PACKET_4.0.2:                # x or y is infinite
- .L_2TAG_PACKET_5.0.2:                # z is +-0
- flds 8(%esp)                         # st0 = x
- fmuls 16(%esp)                       # st0 = x*y
- fadds 24(%esp)                       # st0 = x*y + z, evaluated by the x87 unit
- jmp .L_2TAG_PACKET_15.0.2            # returned as is (no store/reload rounding step)
- .L_2TAG_PACKET_1.0.2:                # z is NaN
- flds 8(%esp)
- fadds 24(%esp)                       # st0 = x + z; y is not used on this path
- jmp .L_2TAG_PACKET_15.0.2
- .L_2TAG_PACKET_6.0.2:                # z is infinite, x and y are finite
- flds 24(%esp)                        # result is z itself
- jmp .L_2TAG_PACKET_15.0.2
- # Subnormal x: build (double) x from its bits instead of using cvtps2pd.
- # With m = the 23 magnitude bits of x, (1.0 | m) is the double 1 + m*2^-52;
- # subtracting 1.0 and scaling by 2^-97 gives m*2^-149 = |x|.
- .L_2TAG_PACKET_7.0.2:
- movaps (%ebx), %xmm3                 # 0x7fffffff mask
- pand %xmm0, %xmm3                    # xmm3 = m
- movaps 48(%ebx), %xmm5               # 0x80000000 mask
- orpd 16(%ebx), %xmm3                 # 1 + m*2^-52
- pand %xmm0, %xmm5                    # xmm5 = sign bit of x (bit 31)
- subsd 16(%ebx), %xmm3                # m*2^-52
- psllq $32, %xmm5                     # move the sign to bit 63
- mulsd 32(%ebx), %xmm3                # m*2^-149
- orpd %xmm5, %xmm3                    # apply the sign
- jmp .L_2TAG_PACKET_8.0.2
- .L_2TAG_PACKET_9.0.2:                # subnormal y: same construction, result in xmm4
- movaps (%ebx), %xmm4
- pand %xmm1, %xmm4
- movaps 48(%ebx), %xmm5
- orpd 16(%ebx), %xmm4
- pand %xmm1, %xmm5
- subsd 16(%ebx), %xmm4
- psllq $32, %xmm5
- mulsd 32(%ebx), %xmm4
- orpd %xmm5, %xmm4
- jmp .L_2TAG_PACKET_10.0.2
- .L_2TAG_PACKET_11.0.2:               # subnormal z: same construction, result in xmm0
- movaps (%ebx), %xmm0
- pand %xmm2, %xmm0
- movaps 48(%ebx), %xmm5
- orpd 16(%ebx), %xmm0
- pand %xmm2, %xmm5
- subsd 16(%ebx), %xmm0
- psllq $32, %xmm5
- mulsd 32(%ebx), %xmm0
- orpd %xmm5, %xmm0
- jmp .L_2TAG_PACKET_12.0.2
- # Exponents far apart: xmm3 = product, xmm0 = z (both double), xmm2 = z (single).
- .L_2TAG_PACKET_13.0.2:
- pextrw $1, %xmm2, %ecx               # upper 16 bits of the single z (bit 15 = its sign)
- pextrw $3, %xmm3, %edx               # upper 16 bits of the product (bit 15 = its sign)
- sarl $4, %eax                        # eax = exp(x*y) - exp(z) + 6, signed
- xorl %edx, %ecx
- testl $32768, %ecx                   # 0x8000: do the two signs differ?
- jne .L_2TAG_PACKET_16.0.2            # yes: effective subtraction
- cmpl $53, %eax
- jge .L_2TAG_PACKET_17.0.2            # exponent gap >= 47: z only leaves a sticky mark
- cmpl $-19, %eax
- jle .L_2TAG_PACKET_18.0.2            # exponent gap <= -25: the product only leaves a sticky mark
- cmpl $6, %eax
- jge .L_2TAG_PACKET_19.0.2            # product has the larger exponent
- # Same sign, z has the larger exponent: z + (product >> shift).
- movl $6, %ecx
- subl %eax, %ecx                      # ecx = exp(z) - exp(x*y) = right-shift count
- addl $58, %eax                       # eax = 64 - shift
- movsd 64(%ebx), %xmm1                # mantissa mask
- pand 64(%ebx), %xmm3                 # product mantissa field
- pxor %xmm5, %xmm5
- por 96(%ebx), %xmm3                  # add the implicit 1: 53-bit product significand
- pxor %xmm2, %xmm2
- pinsrw $0, %eax, %xmm5               # xmm5 = 64 - shift
- pinsrw $0, %ecx, %xmm2               # xmm2 = shift
- pand %xmm0, %xmm1                    # z mantissa field
- pand 80(%ebx), %xmm0                 # xmm0 = sign + exponent of z
- movdqa %xmm3, %xmm4
- psllq %xmm5, %xmm3                   # the bits the right shift drops, left-justified
- por 96(%ebx), %xmm1                  # z significand
- psrlq %xmm2, %xmm4                   # product aligned to z
- psrlq $40, %xmm3                     # dropped bits moved down into the low mantissa bits
- paddq %xmm4, %xmm1                   # significand sum
- movdqa %xmm1, %xmm5
- psrlq $53, %xmm1                     # xmm1 = 1 if the sum carried out of 53 bits, else 0
- movdqa %xmm5, %xmm4
- psrlq %xmm1, %xmm5                   # renormalise on carry
- pand %xmm1, %xmm4                    # keep the bit pushed out by that renormalisation
- psllq $52, %xmm1                     # the carry as an exponent increment
- por %xmm3, %xmm5                     # merge the dropped product bits (sticky)
- paddq %xmm1, %xmm0                   # bump the exponent on carry
- por %xmm4, %xmm5
- pand 64(%ebx), %xmm5                 # strip the implicit 1
- por %xmm5, %xmm0                     # reassemble sign | exponent | mantissa
- jmp .L_2TAG_PACKET_14.0.2
- .L_2TAG_PACKET_17.0.2:               # same sign, product dominates
- movapd 112(%ebx), %xmm0              # integer 1
- orpd %xmm3, %xmm0                    # product with its lowest mantissa bit forced on
- jmp .L_2TAG_PACKET_14.0.2
- .L_2TAG_PACKET_18.0.2:               # same sign, z dominates
- orpd 112(%ebx), %xmm0                # z with its lowest mantissa bit forced on
- jmp .L_2TAG_PACKET_14.0.2
- # Same sign, product has the larger exponent: product + (z >> shift).
- .L_2TAG_PACKET_19.0.2:
- movl $70, %ecx
- subl %eax, %ecx                      # ecx = 64 - shift
- subl $6, %eax                        # eax = exp(x*y) - exp(z) = right-shift count
- movsd 64(%ebx), %xmm1
- pand 64(%ebx), %xmm0                 # z mantissa field
- pxor %xmm5, %xmm5
- por 96(%ebx), %xmm0                  # z significand
- pxor %xmm2, %xmm2
- pinsrw $0, %ecx, %xmm5
- pinsrw $0, %eax, %xmm2
- pand %xmm3, %xmm1                    # product mantissa field
- pand 80(%ebx), %xmm3                 # xmm3 = sign + exponent of the product
- movdqa %xmm0, %xmm4
- psllq %xmm5, %xmm0                   # the z bits the right shift drops
- por 96(%ebx), %xmm1                  # product significand
- psrlq %xmm2, %xmm4                   # z aligned to the product
- pxor %xmm2, %xmm2
- psrlq $18, %xmm0                     # keep the dropped bits below bit 63 ...
- psubq %xmm0, %xmm2                   # ... so that 0 - bits has bit 63 set iff any was nonzero
- paddq %xmm4, %xmm1                   # significand sum
- psrlq $63, %xmm2                     # xmm2 = sticky bit (0 or 1)
- movdqa %xmm1, %xmm0
- psrlq $53, %xmm1                     # carry out of 53 bits?
- movdqa %xmm0, %xmm4
- psrlq %xmm1, %xmm0
- pand %xmm1, %xmm4
- psllq $52, %xmm1
- por %xmm2, %xmm0                     # merge the sticky bit
- paddq %xmm1, %xmm3
- por %xmm4, %xmm0
- pand 64(%ebx), %xmm0
- por %xmm3, %xmm0
- jmp .L_2TAG_PACKET_14.0.2
- # Signs differ (effective subtraction).
- .L_2TAG_PACKET_16.0.2:
- cmpl $53, %eax
- jge .L_2TAG_PACKET_20.0.2            # product dominates
- cmpl $-22, %eax
- jle .L_2TAG_PACKET_21.0.2            # z dominates
- cmpl $6, %eax
- jge .L_2TAG_PACKET_22.0.2            # product has the larger exponent
- # z has the larger exponent: z - (product >> shift).
- movl $6, %ecx
- subl %eax, %ecx                      # ecx = right-shift count
- addl $58, %eax                       # eax = 64 - shift
- movsd 64(%ebx), %xmm1
- pand 64(%ebx), %xmm3
- pxor %xmm5, %xmm5
- por 96(%ebx), %xmm3                  # product significand
- pxor %xmm2, %xmm2
- pinsrw $0, %eax, %xmm5
- pinsrw $0, %ecx, %xmm2
- pand %xmm0, %xmm1
- pand 80(%ebx), %xmm0                 # xmm0 = sign + exponent of z
- movdqa %xmm3, %xmm4
- psllq %xmm5, %xmm3                   # the product bits the right shift drops
- por 96(%ebx), %xmm1                  # z significand
- psrlq %xmm2, %xmm4                   # product aligned to z
- pxor %xmm2, %xmm2
- psrlq $37, %xmm3
- psubq %xmm3, %xmm2                   # bit 63 set iff any dropped bit was nonzero
- psubq %xmm4, %xmm1                   # z significand - aligned product
- psrlq $63, %xmm2                     # xmm2 = 1 if bits were dropped, else 0
- psubq %xmm2, %xmm1                   # borrow: xmm1 is now the truncated true difference
- # Fix: record the inexactness as a sticky low bit.  Without it a truncated
- # difference that lands exactly on a single-precision halfway point is
- # rounded as a tie although the true value lies strictly above it, e.g.
- # x = 1+2^-23, y = 1-2^-23, z = -16777218 produced -16777216 instead of
- # -16777218.
- por %xmm2, %xmm1
- movdqa %xmm1, %xmm3
- movsd 112(%ebx), %xmm2               # integer 1
- psrlq $52, %xmm1                     # 1 if the leading bit survived, 0 if it was borrowed
- psubq %xmm1, %xmm2                   # left-shift count needed to renormalise (0 or 1)
- movdqa %xmm2, %xmm1
- psllq $52, %xmm2                     # the same count as an exponent decrement
- psllq %xmm1, %xmm3                   # renormalise
- pand 64(%ebx), %xmm3                 # strip the implicit 1
- psubq %xmm2, %xmm0                   # adjust the exponent
- por %xmm3, %xmm0
- jmp .L_2TAG_PACKET_14.0.2
- .L_2TAG_PACKET_20.0.2:               # signs differ, product dominates: product minus one unit in its last place
- movsd 64(%ebx), %xmm1
- pand %xmm3, %xmm1
- por 96(%ebx), %xmm1                  # product significand
- psubq 112(%ebx), %xmm1               # minus 1
- movapd %xmm1, %xmm0
- psrlq $52, %xmm1                     # did the leading bit survive?
- movapd 112(%ebx), %xmm4
- psubq %xmm1, %xmm4                   # renormalisation shift (0 or 1)
- psllq %xmm4, %xmm0
- psllq $52, %xmm4
- pand 80(%ebx), %xmm3                 # sign + exponent of the product
- psubq %xmm4, %xmm3                   # exponent adjusted for the shift
- pand 64(%ebx), %xmm0
- por %xmm3, %xmm0
- jmp .L_2TAG_PACKET_14.0.2
- .L_2TAG_PACKET_21.0.2:               # signs differ, z dominates: z minus one unit in its last place
- movsd 64(%ebx), %xmm1
- pand %xmm0, %xmm1
- por 96(%ebx), %xmm1                  # z significand
- psubq 112(%ebx), %xmm1               # minus 1
- movapd %xmm1, %xmm2
- psrlq $52, %xmm1
- movapd 112(%ebx), %xmm3
- psubq %xmm1, %xmm3                   # renormalisation shift (0 or 1)
- psllq %xmm3, %xmm2
- psllq $52, %xmm3
- pand 80(%ebx), %xmm0                 # sign + exponent of z
- psubq %xmm3, %xmm0
- pand 64(%ebx), %xmm2
- por %xmm2, %xmm0
- jmp .L_2TAG_PACKET_14.0.2
- # Signs differ, product has the larger exponent: product - (z >> shift).
- .L_2TAG_PACKET_22.0.2:
- movl $70, %ecx
- subl %eax, %ecx                      # ecx = 64 - shift
- subl $6, %eax                        # eax = right-shift count
- movsd 64(%ebx), %xmm1
- pand 64(%ebx), %xmm0
- pxor %xmm5, %xmm5
- por 96(%ebx), %xmm0                  # z significand
- pxor %xmm2, %xmm2
- pinsrw $0, %ecx, %xmm5
- pinsrw $0, %eax, %xmm2
- pand %xmm3, %xmm1
- pand 80(%ebx), %xmm3                 # xmm3 = sign + exponent of the product
- movdqa %xmm0, %xmm4
- psllq %xmm5, %xmm0                   # the z bits the right shift drops
- por 96(%ebx), %xmm1                  # product significand
- psrlq %xmm2, %xmm4                   # z aligned to the product
- pxor %xmm2, %xmm2
- psrlq $18, %xmm0
- psubq %xmm0, %xmm2                   # bit 63 set iff any dropped bit was nonzero
- psubq %xmm4, %xmm1                   # product significand - aligned z
- psrlq $63, %xmm2                     # xmm2 = 1 if bits were dropped, else 0
- psubq %xmm2, %xmm1                   # borrow: xmm1 is now the truncated true difference
- # Fix: keep the sticky bit here as well (same reasoning as in the path
- # above), e.g. x = 1+2^-22, y = 1.75+2^-23, z = -255*2^-53 produced
- # 1.75+2^-21 instead of 1.75+2^-21+2^-23.
- por %xmm2, %xmm1
- movdqa %xmm1, %xmm0
- movsd 112(%ebx), %xmm2
- psrlq $52, %xmm1
- psubq %xmm1, %xmm2                   # renormalisation shift (0 or 1)
- movdqa %xmm2, %xmm1
- psllq $52, %xmm2
- psllq %xmm1, %xmm0
- pand 64(%ebx), %xmm0
- psubq %xmm2, %xmm3
- por %xmm3, %xmm0
- jmp .L_2TAG_PACKET_14.0.2
- .L_2TAG_PACKET_14.0.2:               # xmm0 = result as a double
- movq %xmm0, (%esp)
- fldl (%esp)
- fstps 32(%esp)                       # narrow to single: this store performs the rounding
- flds 32(%esp)                        # st0 = rounded result
- .L_2TAG_PACKET_15.0.2:               # common exit, result already in st0
- movl 80(%esp), %ebx                  # restore %ebx
- movl %ebp, %esp
- popl %ebp
- ret
- ..B2.3:
- .align 16,0x90
- .type fmaf,@function
- .size fmaf,.-fmaf
- .data
- # -- End fmaf
- .section .rodata, "a"
- .align 16
- # Constant table for fmaf: eight 16-byte entries, four little-endian dwords each.
- # Only the low quadword of every entry is nonzero.
- static_const_table:
- .long 0x7fffffff, 0x00000000, 0x00000000, 0x00000000    # +0:   clears the sign bit of a single held in the low dword
- .long 0x00000000, 0x3ff00000, 0x00000000, 0x00000000    # +16:  the double 1.0
- .long 0x00000000, 0x39e00000, 0x00000000, 0x00000000    # +32:  the double 2^-97 (scale factor for subnormal inputs)
- .long 0x80000000, 0x00000000, 0x00000000, 0x00000000    # +48:  sign bit of a single held in the low dword
- .long 0xffffffff, 0x000fffff, 0x00000000, 0x00000000    # +64:  52-bit mantissa field of a double
- .long 0x00000000, 0xfff00000, 0x00000000, 0x00000000    # +80:  sign and exponent field of a double
- .long 0x00000000, 0x00100000, 0x00000000, 0x00000000    # +96:  bit 52, the implicit leading 1 of a double significand
- .long 0x00000001, 0x00000000, 0x00000000, 0x00000000    # +112: integer 1 (lowest mantissa bit / one unit in the last place)
- .type static_const_table,@object
- .size static_const_table,. - static_const_table
- .data
- .section .note.GNU-stack, ""
- # End
|