lroundf_wmt.S 6.3 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "lroundf_wmt.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin static_func
  # static_func: position-independent helper that returns the run-time
  # address of static_const_table in %eax. It takes no arguments and the
  # only register it writes is %eax (lea does not modify flags).
  41. .text
  42. .align 16,0x90
  43. static_func:
  44. ..B1.1:
  45. ..L1:
  # call/pop pair: the call pushes the address of ..L2 as its return address
  # and the popl at ..L2 immediately retrieves it, so %eax = address of ..L2.
  46. call ..L2
  47. ..L2:
  48. popl %eax
  # Turn the address of ..L2 into the GOT base; the [. - ..L2] term corrects
  # for the distance between this instruction and ..L2.
  49. lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
  # Add the GOT-relative offset of the table: %eax = &static_const_table.
  50. lea static_const_table@GOTOFF(%eax), %eax
  51. ret
  52. .align 16,0x90
  53. .type static_func,@function
  54. .size static_func,.-static_func
  55. .data
  56. # -- End static_func
  57. .text
  58. # -- Begin lroundf
  # lroundf: converts the single-precision argument x to a 32-bit integer in
  # %eax by adding 0.5 carrying the sign of x and truncating, so halfway
  # cases move away from zero.
  #   In:   x on the stack (8(%ebp) once the frame is set up).
  #   Out:  %eax = rounded value; 0x80000000 when x is NaN or when the
  #         magnitude does not fit (see .L_2TAG_PACKET_4.0.3).
  #   Uses: %eax, %ecx, %edx, %xmm0-%xmm7, flags; %ebx is saved in the
  #         frame at 64(%esp) and restored before returning.
  # Throughout, %eax holds (biased exponent of x - 127) << 7, which is why
  # the thresholds below are multiples of 128.
  59. .text
  60. .align 16,0x90
  61. .globl lroundf
  62. lroundf:
  63. # parameter 1: 8 + %ebp
  64. ..B2.1:
  65. ..L3:
  66. ..B2.2:
  # Frame setup: 120 bytes of locals, so the argument at 8(%ebp) is also
  # reachable as 128(%esp).
  67. pushl %ebp
  68. movl %esp, %ebp
  69. subl $120, %esp
  70. movl %ebx, 64(%esp)
  # %ebx = &static_const_table for the rest of the function.
  71. call static_func
  72. movl %eax, %ebx
  # Load x and keep a copy in the frame (the copy at 16(%esp) is not read
  # again in the visible code).
  73. movss 128(%esp), %xmm0
  74. movss %xmm0, 16(%esp)
  # Constants: xmm4 = 0x80000000 (sign mask), xmm5 = 0x3F000000 (0.5f),
  # xmm3 = 150, xmm1 = 0xFFFFFFFF.
  75. movd 16(%ebx), %xmm4
  76. movd 64(%ebx), %xmm5
  77. movd 112(%ebx), %xmm3
  78. movd (%ebx), %xmm1
  # After these four instructions: xmm4 = x with the sign bit cleared,
  # xmm2 = sign bit of x OR'ed with 0x3F000000, i.e. 0.5f with the sign of x.
  79. movdqa %xmm4, %xmm2
  80. pandn %xmm0, %xmm4
  81. pand %xmm0, %xmm2
  82. por %xmm5, %xmm2
  # Word 1 of xmm0 is bits 16..31 of x; 32640 (0x7F80) keeps the eight
  # exponent bits and 16256 is 127 << 7, leaving (exponent - 127) << 7.
  83. pextrw $1, %xmm0, %eax
  84. andl $32640, %eax
  85. subl $16256, %eax
  # The threshold 2944 is 23 << 7. The unsigned jae leaves the fast path both
  # for unbiased exponents >= 23 and for negative ones (they wrap to large
  # unsigned values).
  86. cmpl $2944, %eax
  87. jae .L_2TAG_PACKET_0.0.3
  # Fast path, unbiased exponent in 0..22.
  # xmm3 = 150 - biased exponent = 23 - unbiased exponent; xmm1 becomes an
  # all-ones mask shifted left by that amount.
  88. psrld $23, %xmm4
  89. psubd %xmm4, %xmm3
  90. pslld %xmm3, %xmm1
  # Add the signed 0.5, clear the low bits selected by the mask, then
  # convert with truncation.
  91. addss %xmm2, %xmm0
  92. pand %xmm1, %xmm0
  93. cvttss2si %xmm0, %eax
  94. jmp .L_2TAG_PACKET_1.0.3
  95. .L_2TAG_PACKET_0.0.3:
  # Flags are still those of the cmpl $2944 above (jae does not alter them).
  # Sign set means %eax was negative: biased exponent below 127.
  96. js .L_2TAG_PACKET_2.0.3
  # The threshold 3840 is 30 << 7. Below it the unbiased exponent is 23..29,
  # where a single-precision value has no fractional bits, so x is converted
  # directly.
  97. cmpl $3840, %eax
  98. jae .L_2TAG_PACKET_3.0.3
  99. cvttss2si %xmm0, %eax
  100. jmp .L_2TAG_PACKET_1.0.3
  101. .L_2TAG_PACKET_3.0.3:
  # Unbiased exponent >= 30: the conversion is done with integer operations
  # and checked against the representable range.
  # xmm6 = sign of x as 0 or 1; xmm7 = 0x7FFFFFFF + sign, the largest
  # magnitude accepted for that sign.
  102. movdqa %xmm2, %xmm6
  103. movd 32(%ebx), %xmm7
  104. psrld $31, %xmm6
  105. paddd %xmm6, %xmm7
  # The threshold 4096 is 32 << 7: unbiased exponent 32 or more (this
  # includes the all-ones exponent) goes to the out-of-range handler.
  106. cmpl $4096, %eax
  107. jge .L_2TAG_PACKET_4.0.3
  # Unbiased exponent 30 or 31: rebuild the integer magnitude from the
  # mantissa. xmm5 = mantissa bits of x with bit 23 (0x00800000) set,
  # xmm4 = biased exponent - 150 = left-shift count.
  108. movd 96(%ebx), %xmm5
  109. movd 80(%ebx), %xmm2
  110. psrld $23, %xmm4
  111. pand %xmm0, %xmm5
  112. psubd %xmm3, %xmm4
  113. por %xmm2, %xmm5
  114. movd %xmm6, %edx
  115. psllq %xmm4, %xmm5
  # %eax = low 32 bits of the magnitude; xmm7 = limit - magnitude.
  116. movd %xmm5, %eax
  117. psubd %xmm5, %xmm7
  # pmovmskb gathers the top bit of every byte of xmm7; 136 (0x88) keeps
  # bits 3 and 7, the sign bits of dwords 0 and 1. A set bit means the
  # magnitude exceeded the limit.
  118. pmovmskb %xmm7, %ecx
  119. andl $136, %ecx
  120. jne .L_2TAG_PACKET_4.0.3
  # In range: negate the magnitude when x was negative (%edx = 1).
  121. testl %edx, %edx
  122. je .L_2TAG_PACKET_5.0.3
  123. negl %eax
  124. .L_2TAG_PACKET_5.0.3:
  125. jmp .L_2TAG_PACKET_1.0.3
  126. .L_2TAG_PACKET_2.0.3:
  # Biased exponent below 127. Adding 128 (1 << 7) moves the zero point to
  # biased exponent 126; a result that is still negative means the exponent
  # is below 126 and the function returns 0.
  127. addl $128, %eax
  128. js .L_2TAG_PACKET_6.0.3
  # Biased exponent exactly 126: xmm2 (0.5f with the sign of x) is doubled
  # and converted, giving +1 or -1.
  129. addss %xmm2, %xmm2
  130. cvttss2si %xmm2, %eax
  131. jmp .L_2TAG_PACKET_1.0.3
  132. .L_2TAG_PACKET_6.0.3:
  133. xorl %eax, %eax
  134. jmp .L_2TAG_PACKET_1.0.3
  135. .L_2TAG_PACKET_4.0.3:
  # Out-of-range handler: reached for unbiased exponent >= 32 or when the
  # range check above failed. The value returned is 0x80000000.
  # xmm1 = 0x7F800000 is multiplied by zero and the product is never read;
  # presumably this is done only to raise the invalid-operation
  # floating-point exception -- TODO confirm.
  136. movd 48(%ebx), %xmm1
  137. movl $-2147483648, %eax
  138. pxor %xmm2, %xmm2
  139. movd %eax, %xmm7
  140. mulss %xmm2, %xmm1
  # Comparing x with itself sets the carry flag only when x is NaN; in that
  # case the error-support call is skipped and %eax is returned as is.
  141. ucomiss %xmm0, %xmm0
  142. jc .L_2TAG_PACKET_7.0.3
  # Store the result value in the frame, reserve 32 bytes and pass four
  # stack arguments to __libm_error_support: two pointers, a pointer to the
  # stored result (40(%esp) here is 8(%esp) before the subl) and code 191.
  # NOTE(review): after the subl $32 the incoming argument is at 160(%esp)
  # and the frame copy at 48(%esp); 128(%esp) addresses a frame slot that
  # the visible code never writes. Confirm which address arguments 1 and 2
  # are meant to carry.
  143. movq %xmm7, 8(%esp)
  144. subl $32, %esp
  145. lea 128(%esp), %eax
  146. movl %eax, (%esp)
  147. lea 128(%esp), %eax
  148. movl %eax, 4(%esp)
  149. lea 40(%esp), %eax
  150. movl %eax, 8(%esp)
  151. movl $191, %eax
  152. movl %eax, 12(%esp)
  153. call __libm_error_support
  154. addl $32, %esp
  # Reload the result from the slot whose address was passed as argument 3.
  155. movl 8(%esp), %eax
  156. .L_2TAG_PACKET_7.0.3:
  157. .L_2TAG_PACKET_1.0.3:
  # Common exit: restore %ebx, release the frame and return with the result
  # in %eax.
  158. movl 64(%esp), %ebx
  159. movl %ebp, %esp
  160. popl %ebp
  161. ret
  162. ..B2.3:
  163. .align 16,0x90
  164. .type lroundf,@function
  165. .size lroundf,.-lroundf
  166. .data
  167. # -- End lroundf
  168. .text
  169. # -- Begin __libm_error_support
  # __libm_error_support: stub that returns immediately. It reads none of
  # its four stack parameters and writes no register or memory. The symbol
  # has no .globl directive in the visible source, and lroundf above calls
  # it by name.
  170. .text
  171. .align 16,0x90
  172. __libm_error_support:
  173. # parameter 1: 4 + %esp
  174. # parameter 2: 8 + %esp
  175. # parameter 3: 12 + %esp
  176. # parameter 4: 16 + %esp
  177. ..B3.1:
  178. ..L4:
  179. ret
  180. .align 16,0x90
  181. .type __libm_error_support,@function
  182. .size __libm_error_support,.-__libm_error_support
  183. .data
  184. # -- End __libm_error_support
  # Read-only constants for lroundf: eight 16-byte rows, each holding one
  # 32-bit value repeated four times. lroundf addresses the rows by
  # hard-coded byte offsets from the table base and loads only the first
  # dword of a row (movd), so the row order and size must not change.
  185. .section .rodata, "a"
  186. .align 16
  187. .align 16
  188. static_const_table:
  # Offset 0: 0xFFFFFFFF, all ones; shifted left to build the truncation mask.
  189. .long 4294967295
  190. .long 4294967295
  191. .long 4294967295
  192. .long 4294967295
  # Offset 16: 0x80000000, sign-bit mask.
  193. .long 2147483648
  194. .long 2147483648
  195. .long 2147483648
  196. .long 2147483648
  # Offset 32: 0x7FFFFFFF, base of the magnitude limit in the range check.
  197. .long 2147483647
  198. .long 2147483647
  199. .long 2147483647
  200. .long 2147483647
  # Offset 48: 0x7F800000, exponent field all ones; multiplied by zero in the
  # out-of-range handler.
  201. .long 2139095040
  202. .long 2139095040
  203. .long 2139095040
  204. .long 2139095040
  # Offset 64: 0x3F000000, the single-precision encoding of 0.5.
  205. .long 1056964608
  206. .long 1056964608
  207. .long 1056964608
  208. .long 1056964608
  # Offset 80: 0x00800000, bit 23; OR'ed onto the extracted mantissa bits.
  209. .long 8388608
  210. .long 8388608
  211. .long 8388608
  212. .long 8388608
  # Offset 96: 0x007FFFFF, mask for the 23 mantissa bits.
  213. .long 8388607
  214. .long 8388607
  215. .long 8388607
  216. .long 8388607
  # Offset 112: the value 150, combined with the biased exponent to obtain
  # shift counts.
  217. .long 150
  218. .long 150
  219. .long 150
  220. .long 150
  221. .type static_const_table,@object
  222. .size static_const_table,128
  223. .data
  # Empty .note.GNU-stack section: the GNU toolchain convention for declaring
  # that this object does not need an executable stack.
  224. .section .note.GNU-stack, ""
  225. # End