tanhf_wmt.S 7.8 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "tanhf_wmt.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin static_func
  # static_func: file-local, position-independent address helper.
  # Takes no arguments.
  # Out:   %eax = run-time address of static_const_table
  # Clobb: %eax only. The one stack slot pushed by the internal call is
  #        popped again before returning.
  41. .text
  42. .align 16,0x90
  43. static_func:
  44. ..B1.1:
  45. ..L1:
  46. call ..L2 # call to the very next instruction: pushes the address of ..L2
  47. ..L2:
  48. popl %eax # %eax = run-time address of ..L2
  49. lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax # %eax = address of the global offset table
  50. lea static_const_table@GOTOFF(%eax), %eax # %eax = GOT address + GOT-relative offset of the table
  51. ret
  52. .align 16,0x90
  53. .type static_func,@function
  54. .size static_func,.-static_func
  55. .data
  56. # -- End static_func
  57. .text
  58. # -- Begin tanhf
  # float tanhf(float x)    (32-bit x86, AT&T syntax, SSE2 arithmetic)
  # In:    x on the stack at 8(%ebp); read below as 112(%esp) once the
  #        104-byte frame has been allocated.
  # Out:   result on the x87 stack in st(0).
  # Saved: %ebx (spilled to 40(%esp) and reloaded), %ebp.
  # Uses:  %eax, %ecx, %edx, %xmm0-%xmm7, x87 st(0).
  #
  # Register roles after the common entry sequence:
  #   %ebx = base address of static_const_table
  #   %ecx = top 16 bits of |x| (minus a bias while range checks are running)
  #   %edx = sign of x as 0x8000 or 0
  #
  # Range dispatch on the top 16 bits of |x|:
  #   [0x3D80, 0x41A0)  0.0625 <= |x| < 20     (1 - e) / (1 + e), e ~ exp(-2|x|)
  #   [0x3900, 0x3D80)  2^-13 <= |x| < 0.0625  odd polynomial in x (PACKET_0)
  #   [0x0080, 0x3900)  normal, |x| < 2^-13    returns x (PACKET_2)
  #   [0x0000, 0x0080)  zero or subnormal      returns x (PACKET_4)
  #   [0x41A0, 0x7F80)  finite, |x| >= 20      returns +/-1 (PACKET_3)
  #   [0x7F80, 0x8000)  infinity gives +/-1, NaN gives x + x (PACKET_5)
  # Every path ends at PACKET_1, the common exit.
  59. .text
  60. .align 16,0x90
  61. .globl tanhf
  62. tanhf:
  63. # parameter 1: 8 + %ebp
  64. ..B2.1:
  65. ..L3:
  66. ..B2.2:
  67. pushl %ebp
  68. movl %esp, %ebp
  69. subl $104, %esp # 104-byte frame; x is now at 112(%esp) = 8(%ebp)
  70. movl %ebx, 40(%esp) # spill %ebx, reloaded before returning
  71. call static_func # %eax = address of static_const_table
  72. movl %eax, %ebx # %ebx = table base for the rest of the function
  73. movss 112(%esp), %xmm0 # xmm0 = x
  74. xorpd %xmm1, %xmm1
  75. movsd 304(%ebx), %xmm3 # xmm3 = scale, bits 0x40571547652B82FE, about 92.33 (presumably 64/ln 2)
  76. movl $14336, %eax # 0x3800: exponent re-bias used by the float-to-double bit conversion below
  77. pinsrw $3, %eax, %xmm1 # xmm1 = 0x3800 in the top word of the low double
  78. movsd 288(%ebx), %xmm6 # xmm6 = 0x4338000000000000 = 1.5 * 2^52, used as a rounding shifter
  79. pextrw $1, %xmm0, %ecx # ecx = top 16 bits of x: sign, 8 exponent bits, 7 fraction bits
  80. psllq $33, %xmm0 # shift the sign bit out of the low quadword ...
  81. psrlq $4, %xmm0 # ... and move exponent and fraction to their double-precision positions
  82. paddw %xmm0, %xmm1 # xmm1 = |x| as a double (holds for the normal inputs used on the main path)
  83. movapd 256(%ebx), %xmm4 # xmm4 = coefficient pair {c4, c2}
  84. movl $32768, %edx
  85. andl %ecx, %edx # edx = sign of x as 0x8000 or 0
  86. andl $32767, %ecx # ecx = top 16 bits of |x|
  87. subl $15744, %ecx # subtract 0x3D80, the top half of 0.0625f
  88. cmpl $1056, %ecx # window of 0x420: 0.0625 <= |x| < 20
  89. jae .L_2TAG_PACKET_0.0.2 # outside the window; the unsigned compare also catches |x| < 0.0625
  90. mulsd %xmm1, %xmm3 # xmm3 = t = |x| * scale
  91. cvtsd2si %xmm3, %eax # eax = n = t converted to an integer (current rounding mode)
  92. movapd %xmm3, %xmm2 # xmm2 = t
  93. addsd %xmm6, %xmm3
  94. xorpd %xmm0, %xmm0
  95. subsd %xmm6, %xmm3 # xmm3 = t rounded to an integer by adding and removing the shifter
  96. movapd 272(%ebx), %xmm1 # xmm1 = coefficient pair {c3, c1}
  97. subsd %xmm3, %xmm2 # xmm2 = r = t - rounded t
  98. movl $31, %ecx
  99. andl %eax, %ecx # ecx = j = n & 31
  100. movsd (%ebx,%ecx,8), %xmm5 # xmm5 = T[j], entry j of the 32-entry table at offset 0
  101. shrl $1, %eax
  102. andl $65520, %eax # eax = (n >> 1) & 0xFFF0, i.e. k = n >> 5 shifted into exponent position
  103. subl $16368, %eax
  104. negl %eax # eax = 0x3FF0 - (k << 4): top word of the double 2^-k
  105. pshufd $68, %xmm2, %xmm2 # xmm2 = {r, r}
  106. pinsrw $3, %eax, %xmm0 # xmm0 = 2^-k
  107. mulpd %xmm2, %xmm4 # xmm4 = {c4*r, c2*r}
  108. mulpd %xmm2, %xmm1 # xmm1 = {c3*r, c1*r}
  109. movsd 312(%ebx), %xmm7 # xmm7 = 1.0
  110. xorpd %xmm3, %xmm3
  111. mulpd %xmm2, %xmm4 # xmm4 = {c4*r^2, c2*r^2}
  112. mulpd %xmm2, %xmm2 # xmm2 = {r^2, r^2}
  113. mulsd %xmm5, %xmm0 # xmm0 = E = 2^-k * T[j]
  114. addpd %xmm4, %xmm1 # xmm1 = {c4*r^2 + c3*r, c2*r^2 + c1*r}
  115. pinsrw $3, %edx, %xmm3 # xmm3 = sign of x placed in the sign bit of a double
  116. mulsd %xmm2, %xmm1 # low lane becomes c4*r^4 + c3*r^3
  117. movapd %xmm7, %xmm6 # xmm6 = 1.0
  118. pshufd $238, %xmm1, %xmm2 # xmm2 = high lane of xmm1
  119. addsd %xmm0, %xmm7 # xmm7 = 1 + E
  120. addsd %xmm2, %xmm1 # xmm1 = p = c4*r^4 + c3*r^3 + c2*r^2 + c1*r
  121. subsd %xmm0, %xmm6 # xmm6 = 1 - E
  122. mulsd %xmm1, %xmm0 # xmm0 = E*p
  123. addsd %xmm0, %xmm7 # xmm7 = 1 + E + E*p
  124. subsd %xmm0, %xmm6 # xmm6 = 1 - E - E*p
  125. xorpd %xmm3, %xmm7 # give the denominator the sign of x
  126. divsd %xmm7, %xmm6 # xmm6 = (1 - E - E*p) / (signed (1 + E + E*p))
  127. cvtsd2ss %xmm6, %xmm0 # narrow the quotient to float
  128. jmp .L_2TAG_PACKET_1.0.2
  129. .L_2TAG_PACKET_0.0.2:
  130. addl $1152, %ecx # re-bias: ecx = top 16 bits of |x| minus 0x3900
  131. cmpl $1152, %ecx
  132. jae .L_2TAG_PACKET_2.0.2 # not in 2^-13 <= |x| < 0.0625
  133. movl $-1117975087, %eax # float bits 0xBD5D0DD1, about -0.05397
  134. movd %eax, %xmm1 # xmm1 = c7
  135. movss 112(%esp), %xmm0 # reload x
  136. movl $-1096111445, %ecx # float bits 0xBEAAAAAB, about -1/3
  137. movd %ecx, %xmm3 # xmm3 = c3
  138. movl $1040746633, %edx # float bits 0x3E088889, about 2/15
  139. movd %edx, %xmm2 # xmm2 = c5
  140. pshufd $68, %xmm0, %xmm4 # xmm4 = x
  141. mulss %xmm0, %xmm0 # xmm0 = x^2
  142. mulss %xmm0, %xmm1 # xmm1 = c7*x^2
  143. mulss %xmm0, %xmm3 # xmm3 = c3*x^2
  144. mulss %xmm0, %xmm0 # xmm0 = x^4
  145. addss %xmm2, %xmm1 # xmm1 = c5 + c7*x^2
  146. mulss %xmm1, %xmm0 # xmm0 = x^4 * (c5 + c7*x^2)
  147. addss %xmm3, %xmm0 # xmm0 = c3*x^2 + c5*x^4 + c7*x^6
  148. mulss %xmm4, %xmm0 # times x
  149. addss %xmm4, %xmm0 # result = x + x*(c3*x^2 + c5*x^4 + c7*x^6)
  150. jmp .L_2TAG_PACKET_1.0.2
  151. .L_2TAG_PACKET_2.0.2:
  152. addl $14592, %ecx # undo the bias: ecx = top 16 bits of |x|
  153. cmpl $15744, %ecx
  154. jae .L_2TAG_PACKET_3.0.2 # |x| >= 20, infinity or NaN
  155. movss 112(%esp), %xmm0 # tiny input: the value returned is x itself
  156. cmpl $128, %ecx
  157. jb .L_2TAG_PACKET_4.0.2 # exponent field is zero: x is zero or subnormal
  158. movl $1333788672, %eax # float bits 0x4F800000 = 2^32
  159. movd %eax, %xmm2
  160. mulss %xmm0, %xmm2
  161. addss %xmm0, %xmm2 # xmm2 = 2^32 * x + x; value unused (presumably computed only for its FP status flags)
  162. jmp .L_2TAG_PACKET_1.0.2
  163. .L_2TAG_PACKET_4.0.2:
  164. movss %xmm0, %xmm2
  165. mulss %xmm2, %xmm2 # xmm2 = x * x; value unused (presumably computed only for its FP status flags)
  166. jmp .L_2TAG_PACKET_1.0.2
  167. .L_2TAG_PACKET_3.0.2:
  168. cmpl $32640, %ecx # 0x7F80: exponent field all ones
  169. jae .L_2TAG_PACKET_5.0.2 # infinity or NaN
  170. movl $796917760, %eax # float bits 0x2F800000 = 2^-32
  171. movd %eax, %xmm2
  172. movss %xmm2, %xmm3
  173. mulss %xmm2, %xmm2
  174. addss %xmm3, %xmm2 # xmm2 = 2^-64 + 2^-32; value unused (presumably computed only for its FP status flags)
  175. .L_2TAG_PACKET_6.0.2:
  176. xorps %xmm0, %xmm0
  177. orl $16256, %edx # edx = sign | 0x3F80
  178. pinsrw $1, %edx, %xmm0 # xmm0 = +1.0f or -1.0f, carrying the sign of x
  179. jmp .L_2TAG_PACKET_1.0.2
  180. .L_2TAG_PACKET_5.0.2:
  181. movl 112(%esp), %eax # raw bits of x
  182. andl $2147483647, %eax # clear the sign bit
  183. cmpl $2139095040, %eax # 0x7F800000 = infinity
  184. je .L_2TAG_PACKET_6.0.2 # infinite input returns +/-1
  185. movss 112(%esp), %xmm0
  186. addss %xmm0, %xmm0 # NaN input: return x + x
  187. jmp .L_2TAG_PACKET_1.0.2
  188. .L_2TAG_PACKET_1.0.2:
  189. movss %xmm0, 24(%esp) # common exit: pass the result through memory ...
  190. flds 24(%esp) # ... into st(0), where it is returned
  191. movl 40(%esp), %ebx # restore %ebx
  192. movl %ebp, %esp
  193. popl %ebp
  194. ret
  195. ..B2.3:
  196. .align 16,0x90
  197. .type tanhf,@function
  198. .size tanhf,.-tanhf
  199. .data
  200. # -- End tanhf
  201. .section .rodata, "a"
  # static_const_table: 320 bytes of read-only double-precision constants,
  # 16-byte aligned, addressed by tanhf at fixed byte offsets from the base.
  # Each double is stored as two .long values, low dword first.
  #
  #   offset   0..255  T[0..31], indexed by (n & 31) in tanhf; T[0] = 1.0
  #                    (presumably T[j] = 2^(-j/32))
  #   offset 256       {c4, c2}  packed coefficient pair, loaded with movapd
  #   offset 272       {c3, c1}  packed coefficient pair, loaded with movapd
  #   offset 288       1.5 * 2^52, used by tanhf as a rounding shifter
  #   offset 296       -1.5 * 2^52, not referenced by the code visible here
  #   offset 304       scale factor, about 92.33 (presumably 64/ln 2)
  #   offset 312       1.0
  202. .align 16
  203. .align 16
  204. static_const_table:
  205. .long 0 # offset 0: T[0] = 1.0
  206. .long 1072693248
  207. .long 1533953344 # offset 8: T[1], bits 0x3FEF50765B6E4540, about 0.97857
  208. .long 1072648310
  209. .long 2728693978
  210. .long 1072604335
  211. .long 863738719
  212. .long 1072561303
  213. .long 3707479175
  214. .long 1072519192
  215. .long 3706687593
  216. .long 1072477984
  217. .long 3716502172
  218. .long 1072437659
  219. .long 4076559943
  220. .long 1072398198
  221. .long 2572866477
  222. .long 1072359583
  223. .long 2990417245
  224. .long 1072321795
  225. .long 2191782032
  226. .long 1072284817
  227. .long 2966275557
  228. .long 1072248631
  229. .long 1110089947
  230. .long 1072213221
  231. .long 2571947539
  232. .long 1072178569
  233. .long 1944781191
  234. .long 1072144660
  235. .long 3907805044
  236. .long 1072111477
  237. .long 1719614413
  238. .long 1072079006
  239. .long 2956612997
  240. .long 1072047230
  241. .long 3712504873
  242. .long 1072016135
  243. .long 1453150082
  244. .long 1071985707
  245. .long 3577096743
  246. .long 1071955930
  247. .long 1617004845
  248. .long 1071926792
  249. .long 1276261410
  250. .long 1071898278
  251. .long 926591435
  252. .long 1071870375
  253. .long 171030293
  254. .long 1071843070
  255. .long 4112506593
  256. .long 1071816349
  257. .long 1853186616
  258. .long 1071790202
  259. .long 828946858
  260. .long 1071764615
  261. .long 1014845819
  262. .long 1071739576
  263. .long 3490863953
  264. .long 1071715073
  265. .long 1828292879
  266. .long 1071691096
  267. .long 3541402996 # offset 248: T[31], last table entry
  268. .long 1071667632
  269. .long 1874480759 # offset 256: c4, about 9.17e-9
  270. .long 1044624043
  271. .long 4286760334 # offset 264: c2, about 2.346e-4
  272. .long 1060028349
  273. .long 3607404735 # offset 272: c3, about -1.694e-6
  274. .long 3200019208
  275. .long 4277811695 # offset 280: c1, bits 0xBF962E42FEFA39EF, about -0.021661 (presumably -ln 2 / 32)
  276. .long 3214290498
  277. .long 0 # offset 288: 0x4338000000000000 = 1.5 * 2^52
  278. .long 1127743488
  279. .long 0 # offset 296: 0xC338000000000000 = -1.5 * 2^52
  280. .long 3275227136
  281. .long 1697350398 # offset 304: 0x40571547652B82FE, about 92.33
  282. .long 1079448903
  283. .long 0 # offset 312: 1.0
  284. .long 1072693248
  285. .type static_const_table,@object
  286. .size static_const_table,320
  287. .data
  288. .section .note.GNU-stack, ""
  289. # End