atanh_gen.S 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "atanh_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin atanh  (double-precision atanh: argument in %xmm0, result returned in %xmm0)
  41. .text
  42. .align 16,0x90                                    # align the entry point to 16 bytes, padding with 0x90 bytes
  43. .globl atanh
  44. atanh:                                            # dispatches on the high 32 bits of x with the sign bit masked off
  45. # parameter 1: %xmm0
  46. ..B1.1:                                           # scratch stores use negative %rsp offsets (down to -40) with %rsp unchanged, i.e. the red zone
  47. .cfi_startproc
  48. ..___tag_value_atanh.1:
  49. ..L2:
  50. movsd %xmm0, -8(%rsp)                             # spill x so its 32-bit halves can be read as integers
  51. movl -4(%rsp), %esi                               # esi = high word of x (sign, exponent, top of mantissa)
  52. movl %esi, %edx
  53. andl $2147483647, %edx                            # edx = high word of |x| (0x7fffffff clears the sign bit)
  54. cmpl $1072693248, %edx                            # 0x3ff00000 = high word of 1.0
  55. jae ..B1.12                                       # |x| >= 1, infinity or NaN: special cases (flags are reused there)
  56. ..B1.2:
  57. cmpl $1068498944, %edx                            # 0x3fb00000 = high word of 2^-4
  58. jae ..B1.11                                       # 2^-4 <= |x| < 1: logarithm-based path
  59. ..B1.3:
  60. cmpl $1012924416, %edx                            # 0x3c600000 = high word of 2^-57
  61. jae ..B1.10                                       # 2^-57 <= |x| < 2^-4: odd polynomial in x
  62. ..B1.4:
  63. shrl $31, %esi                                    # rsi = sign bit of x (0 or 1), used as a table index below
  64. cmpl $1048576, %edx                               # 0x00100000 = high word of the smallest normal double
  65. jb ..B1.6                                         # exponent field is zero: x is zero or denormal
  66. ..B1.5:                                           # tiny normal x: return x * (1.0 + x)
  67. lea _ones(%rip), %rax
  68. movsd (%rax), %xmm1                               # xmm1 = +1.0
  69. addsd %xmm0, %xmm1                                # xmm1 = 1.0 + x
  70. movsd %xmm1, -40(%rsp)                            # round-trip the sum through memory before using it
  71. movsd -40(%rsp), %xmm2
  72. mulsd %xmm2, %xmm0                                # result = x * (1.0 + x)
  73. ret
  74. ..B1.6:
  75. movl -8(%rsp), %eax                               # eax = low word of x
  76. orl %eax, %edx                                    # zero only if every bit of |x| is zero
  77. je ..B1.8                                         # x is +0 or -0
  78. ..B1.7:                                           # nonzero denormal: return x + (tiny with the sign of x) * (positive tiny)
  79. lea _small_value_64(%rip), %rax
  80. movsd -8(%rsp), %xmm0                             # xmm0 = x
  81. movsd (%rax,%rsi,8), %xmm1                        # _small_value_64[sign]
  82. mulsd (%rax), %xmm1                               # times _small_value_64[0]
  83. movsd %xmm1, -40(%rsp)                            # store the product to memory (value is not reloaded)
  84. addsd %xmm1, %xmm0
  85. ret
  86. ..B1.8:
  87. movsd -8(%rsp), %xmm0                             # zero input is returned unchanged, sign included
  88. ..B1.9:
  89. ret
  90. ..B1.10:                                          # result = x + x^3 * P(x^2), P built from the six _PA coefficients
  91. movsd -8(%rsp), %xmm4                             # xmm4 = x
  92. lea 40+_PA(%rip), %rax                            # rax -> _PA[5]
  93. movaps %xmm4, %xmm3
  94. lea 32+_PA(%rip), %rsi                            # rsi -> _PA[4]
  95. mulsd %xmm4, %xmm3                                # xmm3 = x^2
  96. lea 24+_PA(%rip), %rdx                            # rdx -> _PA[3]
  97. movaps %xmm3, %xmm1
  98. lea 16+_PA(%rip), %rdi                            # rdi -> _PA[2]
  99. mulsd %xmm3, %xmm1                                # xmm1 = x^4
  100. lea 8+_PA(%rip), %rcx                            # rcx -> _PA[1]
  101. movsd (%rax), %xmm0                              # odd-index chain starts from _PA[5]
  102. lea _PA(%rip), %r8                               # r8 -> _PA[0]
  103. mulsd %xmm1, %xmm0
  104. movsd (%rsi), %xmm2                              # even-index chain starts from _PA[4]
  105. mulsd %xmm1, %xmm2
  106. addsd (%rdx), %xmm0                              # + _PA[3]
  107. mulsd %xmm1, %xmm0
  108. addsd (%rdi), %xmm2                              # + _PA[2]
  109. mulsd %xmm1, %xmm2
  110. addsd (%rcx), %xmm0                              # + _PA[1]
  111. mulsd %xmm3, %xmm0                               # odd-index chain carries one extra factor of x^2
  112. addsd (%r8), %xmm2                               # + _PA[0]
  113. addsd %xmm2, %xmm0                               # xmm0 = P(x^2)
  114. mulsd %xmm4, %xmm0                               # * x
  115. mulsd %xmm3, %xmm0                               # * x^2
  116. addsd %xmm4, %xmm0                               # + x
  117. ret
  118. ..B1.11:                                         # result = halfs[sign] * log(q), q = (1+|x|)/(1-|x|), carried as high/low pieces
  119. movsd -8(%rsp), %xmm9                            # xmm9 = x
  120. lea 8+_ones(%rip), %rdi                          # rdi -> _ones[1] (-1.0)
  121. andps .L_2il0floatpacket.0(%rip), %xmm9          # xmm9 = |x| (mask clears bit 63 and zeroes the upper lane)
  122. lea _ones(%rip), %r8                             # r8 -> _ones[0] (+1.0)
  123. movsd %xmm9, -8(%rsp)
  124. movq $0x3ff0000000000000, %r10                   # bit pattern of 1.0; becomes 2^-k further down
  125. andl $-268435456, -8(%rsp)                       # 0xf0000000: clear the low 28 bits of the stored |x|
  126. pxor %xmm15, %xmm15                              # clear xmm15 ahead of the cvtss2sd that writes it
  127. movsd -8(%rsp), %xmm12                           # xmm12 = xh, the truncated high part of |x|
  128. movsd (%rdi), %xmm0                              # xmm0 = -1.0
  129. movaps %xmm12, %xmm6
  130. movaps %xmm0, %xmm8                              # xmm8 = -1.0, numerator of the reciprocal
  131. lea 32+_PL(%rip), %rdi                           # rdi -> _PL[4]
  132. movsd (%r8), %xmm4                               # xmm4 = +1.0
  133. lea 16+_PL(%rip), %r8                            # r8 -> _PL[2]
  134. movq __libm_rcp_table_256@GOTPCREL(%rip), %rax   # address of the external reciprocal table, loaded from the GOT
  135. addsd %xmm0, %xmm6                               # xmm6 = xh - 1
  136. subsd %xmm12, %xmm9                              # xmm9 = xl = |x| - xh
  137. addsd %xmm4, %xmm12                              # xmm12 = xh + 1
  138. movaps %xmm6, %xmm2
  139. movaps %xmm6, %xmm5
  140. movaps %xmm9, %xmm3
  141. movaps %xmm12, %xmm1
  142. shrl $31, %esi                                   # rsi = sign bit of x (0 or 1)
  143. addsd %xmm9, %xmm2                               # xmm2 = (xh - 1) + xl = |x| - 1
  144. divsd %xmm2, %xmm8                               # xmm8 = r = -1 / (|x| - 1) = 1 / (1 - |x|)
  145. movsd %xmm8, -32(%rsp)
  146. pxor %xmm2, %xmm2                                # clear xmm2 ahead of the cvtsi2sd that writes it
  147. andl $-268435456, -32(%rsp)                      # clear the low 28 bits of the stored r
  148. movsd -32(%rsp), %xmm10                          # xmm10 = rh, the truncated high part of r
  149. movaps %xmm10, %xmm7
  150. subsd %xmm10, %xmm8                              # xmm8 = rl = r - rh
  151. mulsd %xmm10, %xmm6                              # (xh - 1) * rh
  152. mulsd %xmm10, %xmm12                             # xmm12 = (xh + 1) * rh, leading part of q
  153. mulsd %xmm8, %xmm5                               # (xh - 1) * rl
  154. addsd %xmm8, %xmm7                               # xmm7 = rh + rl
  155. addsd %xmm4, %xmm6                               # 1 + (xh - 1) * rh
  156. mulsd %xmm7, %xmm3                               # xl * (rh + rl)
  157. addsd %xmm3, %xmm5
  158. addsd %xmm5, %xmm6                               # xmm6 = 1 + (|x| - 1) * r summed in pieces: residual error of r
  159. mulsd %xmm6, %xmm7                               # correction = r * residual
  160. addsd %xmm7, %xmm8                               # rl + correction
  161. mulsd %xmm8, %xmm1                               # (xh + 1) * (rl + correction)
  162. addsd %xmm10, %xmm8                              # xmm8 = corrected reciprocal
  163. mulsd %xmm8, %xmm9                               # xl * corrected reciprocal
  164. addsd %xmm9, %xmm1                               # xmm1 = low-order part of q
  165. movaps %xmm1, %xmm11
  166. addsd %xmm12, %xmm11                             # xmm11 = q rounded to double
  167. movsd %xmm11, -8(%rsp)
  168. movl $0, -8(%rsp)                                # zero the low 32 bits of the stored q
  169. movsd -8(%rsp), %xmm3                            # xmm3 = qh, the truncated high part of q
  170. movl -4(%rsp), %ecx                              # ecx = high word of q
  171. subsd %xmm3, %xmm12                              # (xh + 1) * rh - qh
  172. sarl $20, %ecx                                   # ecx = sign/exponent field of q
  173. addsd %xmm12, %xmm1                              # xmm1 = ql, everything of q not captured in qh
  174. addl $-1023, %ecx                                # ecx = k, exponent with the bias of 1023 removed
  175. movslq %ecx, %r9
  176. shlq $52, %r9                                    # k moved into the exponent-field position
  177. subq %r9, %r10                                   # r10 = bit pattern of 2^-k
  178. lea _PL(%rip), %r9                               # r9 -> _PL[0]
  179. movq %r10, -24(%rsp)
  180. lea 24+_PL(%rip), %r10                           # r10 -> _PL[3]
  181. movsd -24(%rsp), %xmm13                          # xmm13 = 2^-k
  182. mulsd %xmm13, %xmm1                              # ql scaled by 2^-k
  183. cvtsi2sd %ecx, %xmm2                             # xmm2 = (double)k
  184. mulsd %xmm13, %xmm3                              # qh scaled by 2^-k
  185. movaps %xmm1, %xmm14
  186. lea _LN2(%rip), %rcx                             # rcx -> _LN2[0]
  187. movsd (%r10), %xmm13                             # xmm13 = _PL[3]
  188. addsd %xmm3, %xmm14                              # xmm14 = m, q scaled by 2^-k
  189. movsd %xmm14, -16(%rsp)
  190. movl -12(%rsp), %r11d                            # r11d = high word of m
  191. shrl $12, %r11d
  192. movzbl %r11b, %edx                               # edx = top 8 mantissa bits of m, used as the table index
  193. lea 8+_PL(%rip), %r11                            # r11 -> _PL[1]
  194. movsd (%rdi), %xmm14                             # xmm14 = _PL[4]
  195. cvtss2sd (%rax,%rdx,4), %xmm15                   # 4-byte single-precision table entry widened to double; presumably approximates 1/m (table is defined elsewhere)
  196. mulsd %xmm15, %xmm3                              # scaled qh * table value
  197. mulsd %xmm15, %xmm1                              # scaled ql * table value
  198. addsd %xmm0, %xmm3                               # xmm3 = high part of the reduced argument (xmm0 still holds -1.0)
  199. movaps %xmm3, %xmm12
  200. shlq $4, %rdx                                    # index * 16 = byte offset of a 16-byte log-table entry
  201. addsd %xmm1, %xmm12                              # xmm12 = t, the reduced argument
  202. movaps %xmm12, %xmm0
  203. mulsd %xmm12, %xmm0                              # xmm0 = t^2
  204. mulsd %xmm0, %xmm14                              # _PL[4] * t^2
  205. mulsd %xmm0, %xmm13                              # _PL[3] * t^2
  206. addsd (%r8), %xmm14                              # + _PL[2]
  207. addsd (%r11), %xmm13                             # + _PL[1]
  208. mulsd %xmm0, %xmm14
  209. mulsd %xmm0, %xmm13
  210. addsd (%r9), %xmm14                              # + _PL[0]
  211. mulsd %xmm12, %xmm13                             # xmm13 = _PL[1]*t^3 + _PL[3]*t^5
  212. mulsd %xmm0, %xmm14                              # xmm14 = _PL[0]*t^2 + _PL[2]*t^4 + _PL[4]*t^6
  213. movsd (%rcx), %xmm0                              # xmm0 = _LN2[0]
  214. lea halfs(%rip), %rcx
  215. mulsd %xmm2, %xmm0                               # k * _LN2[0]
  216. addsd %xmm13, %xmm14                             # polynomial in t, degrees 2 through 6
  217. movq __libm_log_table_256@GOTPCREL(%rip), %rax   # address of the external log table, loaded from the GOT
  218. addsd %xmm14, %xmm1                              # low part of t + polynomial
  219. addsd (%rax,%rdx), %xmm0                         # + first double of the table entry (presumably the low-order part; table is defined elsewhere)
  220. movsd (%rcx,%rsi,8), %xmm4                       # xmm4 = halfs[sign]
  221. lea 8+_LN2(%rip), %rcx                           # rcx -> _LN2[1]
  222. movsd %xmm4, -16(%rsp)                           # stored to memory; not reloaded before ret
  223. addsd %xmm1, %xmm0                               # xmm0 = sum of all small terms
  224. movsd (%rcx), %xmm1                              # xmm1 = _LN2[1]
  225. mulsd %xmm1, %xmm2                               # k * _LN2[1]
  226. addsd 8(%rax,%rdx), %xmm2                        # + second double of the table entry
  227. movsd %xmm2, -32(%rsp)                           # stored to memory; not reloaded before ret
  228. addsd %xmm2, %xmm3                               # + high part of t
  229. movsd %xmm3, -8(%rsp)                            # stored to memory; not reloaded before ret
  230. addsd %xmm3, %xmm0                               # xmm0 = large terms + small terms
  231. mulsd %xmm4, %xmm0                               # scale by halfs[sign] to apply the factor 1/2 and the sign of x
  232. ret
  233. ..B1.12:                                         # entered with the flags of the compare against 0x3ff00000 still live
  234. jne ..B1.15                                      # high word above 0x3ff00000
  235. ..B1.13:
  236. cmpl $0, -8(%rsp)                                # high word equals 0x3ff00000: test the low word of x
  237. je ..B1.19                                       # |x| is exactly 1
  238. ..B1.14:                                         # |x| > 1, infinities included: return _infs[0] * _zeros[0] (+inf * +0, an IEEE-754 invalid operation yielding NaN)
  239. lea _infs(%rip), %rax
  240. lea _zeros(%rip), %rdx
  241. movsd (%rax), %xmm0
  242. mulsd (%rdx), %xmm0
  243. ret
  244. ..B1.15:
  245. cmpl $2146435072, %edx                           # 0x7ff00000 = high word of infinity
  246. jb ..B1.14                                       # finite with |x| > 1
  247. ..B1.16:
  248. jne ..B1.18                                      # high word above 0x7ff00000: NaN
  249. ..B1.17:
  250. cmpl $0, -8(%rsp)                                # exponent all ones: low word zero means infinity
  251. je ..B1.14
  252. ..B1.18:                                         # NaN input: return x * 1.0
  253. lea _ones(%rip), %rax
  254. movsd -8(%rsp), %xmm0
  255. mulsd (%rax), %xmm0
  256. ret
  257. ..B1.19:                                         # |x| == 1: return _ones[sign] / _zeros[0], i.e. +-1.0 divided by +0
  258. lea _ones(%rip), %rax
  259. lea _zeros(%rip), %rdx
  260. shrl $31, %esi                                   # rsi = sign bit of x (0 or 1)
  261. movsd (%rax,%rsi,8), %xmm0
  262. divsd (%rdx), %xmm0
  263. ret
  264. .align 16,0x90
  265. .cfi_endproc
  266. .type atanh,@function
  267. .size atanh,.-atanh
  268. .data
  269. # -- End atanh
  270. .section .rodata, "a"                           # read-only, allocatable constants used by atanh
  271. .align 16                                        # 16-byte alignment: the mask below is a memory operand of andps
  272. .align 16
  273. .L_2il0floatpacket.0:                            # absolute-value mask: low 64 bits 0x7fffffffffffffff, high 64 bits zero
  274. .long 0xffffffff,0x7fffffff,0x00000000,0x00000000
  275. .type .L_2il0floatpacket.0,@object
  276. .size .L_2il0floatpacket.0,16
  277. .align 8
  278. halfs:                                           # two doubles stored as (low word, high word); atanh indexes this by the sign bit of x
  279. .long 0x00000000,0x3fe00000                      # +0.5
  280. .long 0x00000000,0xbfe00000                      # -0.5
  281. .type halfs,@object
  282. .size halfs,16
  283. .align 4
  284. _ones:                                           # two doubles, each written as low word then high word
  285. .long 0
  286. .long 1072693248                                 # high word 0x3ff00000 -> +1.0
  287. .long 0
  288. .long 3220176896                                 # high word 0xbff00000 -> -1.0
  289. .type _ones,@object
  290. .size _ones,16
  291. .align 4
  292. _small_value_64:                                 # two doubles of equal magnitude and opposite sign, multiplied together in the denormal-input path of atanh
  293. .long 0
  294. .long 24117248                                   # high word 0x01700000 -> +2^-1000
  295. .long 0
  296. .long 2171600896                                 # high word 0x81700000 -> -2^-1000
  297. .type _small_value_64,@object
  298. .size _small_value_64,16
  299. .align 4
  300. _PA:                                             # six doubles (low word, high word) read at byte offsets 0..40 by the small-|x| polynomial path of atanh
  301. .long 1431655765
  302. .long 1070945621                                 # _PA[0] = 0x3fd5555555555555, about 1/3
  303. .long 2576980801
  304. .long 1070176665                                 # _PA[1], high word 0x3fc99999, about 1/5
  305. .long 2453616913
  306. .long 1069697316                                 # _PA[2], high word 0x3fc24924, about 1/7
  307. .long 1427436931
  308. .long 1069314503                                 # _PA[3], high word 0x3fbc71c7, about 1/9
  309. .long 2129349532
  310. .long 1068975486                                 # _PA[4], high word 0x3fb7457e, close to 1/11
  311. .long 1629438381
  312. .long 1068756329                                 # _PA[5], high word 0x3fb3ed69, roughly 1/13 (presumably a fitted coefficient)
  313. .type _PA,@object
  314. .size _PA,48
  315. .align 4
  316. _PL:                                             # five doubles (low word, high word) read at byte offsets 0..32 by the log path of atanh
  317. .long 0
  318. .long 3219128320                                 # _PL[0] = 0xbfe0000000000000 = -0.5
  319. .long 1431621855
  320. .long 1070945621                                 # _PL[1], high word 0x3fd55555, about 1/3
  321. .long 4294842013
  322. .long 3218079743                                 # _PL[2], high word 0xbfcfffff, about -1/4
  323. .long 1289448124
  324. .long 1070176674                                 # _PL[3], high word 0x3fc999a2, about 1/5
  325. .long 2077359316
  326. .long 3217380703                                 # _PL[4], high word 0xbfc5555f, about -1/6
  327. .type _PL,@object
  328. .size _PL,40
  329. .align 4
  330. _LN2:                                            # two doubles (low word, high word); atanh multiplies each by the exponent k
  331. .long 897137782
  332. .long 1038760431                                 # _LN2[0], high word 0x3dea39ef: a small value, presumably the low-order part of ln 2
  333. .long 4276092928
  334. .long 1072049730                                 # _LN2[1] = 0x3fe62e42fee00000, about 0.693147 (ln 2 with trailing mantissa bits zero)
  335. .type _LN2,@object
  336. .size _LN2,16
  337. .align 4
  338. _infs:                                           # two doubles (low word, high word); atanh reads only the first entry
  339. .long 0
  340. .long 2146435072                                 # high word 0x7ff00000 -> +infinity
  341. .long 0
  342. .long 4293918720                                 # high word 0xfff00000 -> -infinity
  343. .type _infs,@object
  344. .size _infs,16
  345. .align 4
  346. _zeros:                                          # two doubles (low word, high word); atanh reads only the first entry
  347. .long 0
  348. .long 0                                          # +0.0
  349. .long 0
  350. .long 2147483648                                 # high word 0x80000000 -> -0.0
  351. .type _zeros,@object
  352. .size _zeros,16
  353. .data
  354. .section .note.GNU-stack, ""                     # empty marker section; GNU toolchain convention for an object that needs no executable stack
  355. // -- Begin DWARF2 SEGMENT .eh_frame
  356. .section .eh_frame,"a",@progbits                 # unwind-info section is opened here; no contents are emitted in the visible text
  357. .eh_frame_seg:
  358. .align 1
  359. # End