asinh_gen.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "asinh_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin asinh: double-precision inverse hyperbolic sine. Dispatches on the magnitude of x (read from its upper 32 bits) to one of several evaluation paths; see the compare chain below.
  41. .text
  42. .align 16,0x90    # align the entry point to 16 bytes, filling with 0x90
  43. .globl asinh
  44. asinh:
  45. # parameter 1: %xmm0 (x, handled as a scalar double throughout); the result is left in %xmm0 on every return path. Only scratch registers (rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm9) and stack slots below %rsp are written.
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_asinh.1:
  49. ..L2:
  50. movsd %xmm0, -16(%rsp)    # spill x to a scratch slot below %rsp (rsp is never adjusted; assumes a red zone is available - TODO confirm target ABI)
  51. movl -12(%rsp), %esi    # esi = upper 32 bits of x (sign, exponent, top mantissa bits)
  52. movl %esi, %edx
  53. andl $2147483647, %edx    # edx = upper word with the sign bit cleared (mask 0x7fffffff)
  54. cmpl $2146435072, %edx    # 0x7ff00000: exponent field all ones
  55. jae ..B1.18    # Inf or NaN: return x unchanged
  56. ..B1.2:
  57. cmpl $1102053376, %edx    # 0x41b00000: |x| >= 2^28
  58. jae ..B1.16    # large argument: log path that skips the square root
  59. ..B1.3:
  60. cmpl $1074987008, %edx    # 0x40130000: |x| >= 4.75
  61. jae ..B1.14
  62. ..B1.4:
  63. cmpl $1068498944, %edx    # 0x3fb00000: |x| >= 2^-4
  64. jae ..B1.13
  65. ..B1.5:
  66. cmpl $1012924416, %edx    # 0x3c600000: |x| >= 2^-57
  67. jae ..B1.12    # small argument: odd polynomial in x
  68. ..B1.6:    # |x| < 2^-57 from here on
  69. shrl $31, %esi    # esi = sign bit of x (0 or 1), used as an index into two-entry tables
  70. cmpl $1048576, %edx    # 0x00100000: smallest upper word of a normal double
  71. jb ..B1.8    # zero or subnormal
  72. ..B1.7:    # tiny normal x: result = (x * 2^100 - sign(x) * 2^-1022) * 2^-100
  73. lea _SCALE(%rip), %rax
  74. lea _MINNORM(%rip), %rdx
  75. movl %esi, %esi    # 32-bit self-move; upper half of %rsi is zero for the indexed load
  76. lea 8+_SCALE(%rip), %rcx
  77. movsd (%rax), %xmm1    # xmm1 = _SCALE[0] = 2^100
  78. mulsd %xmm1, %xmm0    # xmm0 = x * 2^100
  79. subsd (%rdx,%rsi,8), %xmm0    # subtract _MINNORM[sign]: 2^-1022 carrying the sign of x
  80. mulsd (%rcx), %xmm0    # scale back by _SCALE[1] = 2^-100
  81. ret
  82. ..B1.8:    # upper word below 0x00100000: x is zero or subnormal
  83. movl -16(%rsp), %eax    # eax = low 32 bits of x
  84. orl %eax, %edx    # zero only when every non-sign bit of x is zero
  85. je ..B1.10    # x is +0 or -0
  86. ..B1.9:    # nonzero subnormal x
  87. lea _small_value_64(%rip), %rax
  88. movsd -16(%rsp), %xmm0    # reload x
  89. movsd (%rax,%rsi,8), %xmm1    # xmm1 = _small_value_64[sign] = 2^-1000 carrying the sign of x
  90. mulsd (%rax), %xmm1    # times +2^-1000: product lies far below the double range (presumably evaluated to raise underflow/inexact - verify)
  91. movsd %xmm1, -40(%rsp)    # product stored to scratch; never read back in this function
  92. subsd %xmm1, %xmm0    # result = x - product
  93. ret
  94. ..B1.10:
  95. movsd -16(%rsp), %xmm0    # signed zero is returned as is
  96. ..B1.11:
  97. ret
  98. ..B1.12:    # 2^-57 <= |x| < 2^-4: result = x + x^3 * P(x^2), P built from the five _PA coefficients
  99. movsd -16(%rsp), %xmm0    # xmm0 = x
  100. lea 32+_PA(%rip), %rax    # &_PA[4]
  101. movaps %xmm0, %xmm4
  102. lea 24+_PA(%rip), %rcx    # &_PA[3]
  103. mulsd %xmm0, %xmm4    # xmm4 = x^2
  104. lea 16+_PA(%rip), %rdx    # &_PA[2]
  105. movaps %xmm4, %xmm1
  106. lea 8+_PA(%rip), %rsi    # &_PA[1]
  107. mulsd %xmm4, %xmm1    # xmm1 = x^4
  108. lea _PA(%rip), %rdi    # &_PA[0]
  109. movsd (%rax), %xmm3    # xmm3 = PA4
  110. movsd (%rcx), %xmm2    # xmm2 = PA3
  111. mulsd %xmm1, %xmm3
  112. mulsd %xmm1, %xmm2
  113. addsd (%rdx), %xmm3    # PA4*x^4 + PA2
  114. addsd (%rsi), %xmm2    # PA3*x^4 + PA1
  115. mulsd %xmm1, %xmm3    # (PA4*x^4 + PA2) * x^4
  116. mulsd %xmm4, %xmm2    # (PA3*x^4 + PA1) * x^2
  117. mulsd %xmm0, %xmm4    # xmm4 = x^3
  118. addsd %xmm2, %xmm3
  119. addsd (%rdi), %xmm3    # xmm3 = PA0 + PA1*x^2 + PA2*x^4 + PA3*x^6 + PA4*x^8
  120. mulsd %xmm3, %xmm4    # x^3 * P(x^2)
  121. addsd %xmm4, %xmm0    # result = x + x^3 * P(x^2)
  122. ret
  123. ..B1.13:    # 2^-4 <= |x| < 4.75: form |x| + sqrt(1 + x^2) as a high/low pair (xmm9/xmm1), then fall into the log code
  124. movsd -16(%rsp), %xmm5
  125. xorl %eax, %eax    # eax = 0, stored below to clear low words
  126. andps .L_2il0floatpacket.2(%rip), %xmm5    # xmm5 = |x| (sign bit masked off)
  127. movsd %xmm5, -16(%rsp)
  128. movaps %xmm5, %xmm1
  129. movl %eax, -16(%rsp)    # zero the low 32 bits of the stored |x|
  130. movsd -16(%rsp), %xmm9    # xmm9 = xh, |x| truncated to its upper 32 bits
  131. movaps %xmm9, %xmm7
  132. subsd %xmm9, %xmm1    # xmm1 = xl = |x| - xh
  133. mulsd %xmm9, %xmm7    # xmm7 = xh^2
  134. addsd %xmm9, %xmm5    # xmm5 = |x| + xh
  135. movsd .L_2il0floatpacket.3(%rip), %xmm0    # xmm0 = 1.0; stays live until the subtraction at ..B1.17
  136. mulsd %xmm1, %xmm5    # xmm5 = (|x| + xh) * xl, i.e. x^2 - xh^2
  137. addsd %xmm0, %xmm7    # xmm7 = 1 + xh^2
  138. movaps %xmm7, %xmm2
  139. addsd %xmm5, %xmm2    # xmm2 = 1 + x^2
  140. sqrtsd %xmm2, %xmm2    # xmm2 = s = sqrt(1 + x^2)
  141. movsd %xmm2, -24(%rsp)
  142. movl %eax, -24(%rsp)    # zero the low 32 bits of the stored s
  143. movsd -24(%rsp), %xmm8    # xmm8 = sh, s truncated to its upper 32 bits
  144. movaps %xmm8, %xmm3
  145. movaps %xmm8, %xmm4
  146. mulsd %xmm8, %xmm3    # xmm3 = sh^2
  147. subsd %xmm8, %xmm2    # xmm2 = sl = s - sh
  148. addsd %xmm8, %xmm9    # xmm9 = xh + sh: high part of the sum
  149. mulsd %xmm2, %xmm4    # xmm4 = sh * sl
  150. subsd %xmm3, %xmm7    # (1 + xh^2) - sh^2
  151. movaps %xmm8, %xmm6
  152. subsd %xmm4, %xmm7    # ... - sh*sl
  153. addsd %xmm2, %xmm6    # xmm6 = sh + sl
  154. addsd %xmm5, %xmm7    # ... + (x^2 - xh^2): residual used to correct the square root
  155. divsd %xmm6, %xmm7    # residual / (sh + sl)
  156. addsd %xmm7, %xmm2    # sl + residual / (sh + sl)
  157. mulsd .L_2il0floatpacket.1(%rip), %xmm2    # times 0.5: corrected low part of the square root
  158. addsd %xmm2, %xmm1    # xmm1 = xl + corrected low part: low part of the sum
  159. jmp ..B1.15
  160. ..B1.14:    # 4.75 <= |x| < 2^28: |x| + sqrt(x^2 + 1) formed as 2|x| + 1 / (sqrt(x^2 + 1) + |x|)
  161. movsd -16(%rsp), %xmm1
  162. andps .L_2il0floatpacket.2(%rip), %xmm1    # xmm1 = |x|
  163. movaps %xmm1, %xmm2
  164. mulsd %xmm1, %xmm2    # xmm2 = x^2
  165. movsd .L_2il0floatpacket.3(%rip), %xmm0    # xmm0 = 1.0; stays live until the subtraction at ..B1.17
  166. movaps %xmm0, %xmm4    # xmm4 = 1.0, numerator of the reciprocal
  167. addsd %xmm0, %xmm2    # x^2 + 1
  168. sqrtsd %xmm2, %xmm2    # sqrt(x^2 + 1)
  169. movsd .L_2il0floatpacket.0(%rip), %xmm3    # xmm3 = 2.0
  170. addsd %xmm1, %xmm2    # sqrt(x^2 + 1) + |x|
  171. mulsd %xmm3, %xmm1    # xmm1 = 2|x|
  172. divsd %xmm2, %xmm4    # xmm4 = 1 / (sqrt(x^2 + 1) + |x|)
  173. movsd %xmm1, -16(%rsp)
  174. movl $0, -16(%rsp)    # zero the low 32 bits of the stored 2|x|
  175. movsd -16(%rsp), %xmm9    # xmm9 = high part of 2|x|
  176. subsd %xmm9, %xmm1    # low part of 2|x|
  177. addsd %xmm4, %xmm1    # xmm1 = low part of 2|x| + reciprocal term
  178. ..B1.15:    # common tail of ..B1.13 and ..B1.14: xmm9 = high part, xmm1 = low part, xmm0 = 1.0
  179. movaps %xmm1, %xmm2
  180. movq $0x3ff0000000000000, %rax    # bit pattern of 1.0
  181. addsd %xmm9, %xmm2    # high + low
  182. movsd %xmm2, -16(%rsp)
  183. movl $0, -16(%rsp)    # truncate the sum to its upper 32 bits
  184. movsd -16(%rsp), %xmm8    # xmm8 = new high part
  185. movzwl -10(%rsp), %ecx    # top 16 bits of the new high part
  186. subsd %xmm8, %xmm9    # old high - new high
  187. andl $32752, %ecx    # 0x7ff0: isolate the exponent field
  188. addsd %xmm9, %xmm1    # xmm1 = new low part
  189. shrl $4, %ecx    # biased exponent
  190. addl $-1023, %ecx    # ecx = k, the unbiased exponent
  191. movslq %ecx, %rdx
  192. negq %rdx
  193. shlq $52, %rdx    # -k shifted into the exponent field position
  194. addq %rax, %rdx    # rdx = bit pattern of 2^-k
  195. movq %rdx, -32(%rsp)
  196. movsd -32(%rsp), %xmm3    # xmm3 = 2^-k
  197. mulsd %xmm3, %xmm1    # scale the low part
  198. mulsd %xmm3, %xmm8    # scale the high part so its exponent becomes 0
  199. movaps %xmm1, %xmm4
  200. addsd %xmm8, %xmm4
  201. movsd %xmm4, -24(%rsp)    # scaled high + low, stored so its mantissa bits can be read as integers
  202. movl -20(%rsp), %edi    # upper word of the scaled sum
  203. shrl $12, %edi
  204. movzbl %dil, %edx    # edx = bits 12..19 of the upper word (top 8 mantissa bits): table index
  205. jmp ..B1.17
  206. ..B1.16:    # |x| >= 2^28: split |x| into exponent and mantissa directly, no square root
  207. movsd -16(%rsp), %xmm0
  208. andps .L_2il0floatpacket.2(%rip), %xmm0    # xmm0 = |x|
  209. movsd %xmm0, -16(%rsp)
  210. movzwl -10(%rsp), %eax    # top 16 bits of |x|
  211. movl %eax, %ecx
  212. andl $-32753, %eax    # clear the exponent field (bits 0x7ff0) in this copy
  213. andl $32752, %ecx    # ecx = exponent field of |x|
  214. orl $-49168, %eax    # low 16 bits gain 0x3ff0: exponent field of 1.0
  215. movw %ax, -10(%rsp)    # stored value is now m: mantissa of |x| with exponent 0
  216. movsd -16(%rsp), %xmm1    # xmm1 = m
  217. movl $0, -16(%rsp)    # zero the low 32 bits of the stored m
  218. movl -12(%rsp), %edx    # upper word of m
  219. movsd -16(%rsp), %xmm8    # xmm8 = mh, high part of m
  220. shrl $4, %ecx    # biased exponent of |x|
  221. subsd %xmm8, %xmm1    # xmm1 = ml = m - mh
  222. shrl $12, %edx
  223. addl $-1022, %ecx    # ecx = unbiased exponent + 1 (presumably the extra 1 accounts for the factor 2 in log(2|x|) - verify)
  224. movzbl %dl, %edx    # edx = top 8 mantissa bits of m: table index
  225. movsd .L_2il0floatpacket.3(%rip), %xmm0    # xmm0 = 1.0, consumed at ..B1.17
  226. ..B1.17:    # shared logarithm evaluation. In: xmm8/xmm1 = high/low parts of the scaled value, ecx = exponent term k, edx = table index, xmm0 = 1.0, esi = original upper word of x
  227. movq __libm_rcp_table_256@GOTPCREL(%rip), %rdi    # address of the external table, loaded through the GOT
  228. pxor %xmm2, %xmm2    # zero xmm2 before the scalar convert writes its low lane
  229. lea 32+_PL(%rip), %r8    # &_PL[4]
  230. lea 16+_PL(%rip), %r9    # &_PL[2]
  231. lea 24+_PL(%rip), %r11    # &_PL[3]
  232. lea _PL(%rip), %r10    # &_PL[0]
  233. cvtss2sd (%rdi,%rdx,4), %xmm2    # xmm2 = r: 4-byte single-precision table entry widened to double (presumably an approximate reciprocal of the mantissa - verify against the table definition)
  234. mulsd %xmm2, %xmm8    # r * high
  235. lea 8+_PL(%rip), %rdi    # &_PL[1]; rdi no longer holds the table address
  236. mulsd %xmm2, %xmm1    # r * low
  237. subsd %xmm0, %xmm8    # xmm8 = r*high - 1
  238. movaps %xmm8, %xmm3
  239. movsd (%r8), %xmm5    # xmm5 = PL4
  240. addsd %xmm1, %xmm3    # xmm3 = u = (r*high - 1) + r*low
  241. movaps %xmm3, %xmm0
  242. mulsd %xmm3, %xmm0    # xmm0 = u^2
  243. mulsd %xmm0, %xmm5
  244. movsd (%r11), %xmm4    # xmm4 = PL3
  245. mulsd %xmm0, %xmm4
  246. addsd (%r9), %xmm5    # PL4*u^2 + PL2
  247. mulsd %xmm0, %xmm5
  248. addsd (%rdi), %xmm4    # PL3*u^2 + PL1
  249. mulsd %xmm0, %xmm4
  250. addsd (%r10), %xmm5    # (PL4*u^2 + PL2)*u^2 + PL0
  251. mulsd %xmm0, %xmm5    # xmm5 = u^2 * (PL0 + PL2*u^2 + PL4*u^4)
  252. pxor %xmm0, %xmm0    # zero xmm0 before the integer convert writes its low lane
  253. cvtsi2sd %ecx, %xmm0    # xmm0 = (double) k
  254. mulsd %xmm3, %xmm4    # xmm4 = u^3 * (PL1 + PL3*u^2)
  255. lea _LN2(%rip), %rcx
  256. addsd %xmm4, %xmm5    # xmm5 = PL0*u^2 + PL1*u^3 + PL2*u^4 + PL3*u^5 + PL4*u^6
  257. movsd (%rcx), %xmm6    # xmm6 = _LN2[0]
  258. lea 8+_LN2(%rip), %rcx
  259. mulsd %xmm0, %xmm6    # k * _LN2[0]
  260. addsd %xmm5, %xmm1    # low accumulator = r*low + polynomial
  261. movsd (%rcx), %xmm7    # xmm7 = _LN2[1]
  262. mulsd %xmm7, %xmm0    # k * _LN2[1]
  263. shlq $4, %rdx    # index * 16: byte offset of a 16-byte (two-double) table entry
  264. movq __libm_log_table_256@GOTPCREL(%rip), %rax    # address of the external table, loaded through the GOT
  265. shrl $31, %esi    # esi = sign bit of the original x
  266. addsd (%rax,%rdx), %xmm6    # add the first double of the log table entry to k*_LN2[0]
  267. addsd 8(%rax,%rdx), %xmm0    # add the second double of the log table entry to k*_LN2[1]
  268. addsd %xmm6, %xmm1    # low accumulator complete
  269. addsd %xmm8, %xmm0    # high accumulator = k*_LN2[1] + table value + (r*high - 1)
  270. lea ones(%rip), %rdx
  271. movsd %xmm0, -16(%rsp)    # scratch store; not read back in this function
  272. movsd (%rdx,%rsi,8), %xmm8    # xmm8 = ones[sign]: +1.0 or -1.0
  273. mulsd %xmm8, %xmm0    # give the high accumulator the sign of x
  274. mulsd %xmm8, %xmm1    # give the low accumulator the sign of x
  275. movsd %xmm8, -24(%rsp)    # scratch store; not read back in this function
  276. addsd %xmm1, %xmm0    # result = high + low
  277. ret
  278. ..B1.18:    # Inf or NaN input
  279. movsd -16(%rsp), %xmm0    # x is returned unmodified
  280. ret
  281. .align 16,0x90
  282. .cfi_endproc
  283. .type asinh,@function
  284. .size asinh,.-asinh
  285. .data
  286. # -- End asinh
  287. .section .rodata, "a"    # read-only constants used by asinh; each double is written as two .long words, low word first
  288. .align 16
  289. .align 16
  290. .L_2il0floatpacket.2:    # 16-byte mask used with andps: low 64 bits 0x7fffffffffffffff clear the sign of a double, upper 64 bits are zero
  291. .long 0xffffffff,0x7fffffff,0x00000000,0x00000000
  292. .type .L_2il0floatpacket.2,@object
  293. .size .L_2il0floatpacket.2,16
  294. .align 16
  295. _SCALE:    # two doubles: 2^100 and 2^-100 (scale up / scale back for tiny normal inputs)
  296. .long 0
  297. .long 1177550848    # 0x46300000
  298. .long 0
  299. .long 967835648    # 0x39b00000
  300. .type _SCALE,@object
  301. .size _SCALE,16
  302. .align 16
  303. _MINNORM:    # two doubles indexed by sign bit: +2^-1022 and -2^-1022
  304. .long 0
  305. .long 1048576    # 0x00100000
  306. .long 0
  307. .long 2148532224    # 0x80100000
  308. .type _MINNORM,@object
  309. .size _MINNORM,16
  310. .align 8
  311. .L_2il0floatpacket.0:    # 2.0
  312. .long 0x00000000,0x40000000
  313. .type .L_2il0floatpacket.0,@object
  314. .size .L_2il0floatpacket.0,8
  315. .align 8
  316. .L_2il0floatpacket.1:    # 0.5
  317. .long 0x00000000,0x3fe00000
  318. .type .L_2il0floatpacket.1,@object
  319. .size .L_2il0floatpacket.1,8
  320. .align 8
  321. .L_2il0floatpacket.3:    # 1.0
  322. .long 0x00000000,0x3ff00000
  323. .type .L_2il0floatpacket.3,@object
  324. .size .L_2il0floatpacket.3,8
  325. .align 8
  326. ones:    # two doubles indexed by sign bit: +1.0 and -1.0
  327. .long 0x00000000,0x3ff00000
  328. .long 0x00000000,0xbff00000
  329. .type ones,@object
  330. .size ones,16
  331. .align 4    # only 4-byte aligned; the code reads these doubles with movsd
  332. _small_value_64:    # two doubles indexed by sign bit: +2^-1000 and -2^-1000
  333. .long 0
  334. .long 24117248    # 0x01700000
  335. .long 0
  336. .long 2171600896    # 0x81700000
  337. .type _small_value_64,@object
  338. .size _small_value_64,16
  339. .align 4
  340. _PA:    # five doubles PA0..PA4: coefficients of the polynomial in x^2 used for 2^-57 <= |x| < 2^-4
  341. .long 1431655746
  342. .long 3217380693    # PA0 upper word 0xbfc55555 (close to -1/6)
  343. .long 858875226
  344. .long 1068708659    # PA1 upper word 0x3fb33333 (close to 0.075)
  345. .long 2809361764
  346. .long 3215383405    # PA2 upper word 0xbfa6db6d
  347. .long 1241898124
  348. .long 1067392054    # PA3
  349. .long 966918340
  350. .long 3214324669    # PA4
  351. .type _PA,@object
  352. .size _PA,40
  353. .align 4
  354. _PL:    # five doubles PL0..PL4: coefficients applied to u^2..u^6 in the log evaluation
  355. .long 0
  356. .long 3219128320    # PL0 = 0xbfe0000000000000 = -0.5
  357. .long 1431621855
  358. .long 1070945621    # PL1 upper word 0x3fd55555 (close to 1/3)
  359. .long 4294842013
  360. .long 3218079743    # PL2
  361. .long 1289448124
  362. .long 1070176674    # PL3
  363. .long 2077359316
  364. .long 3217380703    # PL4
  365. .type _PL,@object
  366. .size _PL,40
  367. .align 4
  368. _LN2:    # two doubles multiplied by the exponent term: [0] is a small correction, [1] is close to ln 2 (presumably a low/high split of ln 2 - verify)
  369. .long 897137782
  370. .long 1038760431    # _LN2[0] upper word 0x3dea39ef
  371. .long 4276092928    # 0xfee00000: low word of _LN2[1]
  372. .long 1072049730    # _LN2[1] upper word 0x3fe62e42
  373. .type _LN2,@object
  374. .size _LN2,16
  375. .data
  376. .section .note.GNU-stack, ""    # empty marker section with no flags (conventionally read by GNU toolchains as: no executable stack needed)
  377. // -- Begin DWARF2 SEGMENT .eh_frame
  378. .section .eh_frame,"a",@progbits
  379. .eh_frame_seg:    # label only; no unwind data is emitted by hand here (the function uses .cfi_startproc/.cfi_endproc instead)
  380. .align 1
  381. # End of file