acosh_gen.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "acosh_gen.c"             # source file name recorded by the compiler
  38. .text                           # switch to the code section
  39. ..TXTST0:                       # label placed at the start of the .text contribution of this file
  40. # -- Begin acosh
  /*
   * acosh entry point (AT&T syntax, x86-64). Presumably the C library
   * double acosh(double x): the argument arrives in %xmm0 and the result is
   * left in %xmm0 as a scalar double.
   *
   * Registers written: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0-%xmm11
   * and the flags. Scratch memory: -8, -16 and -24(%rsp), used without
   * moving %rsp; the routine contains no call instruction.
   *
   * Dispatch on hx, the high 32 bits of x:
   *   ..B1.14  x below 1, negative x, Inf or NaN (special results)
   *   ..B1.4   x == 1: returns +0.0
   *   ..B1.5   x in (1, 1.005859375): sqrt(2t)*(1 + p(t)) with t = x - 1
   *   ..B1.8   x in [1.005859375, 4.75): y = x + sqrt(x^2 - 1) as hi + lo
   *   ..B1.9   x in [4.75, 2^28): y = 2x - 1/(x + sqrt(x^2 - 1)) as hi + lo
   *   ..B1.10  splits y into 2^k * m and a table index, then ..B1.12
   *   ..B1.11  x in [2^28, Inf): m = mantissa of x, k = exponent + 1
   *   ..B1.12  table-driven logarithm: k*ln2 + table[index] + q(r)
   *
   * "hi" values below are doubles whose low 32 bits were cleared by
   * overwriting the low word of a spilled copy with zero.
   */
  41. .text
  42. .align 16,0x90
  43. .globl acosh
  44. acosh:
  45. # parameter 1: %xmm0
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_acosh.1:
  49. ..L2:
  50. movsd %xmm0, -8(%rsp)           # spill x so its bit pattern can be read with integer loads
  51. movl -4(%rsp), %eax             # eax = hx, the high 32 bits of x
  52. lea -1072693248(%rax), %edx     # edx = hx - 0x3FF00000 (0x3FF00000 is the high word of 1.0)
  53. cmpl $1073741824, %edx          # unsigned compare with 0x40000000
  54. jae ..B1.14                     # x below 1, negative x, Inf or NaN
  55. ..B1.2:
  56. cmpl $6144, %edx                # 0x1800: hx at or above 0x3FF01800, i.e. x at or above 1.005859375
  57. jae ..B1.6
  58. ..B1.3:
  59. movl -8(%rsp), %eax             # low 32 bits of x
  60. orl %eax, %edx                  # zero only when hx == 0x3FF00000 and the low word is 0 (x == 1)
  61. jne ..B1.5
  62. ..B1.4:                         # x == 1
  63. lea _zeros(%rip), %rax
  64. movsd (%rax), %xmm0             # return _zeros[0] = +0.0
  65. ret
  66. ..B1.5:                         # x in (1, 1.005859375)
  67. lea 8+_ones(%rip), %rax         # rax points at _ones[1] (-1.0)
  68. pxor %xmm3, %xmm3
  69. movsd -8(%rsp), %xmm5           # xmm5 = x
  70. xorl %r9d, %r9d                 # r9d = 0, stored below to clear low mantissa words
  71. movsd .L_2il0floatpacket.0(%rip), %xmm4    # xmm4 = 2.0
  72. lea 32+_PA(%rip), %rdx          # rdx points at _PA[4]
  73. lea 16+_PA(%rip), %rcx          # rcx points at _PA[2]
  74. lea 24+_PA(%rip), %rdi          # rdi points at _PA[3]
  75. lea _PA(%rip), %rsi             # rsi points at _PA[0]
  76. addsd (%rax), %xmm5             # xmm5 = t = x - 1
  77. mulsd %xmm5, %xmm4              # xmm4 = 2t
  78. movaps %xmm5, %xmm6
  79. mulsd %xmm5, %xmm6              # xmm6 = t^2
  80. sqrtsd %xmm4, %xmm3             # xmm3 = s = sqrt(2t)
  81. movsd %xmm3, -16(%rsp)
  82. movaps %xmm3, %xmm0
  83. movl %r9d, -16(%rsp)            # clear the low 32 bits of the spilled s
  84. lea 8+_PA(%rip), %r8            # r8 points at _PA[1]
  85. movsd -16(%rsp), %xmm11         # xmm11 = s_hi
  86. movaps %xmm11, %xmm1
  87. movaps %xmm11, %xmm2
  88. movsd (%rdx), %xmm8             # xmm8 = PA4
  89. subsd %xmm11, %xmm0             # xmm0 = s_lo = s - s_hi
  90. mulsd %xmm11, %xmm1             # xmm1 = s_hi^2
  91. mulsd %xmm6, %xmm8
  92. mulsd %xmm0, %xmm2              # xmm2 = s_hi*s_lo
  93. subsd %xmm1, %xmm4
  94. addsd (%rcx), %xmm8             # xmm8 = PA4*t^2 + PA2
  95. subsd %xmm2, %xmm4              # xmm4 = 2t - s_hi^2 - s_hi*s_lo
  96. mulsd %xmm6, %xmm8
  97. divsd %xmm3, %xmm4              # xmm4 = that residual divided by s
  98. movsd (%rdi), %xmm7             # xmm7 = PA3
  99. addsd %xmm4, %xmm0
  100. mulsd %xmm6, %xmm7
  101. addsd (%rsi), %xmm8            # xmm8 = (PA4*t^2 + PA2)*t^2 + PA0
  102. mulsd .L_2il0floatpacket.1(%rip), %xmm0    # xmm0 = c = 0.5*(s_lo + residual/s): low-order part of sqrt(2t)
  103. addsd (%r8), %xmm7             # xmm7 = PA3*t^2 + PA1
  104. mulsd %xmm5, %xmm8             # odd powers: PA0*t + PA2*t^3 + PA4*t^5
  105. mulsd %xmm6, %xmm7             # even powers: PA1*t^2 + PA3*t^4
  106. addsd %xmm7, %xmm8             # xmm8 = p(t)
  107. movsd %xmm8, -24(%rsp)
  108. movaps %xmm8, %xmm9
  109. movl %r9d, -24(%rsp)           # clear the low 32 bits of the spilled p
  110. movsd -24(%rsp), %xmm10        # xmm10 = p_hi
  111. mulsd %xmm0, %xmm8             # xmm8 = p*c
  112. subsd %xmm10, %xmm9            # xmm9 = p_lo = p - p_hi
  113. mulsd %xmm11, %xmm10           # xmm10 = p_hi*s_hi
  114. mulsd %xmm11, %xmm9            # xmm9 = p_lo*s_hi
  115. movsd %xmm10, -24(%rsp)        # this slot is not reloaded before ret
  116. addsd %xmm8, %xmm9
  117. addsd %xmm9, %xmm0             # c + p_lo*s_hi + p*c (small terms first)
  118. addsd %xmm10, %xmm0
  119. addsd %xmm11, %xmm0            # result = s_hi + p_hi*s_hi + small terms
  120. ret
  121. ..B1.6:
  122. cmpl $29360128, %edx           # 0x1C00000: hx at or above 0x41B00000, i.e. x at or above 2^28
  123. jae ..B1.11
  124. ..B1.7:
  125. movsd -8(%rsp), %xmm7          # xmm7 = x
  126. cmpl $2293760, %edx            # 0x230000: hx at or above 0x40130000, i.e. x at or above 4.75
  127. jae ..B1.9
  128. ..B1.8:                        # x in [1.005859375, 4.75)
  129. xorl %edx, %edx
  130. movaps %xmm7, %xmm1
  131. movl %edx, -8(%rsp)            # clear the low 32 bits of the spilled x
  132. lea 8+_ones(%rip), %rax        # rax points at _ones[1] (-1.0)
  133. movsd -8(%rsp), %xmm8          # xmm8 = x_hi
  134. movaps %xmm8, %xmm4
  135. subsd %xmm8, %xmm1             # xmm1 = x_lo = x - x_hi
  136. mulsd %xmm8, %xmm4             # xmm4 = x_hi^2
  137. addsd %xmm8, %xmm7
  138. mulsd %xmm1, %xmm7             # xmm7 = (x + x_hi)*x_lo
  139. addsd (%rax), %xmm4            # xmm4 = x_hi^2 - 1
  140. movaps %xmm4, %xmm0
  141. addsd %xmm7, %xmm0             # xmm0 = x^2 - 1, assembled from the two pieces
  142. sqrtsd %xmm0, %xmm0            # xmm0 = s = sqrt(x^2 - 1)
  143. movsd %xmm0, -16(%rsp)
  144. movl %edx, -16(%rsp)           # clear the low 32 bits of the spilled s
  145. movsd -16(%rsp), %xmm6         # xmm6 = s_hi
  146. movaps %xmm6, %xmm2
  147. movaps %xmm6, %xmm3
  148. mulsd %xmm6, %xmm2             # xmm2 = s_hi^2
  149. subsd %xmm6, %xmm0             # xmm0 = s_lo = s - s_hi
  150. addsd %xmm6, %xmm8             # xmm8 = x_hi + s_hi: high part of y
  151. mulsd %xmm0, %xmm3             # xmm3 = s_hi*s_lo
  152. subsd %xmm2, %xmm4
  153. movaps %xmm6, %xmm5
  154. subsd %xmm3, %xmm4             # xmm4 = (x_hi^2 - 1) - s_hi^2 - s_hi*s_lo
  155. addsd %xmm0, %xmm5             # xmm5 = s_hi + s_lo
  156. addsd %xmm4, %xmm7             # residual including the (x + x_hi)*x_lo piece
  157. divsd %xmm5, %xmm7
  158. addsd %xmm7, %xmm0
  159. mulsd .L_2il0floatpacket.1(%rip), %xmm0    # xmm0 = 0.5*(s_lo + residual/(s_hi + s_lo))
  160. addsd %xmm0, %xmm1             # xmm1 = x_lo + low-order part of s: low part of y
  161. jmp ..B1.10
  162. ..B1.9:                        # x in [4.75, 2^28)
  163. movaps %xmm7, %xmm0
  164. lea 8+_ones(%rip), %rax        # rax points at _ones[1] (-1.0)
  165. movsd .L_2il0floatpacket.0(%rip), %xmm2    # xmm2 = 2.0
  166. mulsd %xmm7, %xmm0             # xmm0 = x^2
  167. movsd (%rax), %xmm1            # xmm1 = -1.0
  168. addsd %xmm1, %xmm0             # xmm0 = x^2 - 1
  169. sqrtsd %xmm0, %xmm0
  170. addsd %xmm7, %xmm0             # xmm0 = x + sqrt(x^2 - 1)
  171. mulsd %xmm2, %xmm7             # xmm7 = 2x
  172. divsd %xmm0, %xmm1             # xmm1 = -1/(x + sqrt(x^2 - 1))
  173. movsd %xmm7, -8(%rsp)
  174. movl $0, -8(%rsp)              # clear the low 32 bits of the spilled 2x
  175. movsd -8(%rsp), %xmm8          # xmm8 = high part of 2x: high part of y
  176. subsd %xmm8, %xmm7             # xmm7 = low part of 2x
  177. addsd %xmm7, %xmm1             # xmm1 = low(2x) - 1/(x + sqrt(x^2 - 1)): low part of y
  178. ..B1.10:                       # in: xmm8 = high part of y, xmm1 = low part of y
  179. movaps %xmm1, %xmm0
  180. movq $0x3ff0000000000000, %rax # bit pattern of 1.0
  181. addsd %xmm8, %xmm0             # xmm0 = y = high + low
  182. movsd %xmm0, -8(%rsp)
  183. movl $0, -8(%rsp)              # clear the low 32 bits of the spilled y
  184. movsd -8(%rsp), %xmm7          # xmm7 = y_hi
  185. movzwl -2(%rsp), %ecx          # top 16 bits of y: sign, 11 exponent bits, 4 mantissa bits
  186. subsd %xmm7, %xmm8
  187. andl $32752, %ecx              # 0x7FF0: keep the exponent field
  188. addsd %xmm8, %xmm1             # xmm1 = y_lo = low + (high - y_hi)
  189. shrl $4, %ecx
  190. addl $-1023, %ecx              # ecx = k = unbiased exponent of y
  191. movslq %ecx, %rdx
  192. negq %rdx
  193. shlq $52, %rdx                 # -k shifted into the exponent field position
  194. addq %rax, %rdx                # rdx = bit pattern of 2^-k
  195. movq %rdx, -24(%rsp)
  196. movsd -24(%rsp), %xmm2         # xmm2 = 2^-k
  197. mulsd %xmm2, %xmm1             # xmm1 = y_lo * 2^-k
  198. mulsd %xmm2, %xmm7             # xmm7 = y_hi * 2^-k
  199. movaps %xmm1, %xmm3
  200. addsd %xmm7, %xmm3
  201. movsd %xmm3, -16(%rsp)         # spill the scaled y to read its mantissa bits
  202. movl -12(%rsp), %esi           # high 32 bits of the scaled y
  203. shrl $12, %esi
  204. movzbl %sil, %edx              # edx = top 8 mantissa bits = table index (0..255)
  205. jmp ..B1.12
  206. ..B1.11:                       # x in [2^28, Inf); -8(%rsp) still holds x
  207. movzwl -2(%rsp), %eax          # top 16 bits of x
  208. movl %eax, %ecx
  209. andl $-32753, %eax             # clear the exponent field (bits 0x7FF0)
  210. andl $32752, %ecx              # ecx = exponent field (0x7FF0 mask)
  211. orl $-49168, %eax              # low 16 bits get 0x3FF0: exponent field becomes 0x3FF
  212. movw %ax, -2(%rsp)             # spilled value is now m, x rescaled into [1, 2)
  213. movsd -8(%rsp), %xmm1          # xmm1 = m
  214. movl $0, -8(%rsp)              # clear the low 32 bits of the spilled m
  215. movl -4(%rsp), %edx            # high 32 bits of m
  216. movsd -8(%rsp), %xmm7          # xmm7 = m_hi
  217. shrl $4, %ecx                  # ecx = biased exponent of x
  218. subsd %xmm7, %xmm1             # xmm1 = m_lo = m - m_hi
  219. shrl $12, %edx
  220. addl $-1022, %ecx              # ecx = k = biased exponent - 1022 (unbiased exponent + 1, as in log(2x))
  221. movzbl %dl, %edx               # edx = top 8 mantissa bits = table index (0..255)
  222. ..B1.12:                       # in: xmm7 / xmm1 = hi / lo of m, ecx = k, edx = table index
  223. movq __libm_rcp_table_256@GOTPCREL(%rip), %rsi    # external table, address loaded via the GOT
  224. pxor %xmm0, %xmm0
  225. lea _ones(%rip), %rdi          # rdi points at _ones[0] (+1.0)
  226. lea 32+_PL(%rip), %r8          # r8 points at _PL[4]
  227. lea 24+_PL(%rip), %r11         # r11 points at _PL[3]
  228. lea 16+_PL(%rip), %r9          # r9 points at _PL[2]
  229. lea _PL(%rip), %r10            # r10 points at _PL[0]
  230. pxor %xmm6, %xmm6
  231. movq __libm_log_table_256@GOTPCREL(%rip), %rax    # external table, address loaded via the GOT
  232. movsd (%r8), %xmm5             # xmm5 = PL4
  233. movsd (%r11), %xmm4            # xmm4 = PL3
  234. cvtss2sd (%rsi,%rdx,4), %xmm0  # xmm0 = rcp = 4-byte float entry [index], widened; presumably about 1/m (table defined elsewhere)
  235. cvtsi2sd %ecx, %xmm6           # xmm6 = (double) k
  236. mulsd %xmm0, %xmm7
  237. mulsd %xmm0, %xmm1             # xmm1 = r_lo = m_lo*rcp
  238. subsd (%rdi), %xmm7            # xmm7 = r_hi = m_hi*rcp - 1
  239. movaps %xmm7, %xmm3
  240. lea _LN2(%rip), %rcx           # rcx points at _LN2[0]; k was already converted above
  241. lea 8+_PL(%rip), %rsi          # rsi points at _PL[1]
  242. addsd %xmm1, %xmm3             # xmm3 = r = r_hi + r_lo
  243. movaps %xmm3, %xmm2
  244. mulsd %xmm3, %xmm2             # xmm2 = r^2
  245. mulsd %xmm2, %xmm5
  246. mulsd %xmm2, %xmm4
  247. addsd (%r9), %xmm5             # xmm5 = PL4*r^2 + PL2
  248. addsd (%rsi), %xmm4            # xmm4 = PL3*r^2 + PL1
  249. mulsd %xmm2, %xmm5
  250. mulsd %xmm2, %xmm4
  251. addsd (%r10), %xmm5            # xmm5 = (PL4*r^2 + PL2)*r^2 + PL0
  252. mulsd %xmm3, %xmm4             # xmm4 = PL1*r^3 + PL3*r^5
  253. mulsd %xmm2, %xmm5             # xmm5 = PL0*r^2 + PL2*r^4 + PL4*r^6
  254. movsd (%rcx), %xmm0            # xmm0 = _LN2[0]
  255. addsd %xmm4, %xmm5             # xmm5 = q(r), polynomial terms of degree 2..6
  256. mulsd %xmm6, %xmm0             # xmm0 = k*_LN2[0]
  257. addsd %xmm5, %xmm1             # xmm1 = r_lo + q(r)
  258. shlq $4, %rdx                  # index*16: byte offset of a 16-byte log table entry
  259. lea 8+_LN2(%rip), %rcx         # rcx points at _LN2[1]
  260. movsd %xmm3, -16(%rsp)         # this slot is not reloaded before ret
  261. addsd (%rax,%rdx), %xmm0       # add the first double of the log table entry
  262. addsd %xmm1, %xmm0             # xmm0 = sum of the small terms
  263. movsd (%rcx), %xmm1            # xmm1 = _LN2[1]
  264. mulsd %xmm1, %xmm6             # xmm6 = k*_LN2[1]
  265. addsd 8(%rax,%rdx), %xmm6      # add the second double of the log table entry
  266. movsd %xmm6, -24(%rsp)         # this slot is not reloaded before ret
  267. addsd %xmm6, %xmm7             # xmm7 = r_hi + k*_LN2[1] + entry: the large terms
  268. movsd %xmm7, -8(%rsp)          # this slot is not reloaded before ret
  269. addsd %xmm7, %xmm0             # result = large terms + small terms
  270. ..B1.13:
  271. ret
  272. ..B1.14:                       # x below 1, negative x, Inf or NaN; eax = hx
  273. movl %eax, %edx
  274. andl $2147483647, %edx         # edx = hx with the sign bit cleared (mask 0x7FFFFFFF)
  275. cmpl $2146435072, %edx         # 0x7FF00000: exponent field all ones, upper mantissa bits zero
  276. jbe ..B1.16
  277. ..B1.15:                       # reached for NaN and for +Inf
  278. movsd -8(%rsp), %xmm0
  279. addsd %xmm0, %xmm0             # return x + x
  280. ret
  281. ..B1.16:
  282. cmpl $2146435072, %eax         # hx == 0x7FF00000 (sign bit clear)?
  283. jne ..B1.18
  284. ..B1.17:
  285. cmpl $0, -8(%rsp)              # low word zero as well: x is +Inf
  286. je ..B1.15
  287. ..B1.18:
  288. cmpl $2146435072, %edx         # exponent field all ones, either sign?
  289. jne ..B1.20
  290. ..B1.19:
  291. cmpl $0, -8(%rsp)              # nonzero low word: x is a NaN
  292. jne ..B1.15
  293. ..B1.20:                       # remaining inputs: finite x below 1, and -Inf
  294. lea _infs(%rip), %rax
  295. lea _zeros(%rip), %rdx
  296. movsd (%rax), %xmm0            # xmm0 = _infs[0] = +Inf
  297. mulsd (%rdx), %xmm0            # +Inf * +0.0: the product is a NaN, which is returned
  298. ret
  299. .align 16,0x90
  300. .cfi_endproc
  301. .type acosh,@function
  302. .size acosh,.-acosh
  303. .data
  304. # -- End acosh
  305. .section .rodata, "a"
  306. .align 8
  307. .align 8
  308. .L_2il0floatpacket.0:          # double constant 2.0 (0x4000000000000000), low word listed first
  309. .long 0x00000000,0x40000000
  310. .type .L_2il0floatpacket.0,@object
  311. .size .L_2il0floatpacket.0,8
  312. .align 8
  313. .L_2il0floatpacket.1:          # double constant 0.5 (0x3FE0000000000000), low word listed first
  314. .long 0x00000000,0x3fe00000
  315. .type .L_2il0floatpacket.1,@object
  316. .size .L_2il0floatpacket.1,8
  317. .align 4
  318. _zeros:                        # two doubles, each written as (low word, high word); acosh reads only _zeros[0]
  319. .long 0
  320. .long 0                        # _zeros[0] = +0.0
  321. .long 0
  322. .long 2147483648               # high word 0x80000000: _zeros[1] = -0.0
  323. .type _zeros,@object
  324. .size _zeros,16
  325. .align 4
  326. _ones:                         # two doubles, each written as (low word, high word)
  327. .long 0
  328. .long 1072693248               # high word 0x3FF00000: _ones[0] = +1.0
  329. .long 0
  330. .long 3220176896               # high word 0xBFF00000: _ones[1] = -1.0
  331. .type _ones,@object
  332. .size _ones,16
  333. .align 4
  334. _PA:                           # five doubles as (low word, high word): coefficients PA0..PA4 of p(t) in the near-1 path of acosh
  335. .long 1431655760
  336. .long 3216332117               # high word 0xBFB55555: PA0 is about -8.3333e-2
  337. .long 858954823
  338. .long 1066611507               # high word 0x3F933333: PA1 is about 1.8750e-2
  339. .long 2956777430
  340. .long 3212237677               # high word 0xBF76DB6D: PA2 is about -5.5804e-3
  341. .long 4158004615
  342. .long 1063197775               # high word 0x3F5F1C4F: PA3 is about 1.8988e-3
  343. .long 2173429939
  344. .long 3209085130               # high word 0xBF46C0CA: PA4 is about -6.944e-4
  345. .type _PA,@object
  346. .size _PA,40
  347. .align 4
  348. _PL:                           # five doubles as (low word, high word): coefficients PL0..PL4 of r^2..r^6 in the log path of acosh
  349. .long 0
  350. .long 3219128320               # high word 0xBFE00000: PL0 = -0.5
  351. .long 1431621855
  352. .long 1070945621               # high word 0x3FD55555: PL1 is about 0.33333
  353. .long 4294842013
  354. .long 3218079743               # high word 0xBFCFFFFF: PL2 is about -0.25
  355. .long 1289448124
  356. .long 1070176674               # high word 0x3FC999A2: PL3 is about 0.20000
  357. .long 2077359316
  358. .long 3217380703               # high word 0xBFC5555F: PL4 is about -0.16667
  359. .type _PL,@object
  360. .size _PL,40
  361. .align 4
  362. _LN2:                          # two doubles as (low word, high word); acosh multiplies each by the exponent k
  363. .long 897137782
  364. .long 1038760431               # high word 0x3DEA39EF: _LN2[0] is about 1.908e-10 (small correction term)
  365. .long 4276092928
  366. .long 1072049730               # 0x3FE62E42FEE00000: _LN2[1] is about 0.693147, low 21 bits of the bit pattern are zero
  367. .type _LN2,@object
  368. .size _LN2,16
  369. .align 4
  370. _infs:                         # two doubles as (low word, high word); acosh reads only _infs[0]
  371. .long 0
  372. .long 2146435072               # high word 0x7FF00000: _infs[0] = +Inf
  373. .long 0
  374. .long 4293918720               # high word 0xFFF00000: _infs[1] = -Inf
  375. .type _infs,@object
  376. .size _infs,16
  377. .data
  378. .section .note.GNU-stack, ""   # empty marker section; by GNU toolchain convention it requests a non-executable stack
  379. // -- Begin DWARF2 SEGMENT .eh_frame
  380. .section .eh_frame,"a",@progbits
  381. .eh_frame_seg:                 # label only; no unwind data is emitted by hand here
  382. .align 1
  383. # End