libm_sincos_k32.S 9.6 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "libm_sincos_k32.c"    # source file name recorded in the object file
  38. .text    # everything that follows is emitted into the code section
  39. ..TXTST0:    # label at the start of this text chunk; not referenced in the visible code
  40. # -- Begin __libm_sincos_k32
  #
  # __libm_sincos_k32 (GNU as, AT&T syntax, x86-64, System V register usage)
  #
  # Inputs : xmm0 = double x, rdi and rsi = pointers to two double outputs,
  #          edx  = integer k.
  # Outputs: one double stored through rdi and one through rsi.  eax is not
  #          set up as a return value anywhere in this routine.
  # NOTE(review): the symbol names suggest that the value stored through rdi
  #          is a sine and the one through rsi a cosine, with k an offset in
  #          units of pi/4 - confirm against the C prototype.
  #
  # Outline:
  #   1. Split x into sign bit (r12) and |x| (xmm0); k is negated when x is
  #      negative.
  #   2. Reduce |x| by one of four paths chosen from the upper 32 bits of |x|:
  #        at or below 0x3FF90000 : at most one subtraction of pi/4 (..B1.3)
  #        below 0x41000000       : n = integer part of |x| * _INV_PI04,
  #                                 subtract n * _DP2[0..2]        (..B1.7)
  #        below 0x41C00000       : same, with n * _DP3[0..3]      (..B1.8)
  #        otherwise              : call __libm_reduce_pi04d       (..B1.9)
  #   3. At ..B1.11 evaluate an odd polynomial (_SP) and an even polynomial
  #      (_CP) in the reduced argument, multiply each by +1.0 or -1.0 taken
  #      from the ones table, and store them; bit 1 of the adjusted k decides
  #      which polynomial goes through rdi and which through rsi.
  #
  # Stack  : 16 bytes of locals; 0(%rsp) is scratch, 8(%rsp) holds x and
  #          later the reduced argument.
  # Saved  : r12, rbx, rbp are pushed on entry and popped before each ret.
  # Clobber: rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm6, flags, plus whatever
  #          __libm_reduce_pi04d clobbers on the large-argument path.
  #
  41. .text
  42. .align 16,0x90    # pad to a 16-byte boundary with 0x90 (nop) bytes
  43. .globl __libm_sincos_k32
  44. __libm_sincos_k32:
  45. # parameter 1: %xmm0 (double x)
  46. # parameter 2: %rdi (pointer; receives the first stored double)
  47. # parameter 3: %rsi (pointer; receives the second stored double)
  48. # parameter 4: %edx (integer k)
  49. ..B1.1:
  50. .cfi_startproc
  51. ..___tag_value___libm_sincos_k32.1:
  52. ..L2:
  53. pushq %r12    # r12 will hold the sign bit of x
  54. .cfi_def_cfa_offset 16
  55. .cfi_offset 12, -16
  56. pushq %rbx    # rbx will hold parameter 2
  57. .cfi_def_cfa_offset 24
  58. .cfi_offset 3, -24
  59. pushq %rbp    # rbp will hold parameter 3
  60. .cfi_def_cfa_offset 32
  61. .cfi_offset 6, -32
  62. subq $16, %rsp    # locals; three pushes plus 16 bytes keep rsp 16-byte aligned for the call in ..B1.9 (given SysV entry alignment)
  63. .cfi_def_cfa_offset 48
  64. lea iones(%rip), %rax    # rax = address of the int table iones = {1, -1}
  65. movsd %xmm0, 8(%rsp)    # spill x so that its upper word can be read as an integer
  66. movl %edx, %ecx    # ecx = k
  67. movl 12(%rsp), %r12d    # r12d = upper 32 bits of x
  68. movl %r12d, %r10d
  69. shrl $31, %r12d    # r12 = sign bit of x (0 or 1)
  70. andl $2147483647, %r10d    # r10d = upper 32 bits of |x| (mask is 0x7fffffff)
  71. imull (%rax,%r12,4), %ecx    # ecx = k * iones[sign], i.e. k negated when x is negative
  72. movq %rsi, %rbp    # keep parameter 3; rsi is reused below
  73. andps .L_2il0floatpacket.2(%rip), %xmm0    # xmm0 = |x|; the mask also zeroes the upper 64 bits of xmm0
  74. movq %rdi, %rbx    # keep parameter 2; rdi is reused below
  75. cmpl $1103101952, %r10d    # 1103101952 = 0x41C00000
  76. jae ..B1.9    # upper word at or above 0x41C00000: out-of-line reduction
  77. ..B1.2:
  78. cmpl $1073283072, %r10d    # 1073283072 = 0x3FF90000
  79. ja ..B1.6    # above that: reduce by an integer multiple taken from _DP2 or _DP3
  80. ..B1.3:    # small |x| path
  81. incl %ecx    # ecx = k + 1
  82. movl %ecx, %edx
  83. andl $2, %edx    # edx = bit 1 of ecx; tested again at ..B1.11
  84. je ..B1.5    # bit clear: use |x| unchanged
  85. ..B1.4:
  86. subsd .L_2il0floatpacket.0(%rip), %xmm0    # xmm0 = |x| - pi/4
  87. movsd %xmm0, 8(%rsp)    # stored to the local slot; the slot is not read again on this path
  88. jmp ..B1.11
  89. ..B1.5:
  90. movsd %xmm0, 8(%rsp)
  91. jmp ..B1.11
  92. ..B1.6:    # medium |x| path: compute the integer multiple n
  93. lea _INV_PI04(%rip), %rax
  94. lea _TWO_52H(%rip), %rdx
  95. pxor %xmm2, %xmm2    # clear xmm2 before the cvtsi2sd below, which writes only the low lane
  96. pxor %xmm5, %xmm5    # same for xmm5
  97. movsd %xmm0, 8(%rsp)    # 8(%rsp) = |x|; reloaded at ..B1.8
  98. movsd (%rax), %xmm3
  99. mulsd %xmm0, %xmm3    # xmm3 = |x| * _INV_PI04
  100. movsd (%rdx), %xmm1
  101. addsd %xmm3, %xmm1    # xmm1 = xmm3 + _TWO_52H (bit pattern 0x4338000000000000 = 1.5 * 2^52)
  102. movsd %xmm1, (%rsp)
  103. movl (%rsp), %r9d    # r9d = low 32 bits of the sum = xmm3 rounded to an integer (default rounding mode assumed)
  104. cvtsi2sd %r9d, %xmm2    # xmm2 = (double) r9d
  105. cmpltsd %xmm2, %xmm3    # xmm3 = all ones when the product is below xmm2 (rounding went up), else zero
  106. movd %xmm3, %edi    # edi = -1 or 0
  107. negl %edi    # edi = 1 or 0
  108. subl %edi, %r9d    # undo a round-up so that r9d does not exceed the product
  109. addl %r9d, %ecx    # ecx = signed k + r9d
  110. movl %ecx, %r8d
  111. andl $1, %r8d    # r8d = low bit of ecx
  112. addl %r8d, %r9d    # add that bit to both counters, which makes ecx even
  113. addl %r8d, %ecx
  114. cvtsi2sd %r9d, %xmm5    # xmm5 = n = (double) r9d, the multiple to subtract
  115. cmpl $1090519040, %r10d    # 1090519040 = 0x41000000
  116. jae ..B1.8    # larger |x|: use the four-piece table _DP3
  117. ..B1.7:    # subtract n times each of the three _DP2 pieces in turn
  118. lea _DP2(%rip), %rax
  119. lea 8+_DP2(%rip), %rdx
  120. lea 16+_DP2(%rip), %rdi
  121. movsd (%rax), %xmm1
  122. mulsd %xmm5, %xmm1    # xmm1 = n * _DP2[0]
  123. movsd (%rdx), %xmm2
  124. movl %ecx, %edx    # rdx is free again once _DP2[1] has been loaded
  125. mulsd %xmm5, %xmm2    # xmm2 = n * _DP2[1]
  126. subsd %xmm1, %xmm0
  127. movsd (%rdi), %xmm3
  128. andl $2, %edx    # edx = bit 1 of ecx; tested at ..B1.11
  129. mulsd %xmm3, %xmm5    # xmm5 = n * _DP2[2]
  130. subsd %xmm2, %xmm0
  131. subsd %xmm5, %xmm0    # xmm0 = |x| - n*_DP2[0] - n*_DP2[1] - n*_DP2[2]
  132. movsd %xmm0, 8(%rsp)
  133. jmp ..B1.11
  134. ..B1.8:    # same scheme with the four _DP3 pieces
  135. lea _DP3(%rip), %rax
  136. lea 8+_DP3(%rip), %rdx
  137. movsd 8(%rsp), %xmm0    # reload |x| as stored at the top of ..B1.6
  138. lea 16+_DP3(%rip), %rdi
  139. lea 24+_DP3(%rip), %r8
  140. movsd (%rax), %xmm1
  141. movsd (%rdx), %xmm2
  142. movl %ecx, %edx
  143. mulsd %xmm5, %xmm1    # xmm1 = n * _DP3[0]
  144. andl $2, %edx    # edx = bit 1 of ecx; tested at ..B1.11
  145. mulsd %xmm5, %xmm2    # xmm2 = n * _DP3[1]
  146. subsd %xmm1, %xmm0
  147. movsd (%rdi), %xmm3
  148. subsd %xmm2, %xmm0
  149. mulsd %xmm5, %xmm3    # xmm3 = n * _DP3[2]
  150. movsd (%r8), %xmm4
  151. subsd %xmm3, %xmm0
  152. mulsd %xmm4, %xmm5    # xmm5 = n * _DP3[3]
  153. subsd %xmm5, %xmm0    # xmm0 = |x| minus all four products
  154. movsd %xmm0, 8(%rsp)
  155. jmp ..B1.11
  156. ..B1.9:    # large |x| path: hand the reduction to __libm_reduce_pi04d
  157. movl %ecx, %esi    # esi = signed k
  158. lea 8(%rsp), %rdi    # rdi = address of the local slot at 8(%rsp)
  159. movsd %xmm0, (%rdi)    # slot = |x|; xmm0 still holds |x| at the call
  160. ..___tag_value___libm_sincos_k32.10:
  161. call __libm_reduce_pi04d@PLT    # NOTE(review): presumably leaves the reduced argument in the slot and returns the updated k - prototype not visible here
  162. ..___tag_value___libm_sincos_k32.11:
  163. ..B1.17:
  164. movl %eax, %ecx    # ecx = integer returned by the call
  165. ..B1.10:
  166. incl %ecx    # ecx = returned value + 1
  167. movl %ecx, %edx
  168. movsd 8(%rsp), %xmm0    # xmm0 = contents of the local slot after the call
  169. andl $2, %edx    # edx = bit 1 of ecx
  170. ..B1.11:    # common tail: xmm0 = reduced argument r, ecx = adjusted k, edx = bit 1 of ecx; below y = r*r and z = y*y
  171. movaps %xmm0, %xmm2
  172. lea 24+_CP(%rip), %rsi
  173. mulsd %xmm0, %xmm2    # xmm2 = y
  174. lea 8+_CP(%rip), %rdi
  175. movaps %xmm2, %xmm1
  176. lea 24+_SP(%rip), %r8
  177. mulsd %xmm2, %xmm1    # xmm1 = z
  178. lea 32+_CP(%rip), %r10
  179. movsd (%rsi), %xmm6    # xmm6 = _CP[3]
  180. lea 32+_SP(%rip), %rsi
  181. mulsd %xmm1, %xmm6
  182. lea 8+_SP(%rip), %r9
  183. movsd (%rsi), %xmm4    # xmm4 = _SP[4]
  184. lea 16+_CP(%rip), %r11
  185. mulsd %xmm1, %xmm4
  186. addsd (%rdi), %xmm6    # xmm6 = _CP[1] + z*_CP[3]
  187. lea 16+_SP(%rip), %rdi
  188. lea 2(%rcx), %eax    # eax = adjusted k + 2; reduced to a sign index below
  189. movsd (%r8), %xmm5    # xmm5 = _SP[3]
  190. lea _SP(%rip), %r8
  191. movsd (%r10), %xmm3    # xmm3 = _CP[4]
  192. mulsd %xmm1, %xmm5
  193. addsd (%rdi), %xmm4    # xmm4 = _SP[2] + z*_SP[4]
  194. mulsd %xmm1, %xmm3
  195. mulsd %xmm1, %xmm6
  196. addsd (%r9), %xmm5    # xmm5 = _SP[1] + z*_SP[3]
  197. mulsd %xmm1, %xmm4
  198. addsd (%r11), %xmm3    # xmm3 = _CP[2] + z*_CP[4]
  199. addsd .L_2il0floatpacket.1(%rip), %xmm6    # xmm6 = 1.0 + z*(_CP[1] + z*_CP[3])
  200. mulsd %xmm1, %xmm5
  201. addsd (%r8), %xmm4    # xmm4 = _SP[0] + z*(_SP[2] + z*_SP[4])
  202. mulsd %xmm1, %xmm3
  203. mulsd %xmm0, %xmm5
  204. mulsd %xmm2, %xmm4
  205. addsd %xmm0, %xmm5    # xmm5 = r + r*z*(_SP[1] + z*_SP[3])
  206. mulsd %xmm0, %xmm4    # xmm4 = r*y*(_SP[0] + z*(_SP[2] + z*_SP[4]))
  207. shrl $2, %ecx
  208. addsd %xmm4, %xmm5    # xmm5 = complete odd polynomial in r
  209. andl $1, %ecx    # ecx = bit 2 of the adjusted k
  210. xorq %rcx, %r12    # r12 = sign bit of x XOR that bit: first index into ones
  211. lea _CP(%rip), %rcx
  212. shrl $2, %eax
  213. andl $1, %eax    # eax = bit 2 of (adjusted k + 2): second index into ones
  214. testl %edx, %edx    # sets ZF for the je below; the SSE and lea instructions in between do not change flags
  215. addsd (%rcx), %xmm3    # xmm3 = _CP[0] + z*(_CP[2] + z*_CP[4])
  216. lea ones(%rip), %rcx
  217. mulsd %xmm2, %xmm3
  218. movsd (%rcx,%r12,8), %xmm0    # xmm0 = ones[r12], either +1.0 or -1.0
  219. addsd %xmm3, %xmm6    # xmm6 = complete even polynomial in r
  220. je ..B1.13    # bit 1 of the adjusted k clear: no swap
  221. ..B1.12:    # bit 1 set: even polynomial goes to parameter 2, odd polynomial to parameter 3
  222. mulsd %xmm0, %xmm6
  223. movsd (%rcx,%rax,8), %xmm0    # xmm0 = ones[eax]
  224. mulsd %xmm0, %xmm5
  225. movsd %xmm6, (%rbx)    # through parameter 2: ones[r12] * even polynomial
  226. movsd %xmm5, (%rbp)    # through parameter 3: ones[eax] * odd polynomial
  227. addq $16, %rsp    # release the locals
  228. .cfi_def_cfa_offset 32
  229. .cfi_restore 6
  230. popq %rbp
  231. .cfi_def_cfa_offset 24
  232. .cfi_restore 3
  233. popq %rbx
  234. .cfi_def_cfa_offset 16
  235. .cfi_restore 12
  236. popq %r12
  237. .cfi_def_cfa_offset 8
  238. ret
  239. .cfi_def_cfa_offset 48    # re-establish the in-body unwind state for ..B1.13, which is reached with the full frame still in place
  240. .cfi_offset 3, -24
  241. .cfi_offset 6, -32
  242. .cfi_offset 12, -16
  243. ..B1.13:    # bit 1 clear: odd polynomial goes to parameter 2, even polynomial to parameter 3
  244. mulsd %xmm5, %xmm0    # xmm0 = ones[r12] * odd polynomial
  245. movsd %xmm0, (%rbx)    # stored through parameter 2
  246. movsd (%rcx,%rax,8), %xmm0    # xmm0 = ones[eax]
  247. mulsd %xmm0, %xmm6
  248. movsd %xmm6, (%rbp)    # through parameter 3: ones[eax] * even polynomial
  249. ..B1.14:
  250. addq $16, %rsp    # release the locals
  251. .cfi_def_cfa_offset 32
  252. .cfi_restore 6
  253. popq %rbp
  254. .cfi_def_cfa_offset 24
  255. .cfi_restore 3
  256. popq %rbx
  257. .cfi_def_cfa_offset 16
  258. .cfi_restore 12
  259. popq %r12
  260. .cfi_def_cfa_offset 8
  261. ret
  262. .align 16,0x90
  263. .cfi_endproc
  264. .type __libm_sincos_k32,@function
  265. .size __libm_sincos_k32,.-__libm_sincos_k32
  266. .data
  267. # -- End __libm_sincos_k32
  268. .section .rodata, "a"    # read-only constants used by __libm_sincos_k32
  269. .align 16
  270. .align 16    # 16-byte alignment is needed because the mask below is a memory operand of andps
  271. .L_2il0floatpacket.2:    # 128-bit mask: low quadword 0x7fffffffffffffff, high quadword 0
  272. .long 0xffffffff,0x7fffffff,0x00000000,0x00000000
  273. .type .L_2il0floatpacket.2,@object
  274. .size .L_2il0floatpacket.2,16
  275. .align 8
  276. .L_2il0floatpacket.0:    # double 0x3fe921fb54442d18 (pi/4), stored low word first
  277. .long 0x54442d18,0x3fe921fb
  278. .type .L_2il0floatpacket.0,@object
  279. .size .L_2il0floatpacket.0,8
  280. .align 8
  281. .L_2il0floatpacket.1:    # double 0x3ff0000000000000 (1.0)
  282. .long 0x00000000,0x3ff00000
  283. .type .L_2il0floatpacket.1,@object
  284. .size .L_2il0floatpacket.1,8
  285. .align 8
  286. ones:    # two doubles indexed by a 0/1 value to pick a sign factor
  287. .long 0x00000000,0x3ff00000    # ones[0] = +1.0
  288. .long 0x00000000,0xbff00000    # ones[1] = -1.0
  289. .type ones,@object
  290. .size ones,16
  291. .align 4
  292. iones:    # two 32-bit integers indexed by the sign bit of x
  293. .long 1    # iones[0]
  294. .long -1    # iones[1]
  295. .type iones,@object
  296. .size iones,8
  297. .align 4
  298. _INV_PI04:    # one double, low word then high word; bit pattern 0x3FF45F306DC9C883, about 1.2732 (4/pi)
  299. .long 1841940611
  300. .long 1072979760
  301. .type _INV_PI04,@object
  302. .size _INV_PI04,8
  303. .align 4
  304. _TWO_52H:    # one double, bit pattern 0x4338000000000000 = 1.5 * 2^52; added to a value so that its integer part lands in the low mantissa bits
  305. .long 0
  306. .long 1127743488
  307. .type _TWO_52H,@object
  308. .size _TWO_52H,8
  309. .align 4
  310. _DP2:    # three doubles (low word, high word) of decreasing magnitude; each is multiplied by n and subtracted from |x| in ..B1.7
  311. .long 1413742592    # _DP2[0]: high word 0x3FE921FB matches pi/4, low word 0x54440000 has its trailing bits zero
  312. .long 1072243195
  313. .long 1279262720    # _DP2[1]
  314. .long 1031179299
  315. .long 1880851354    # _DP2[2]
  316. .long 996723793
  317. .type _DP2,@object
  318. .size _DP2,24
  319. .align 4
  320. _DP3:    # four doubles (low word, high word) of decreasing magnitude; used the same way in ..B1.8
  321. .long 1073741824    # _DP3[0]: same high word as _DP2[0]
  322. .long 1072243195
  323. .long 0    # _DP3[1]
  324. .long 1046758445
  325. .long 2147483648    # _DP3[2]
  326. .long 1021855384
  327. .long 1880851354    # _DP3[3]: same words as _DP2[2]
  328. .long 996723793
  329. .type _DP3,@object
  330. .size _DP3,32
  331. .align 4
  332. _CP:    # five doubles (low word, high word): coefficients of the even polynomial evaluated at ..B1.11; signs alternate starting negative
  333. .long 4294960802    # _CP[0], high word 0xBFDFFFFF, about -0.5
  334. .long 3219128319
  335. .long 1427442001    # _CP[1]
  336. .long 1067799893
  337. .long 2926645240    # _CP[2]
  338. .long 3210133867
  339. .long 2571283200    # _CP[3]
  340. .long 1056571689
  341. .long 2069816734    # _CP[4]
  342. .long 3197257552
  343. .type _CP,@object
  344. .size _CP,40
  345. .align 4
  346. _SP:    # five doubles (low word, high word): coefficients of the odd polynomial evaluated at ..B1.11; signs alternate starting negative
  347. .long 1431654765    # _SP[0], high word 0xBFC55555, about -1/6
  348. .long 3217380693
  349. .long 285032968    # _SP[1]
  350. .long 1065423121
  351. .long 3653044354    # _SP[2]
  352. .long 3207201183
  353. .long 2777006020    # _SP[3]
  354. .long 1053236634
  355. .long 43514947    # _SP[4]
  356. .long 3193610888
  357. .type _SP,@object
  358. .size _SP,40
  359. .data
  360. .section .note.GNU-stack, ""    # empty marker section; by GNU toolchain convention it tells the linker that this object does not need an executable stack
  361. // -- Begin DWARF2 SEGMENT .eh_frame
  362. .section .eh_frame,"a",@progbits    # unwind-information section; nothing is emitted by hand here in the visible code
  363. .eh_frame_seg:
  364. .align 1
  365. # End