libm_reduce_pi04d.S 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "libm_reduce_pi04d.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin __libm_reduce_pi04d
  # Summary (derived from the instructions below):
  #   Takes a double in %xmm0, multiplies a rescaled copy of it against eight
  #   consecutive entries of the constant table _DP, strips an integer part
  #   from the leading terms, scales the remaining sum by
  #   .L_2il0floatpacket.0 (bit pattern 0x3fe921fb54442d18, which is pi/4 as
  #   an IEEE-754 double) and stores the double result through %rdi.
  #   The low 32 bits of the stripped integer part plus parameter 3 are what
  #   remains in %eax at the ret -- presumably the function return value
  #   (TODO confirm against the C prototype, which is not visible here).
  # Stack frame (24 bytes, no calls are made):
  #   0(%rsp)   8-byte scratch used to edit doubles as raw bits
  #   8(%rsp)   modified copy of the x87 control word
  #   10(%rsp)  x87 control word saved on entry
  # NOTE(review): the table index q below is (e - 1052) / 25 where e is the
  #   biased exponent of the input, so e <= 1027 gives a negative index and
  #   e < 200 wraps in the exponent rescaling. Presumably callers only pass
  #   large-magnitude arguments -- confirm against the call sites.
  41. .text
  42. .align 16,0x90 # pad with 0x90 (nop) bytes up to a 16-byte boundary
  43. .globl __libm_reduce_pi04d
  44. __libm_reduce_pi04d:
  45. # parameter 1: %xmm0 - double input x (spilled to 0(%rsp) and edited as raw bits)
  46. # parameter 2: %rdi - pointer that receives the double result (see the movsd to (%rdi) near the end)
  47. # parameter 3: %esi - 32-bit integer added to the integer part extracted below
  48. ..B1.1:
  49. .cfi_startproc
  50. ..___tag_value___libm_reduce_pi04d.1:
  51. ..L2:
  52. subq $24, %rsp # reserve the 24-byte scratch frame
  53. .cfi_def_cfa_offset 32 # 8 bytes of return address + 24 bytes of frame
  54. movl %esi, %r8d # keep parameter 3 in %r8d (%sil is reused as a flag below)
  55. movsd %xmm0, (%rsp) # spill x so its bit pattern can be edited through memory
  56. xorb %sil, %sil # %sil = 0: x87 control word has not been modified
  57. ..B1.2:
  58. fnstcw 10(%rsp) # save the current x87 control word
  59. ..B1.3:
  60. movzwl 10(%rsp), %edx
  61. movl %edx, %eax
  62. andl $768, %eax # 768 = 0x300: bits 8-9 of the control word (precision-control field)
  63. cmpl $768, %eax
  64. je ..B1.7 # both bits already set: leave the control word alone
  65. ..B1.4:
  66. orl $-64768, %edx # low 16 bits of -64768 are 0x0300: set both precision-control bits
  67. movw %dx, 8(%rsp)
  68. ..B1.5:
  69. fldcw 8(%rsp) # install the modified control word
  70. ..B1.6:
  71. movb $1, %sil # remember to restore the saved control word before returning
  72. ..B1.7:
  73. movzwl 6(%rsp), %r10d # top 16 bits of x: sign, 11-bit biased exponent, top 4 mantissa bits
  74. movl %r10d, %ecx
  75. andl $32752, %ecx # 32752 = 0x7ff0: isolate the exponent field
  76. movl $1374389535, %eax # 0x51eb851f = ceil(2^35 / 25): reciprocal multiplier for a divide by 25
  77. shrl $4, %ecx # %ecx = biased exponent e
  78. andl $-32753, %r10d # -32753 = ~0x7ff0: clear the exponent field, keep sign and mantissa bits
  79. lea -1052(%rcx), %r11d # %r11d = e - 1052
  80. imull %r11d # %edx:%eax = %eax * (e - 1052), signed
  81. sarl $31, %r11d # %r11d = -1 when e - 1052 is negative, else 0
  82. lea -200(%rcx), %r9d # %r9d = e - 200
  83. sarl $3, %edx # high half >> 3, i.e. the 64-bit product >> 35
  84. andl $2047, %r9d # keep 11 bits of e - 200 (wraps when e < 200)
  85. subl %r11d, %edx # %edx = q = (e - 1052) / 25, truncated toward zero
  86. imull $-25, %edx, %eax # %eax = -25 * q
  87. shll $4, %r9d # move the new exponent into its field position
  88. lea -1052(%rax,%rcx), %ecx # %ecx = r = (e - 1052) - 25 * q, the remainder of the division
  89. orl %r9d, %r10d # merge the new exponent with the kept sign and mantissa bits
  90. movw %r10w, 6(%rsp) # x now carries exponent e - 200 (a scaling by 2^-200 when e >= 200)
  91. movsd (%rsp), %xmm2 # %xmm2 = rescaled x
  92. andl $-134217728, (%rsp) # -134217728 = 0xf8000000: clear the low 27 mantissa bits
  93. movsd (%rsp), %xmm1 # %xmm1 = hi, the high part of the rescaled x
  94. cmpl $17, %ecx # flags for the jl below (subsd does not write EFLAGS)
  95. subsd %xmm1, %xmm2 # %xmm2 = lo = rescaled x - hi
  96. jl ..B1.9 # r < 17: no extra leading term is needed
  97. ..B1.8:
  98. movslq %edx, %rdx # sign-extend q for use as a table index
  99. lea _DP(%rip), %r9 # %r9 = base address of _DP
  100. movsd (%r9,%rdx,8), %xmm7 # _DP[q]
  101. movsd 8(%r9,%rdx,8), %xmm0 # _DP[q+1]
  102. incl %edx # q += 1: the main sum below starts one entry later
  103. mulsd %xmm2, %xmm7 # lo * _DP[q]
  104. mulsd %xmm1, %xmm0 # hi * _DP[q+1]
  105. addsd %xmm0, %xmm7 # leading term = lo * _DP[q] + hi * _DP[q+1]
  106. movsd %xmm7, (%rsp)
  107. andl $-1048576, (%rsp) # -1048576 = 0xfff00000: clear the low 20 mantissa bits
  108. subsd (%rsp), %xmm7 # keep only what those low 20 bits contributed
  109. jmp ..B1.10
  110. ..B1.9:
  111. lea _DP(%rip), %r9 # %r9 = base address of _DP
  112. pxor %xmm7, %xmm7 # leading term = 0.0
  113. ..B1.10:
  # From here on D0..D7 denote the eight doubles at 0..56(%r9,%rdx,8),
  # hi is %xmm1 and lo is %xmm2. %xmm7 holds the leading term from above.
  114. movslq %edx, %rdx # sign-extend the (possibly incremented) index
  115. movaps %xmm1, %xmm3 # copies of hi for the products below
  116. movaps %xmm1, %xmm5
  117. lea zero_none(%rip), %r11 # %r11 = base address of the {0.0, -1.0} pair
  118. movaps %xmm1, %xmm9
  119. movaps %xmm1, %xmm11
  120. movaps %xmm1, %xmm13
  121. movsd (%r9,%rdx,8), %xmm4 # D0
  122. movsd 8(%r9,%rdx,8), %xmm0 # D1
  123. mulsd %xmm2, %xmm4 # lo * D0
  124. mulsd %xmm0, %xmm3 # hi * D1
  125. mulsd %xmm2, %xmm0 # lo * D1
  126. addsd %xmm3, %xmm4 # lo * D0 + hi * D1
  127. movsd 16(%r9,%rdx,8), %xmm8 # D2
  128. addsd %xmm4, %xmm7 # %xmm7 = leading term + lo * D0 + hi * D1
  129. mulsd %xmm8, %xmm5 # hi * D2
  130. mulsd %xmm2, %xmm8 # lo * D2
  131. addsd %xmm5, %xmm0 # %xmm0 = lo * D1 + hi * D2
  132. movaps %xmm7, %xmm6
  133. movsd 24(%r9,%rdx,8), %xmm10 # D3
  134. addsd %xmm0, %xmm6 # %xmm6 = partial sum used to locate the integer part
  135. movsd %xmm6, (%rsp)
  136. movzwl 6(%rsp), %ecx # top 16 bits of the partial sum
  137. shrl $4, %ecx # sign bit and biased exponent of the partial sum
  138. movd %xmm6, %r10 # raw 64-bit pattern of the partial sum
  139. negl %ecx
  140. movsd 32(%r9,%rdx,8), %xmm12 # D4
  141. movsd 40(%r9,%rdx,8), %xmm14 # D5
  142. addl $51, %ecx # shift count = 51 - (sign:exponent); the 64-bit shifts use only the low 6 bits of %cl
  143. sarq %cl, %r10 # drop the low %cl bits of the pattern
  144. movl %r10d, %eax # low 32 bits of what is left
  145. shlq %cl, %r10 # shift back: same pattern with its low %cl bits cleared
  146. addl %r8d, %eax # add parameter 3; %eax is not written again before ret
  147. movq %r10, (%rsp) # truncated partial sum, as a double
  148. movl %eax, %r8d
  149. andl $1, %r8d # %r8 = low bit of %eax, used to index zero_none
  150. subsd (%rsp), %xmm7 # remove the truncated partial sum from %xmm7
  151. mulsd %xmm12, %xmm9 # hi * D4
  152. mulsd %xmm2, %xmm12 # lo * D4
  153. mulsd %xmm14, %xmm11 # hi * D5
  154. addsd (%r11,%r8,8), %xmm7 # add 0.0 when %eax is even, -1.0 when it is odd
  155. mulsd %xmm2, %xmm14 # lo * D5
  156. addsd %xmm7, %xmm0 # %xmm0 becomes the running result
  157. addsd %xmm11, %xmm12 # lo * D4 + hi * D5
  158. movaps %xmm1, %xmm7
  159. testb %sil, %sil # sets ZF for the je after the final store; nothing in between writes EFLAGS
  160. mulsd %xmm10, %xmm7 # hi * D3
  161. mulsd %xmm2, %xmm10 # lo * D3
  162. addsd %xmm7, %xmm8 # lo * D2 + hi * D3
  163. addsd %xmm9, %xmm10 # lo * D3 + hi * D4
  164. addsd %xmm8, %xmm0
  165. movsd 48(%r9,%rdx,8), %xmm15 # D6
  166. addsd %xmm10, %xmm0
  167. mulsd %xmm15, %xmm13 # hi * D6
  168. mulsd %xmm15, %xmm2 # lo * D6
  169. addsd %xmm12, %xmm0
  170. addsd %xmm13, %xmm14 # lo * D5 + hi * D6
  171. movsd 56(%r9,%rdx,8), %xmm3 # D7
  172. addsd %xmm14, %xmm0
  173. mulsd %xmm3, %xmm1 # hi * D7
  174. addsd %xmm1, %xmm2 # lo * D6 + hi * D7
  175. addsd %xmm2, %xmm0 # all terms accumulated
  176. mulsd .L_2il0floatpacket.0(%rip), %xmm0 # scale by the constant 0x3fe921fb54442d18 (pi/4)
  177. movsd %xmm0, (%rdi) # store the result through parameter 2
  178. je ..B1.12 # %sil == 0: control word was never changed, skip the restore
  179. ..B1.11:
  180. fldcw 10(%rsp) # restore the x87 control word saved on entry
  181. ..B1.12:
  182. addq $24, %rsp # release the scratch frame
  183. .cfi_def_cfa_offset 8
  184. ret
  185. .align 16,0x90
  186. .cfi_endproc
  187. .type __libm_reduce_pi04d,@function
  188. .size __libm_reduce_pi04d,.-__libm_reduce_pi04d
  189. .data
  190. # -- End __libm_reduce_pi04d
  191. .section .rodata, "a"
  192. .align 8
  193. .align 8
  # One 8-byte double emitted as two 32-bit words, low word first.
  # Combined bit pattern 0x3fe921fb54442d18 is pi/4 in IEEE-754 double format.
  # __libm_reduce_pi04d multiplies its accumulated sum by this value just
  # before storing the result.
  194. .L_2il0floatpacket.0:
  195. .long 0x54442d18,0x3fe921fb # low word, high word
  196. .type .L_2il0floatpacket.0,@object
  197. .size .L_2il0floatpacket.0,8
  198. .align 8
  # Two doubles, each emitted low word first. __libm_reduce_pi04d indexes this
  # pair with the low bit of its integer result and adds the selected value.
  199. zero_none:
  200. .long 0x00000000,0x00000000 # index 0: 0x0000000000000000 = 0.0
  201. .long 0x00000000,0xbff00000 # index 1: 0xbff0000000000000 = -1.0
  202. .type zero_none,@object
  203. .size zero_none,16
  204. .align 4 # only 4-byte alignment is requested; the movsd loads that read this table do not require alignment
  # Table of 49 doubles (98 .long values, 392 bytes). Each double is written
  # as two decimal 32-bit words, low word first, and is read by
  # __libm_reduce_pi04d with an index scaled by 8.
  # Entry 0 is 0.0. Entry 1 has high word 1282694960 = 0x4c745f30: biased
  # exponent 0x4c7 (2^200) with leading mantissa bits 0x45f30, which match the
  # leading mantissa bits of 4/pi (0x3ff45f306dc9c883). The biased exponents
  # of the following entries step down by roughly 25 each (1223, 1198, 1172,
  # ...). This looks like 4/pi split into pieces of about 25 bits, scaled by
  # 2^200 -- TODO confirm against the generator of this table.
  205. _DP:
  206. .long 0
  207. .long 0
  208. .long 1610612736
  209. .long 1282694960
  210. .long 0
  211. .long 1256952721
  212. .long 536870912
  213. .long 1229269500
  214. .long 3221225472
  215. .long 1202544455
  216. .long 0
  217. .long 1176818551
  218. .long 2147483648
  219. .long 1148939346
  220. .long 536870912
  221. .long 1124701124
  222. .long 3758096384
  223. .long 1099498527
  224. .long 3758096384
  225. .long 1071929578
  226. .long 1342177280
  227. .long 1046982385
  228. .long 805306368
  229. .long 1020320658
  230. .long 2147483648
  231. .long 993817732
  232. .long 0
  233. .long 968598976
  234. .long 2684354560
  235. .long 942220475
  236. .long 2415919104
  237. .long 915426956
  238. .long 0
  239. .long 885849629
  240. .long 536870912
  241. .long 863855510
  242. .long 1610612736
  243. .long 836031391
  244. .long 4026531840
  245. .long 810828058
  246. .long 1073741824
  247. .long 784674491
  248. .long 1610612736
  249. .long 757207974
  250. .long 3489660928
  251. .long 732020890
  252. .long 0
  253. .long 703061904
  254. .long 1610612736
  255. .long 679713053
  256. .long 2147483648
  257. .long 652001705
  258. .long 1073741824
  259. .long 626850382
  260. .long 2147483648
  261. .long 597786158
  262. .long 805306368
  263. .long 575535400
  264. .long 536870912
  265. .long 548814833
  266. .long 268435456
  267. .long 523239288
  268. .long 3758096384
  269. .long 495550718
  270. .long 2952790016
  271. .long 469954840
  272. .long 1073741824
  273. .long 442925723
  274. .long 1073741824
  275. .long 416247094
  276. .long 3758096384
  277. .long 392128403
  278. .long 2147483648
  279. .long 364254062
  280. .long 3221225472
  281. .long 339643518
  282. .long 2684354560
  283. .long 313162111
  284. .long 805306368
  285. .long 286354345
  286. .long 2952790016
  287. .long 260811902
  288. .long 1610612736
  289. .long 234667567
  290. .long 3758096384
  291. .long 207520668
  292. .long 1073741824
  293. .long 182175017
  294. .long 4026531840
  295. .long 155380331
  296. .long 805306368
  297. .long 129417058
  298. .long 536870912
  299. .long 103691636
  300. .long 0
  301. .long 73760972
  302. .long 3221225472
  303. .long 48348958
  304. .type _DP,@object
  305. .size _DP,392 # 98 words * 4 bytes = 49 doubles
  306. .data
  307. .section .note.GNU-stack, "" # empty marker section: GNU toolchain convention stating that this object does not need an executable stack
  308. // -- Begin DWARF2 SEGMENT .eh_frame
  309. .section .eh_frame,"a",@progbits # allocatable section that holds the unwind data produced by the .cfi_* directives
  310. .eh_frame_seg: # label at the start of the contribution of this file to .eh_frame
  311. .align 1
  312. # End