remquo.S 10 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "remquo.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin remquo
  /*
   * remquo -- remainder of x relative to y, plus low bits of the quotient.
   *
   * Inputs (see the parameter notes below): x in %xmm0, y in %xmm1, and a
   * pointer in %rdi through which a 32-bit quotient value is stored.
   * Result: returned in %xmm0.
   *
   * Outline of what the code does:
   *   - NaN, infinite and zero operands take the special path at ..B1.7;
   *     the stored quotient is 0 there.
   *   - |x| <= |y| is resolved with at most one subtraction
   *     (..B1.18 to ..B1.24, and ..B1.62 for |x| == |y|).
   *   - |x| > |y| runs a bitwise shift-and-subtract division on the
   *     significands (..B1.25 to ..B1.35), rebuilds the remainder as a
   *     double (..B1.36 to ..B1.43) and then rounds the quotient to
   *     nearest, ties to even (..B1.43 to ..B1.46, ..B1.52).
   *
   * Register roles once ..B1.6 has run:
   *   %r8   = bits of |x|             %rsi  = bits of |y|
   *   %rax  = fraction field of x     %r10  = fraction field of y
   *   %r9d  = unbiased exponent of x  %ecx  = unbiased exponent of y
   *   %edx  = all ones when x is negative, otherwise 0
   *   %ebx  = +1 when x and y have the same sign, otherwise -1
   *   %bpl  = 1 when a special operand was seen (%ebp later holds the quotient)
   *   %xmm2 = |x|                     %xmm0 = |y|
   *
   * Scratch slots, addressed below %rsp after the two pushes (no frame is
   * allocated and the function makes no calls):
   *   -32 = x, -40 = y (later reused for the remainder), -24 = +0.0,
   *   -16 = |x|, -8 = |y|
   */
  41. .text
  42. .align 16,0x90                    # 16-byte alignment, padded with 0x90 bytes
  43. .globl remquo
  44. remquo:
  45. # parameter 1: %xmm0
  46. # parameter 2: %xmm1
  47. # parameter 3: %rdi
  48. ..B1.1:
  49. .cfi_startproc
  50. ..___tag_value_remquo.1:
  51. ..L2:
  52. pushq %rbx                        # saved: %ebx carries the quotient sign
  53. .cfi_def_cfa_offset 16
  54. .cfi_offset 3, -16
  55. pushq %rbp                        # saved: %bpl / %ebp / %rbp are used as scratch
  56. .cfi_def_cfa_offset 24
  57. .cfi_offset 6, -24
  58. movq $0x7fffffffffffffff, %r8     # mask that clears the sign bit
  59. movd %xmm0, %rdx                  # rdx = raw bits of x
  60. movq $0x7fffffffffffffff, %rsi    # same mask, for y
  61. movd %xmm1, %r9                   # r9 = raw bits of y
  62. movsd %xmm0, -32(%rsp)            # keep the original x
  63. movsd %xmm1, -40(%rsp)            # keep the original y
  64. andq %rdx, %r8                    # r8 = bits of |x|
  65. andq %r9, %rsi                    # rsi = bits of |y|
  66. sarq $63, %rdx                    # rdx = -1 when x is negative, else 0
  67. movq $0x7ff0000000000000, %rcx    # exponent field mask
  68. sarq $63, %r9                     # r9 = -1 when y is negative, else 0
  69. andq %rsi, %rcx                   # rcx = exponent field of y
  70. movl $-1, %eax                    # candidate sign for differing signs
  71. movl $1, %ebx                     # default: signs agree
  72. cmpl %edx, %r9d                   # compare the two sign masks
  73. movq $0x7ff0000000000000, %r9     # exponent field mask (mov keeps the flags)
  74. movq %r8, -16(%rsp)               # |x| to memory so it can be loaded as a double
  75. movq %rsi, -8(%rsp)               # |y| to memory likewise
  76. cmovne %eax, %ebx                 # signs differ: ebx = -1 (flags from the cmpl above)
  77. xorb %bpl, %bpl                   # clear the special-operand flag
  78. andq %r8, %r9                     # r9 = exponent field of x
  79. shrq $52, %r9                     # biased exponent of x
  80. shrq $52, %rcx                    # biased exponent of y
  81. addl $-1023, %r9d                 # r9d = unbiased exponent of x
  82. addl $-1023, %ecx                 # ecx = unbiased exponent of y
  83. movq $0, -24(%rsp)                # slot -24 = +0.0
  84. cmpl $1024, %r9d                  # exponent field all ones: x is Inf or NaN
  85. movsd -16(%rsp), %xmm2            # xmm2 = |x|
  86. movsd -8(%rsp), %xmm0             # xmm0 = |y|
  87. je ..B1.5                         # flags still come from the cmpl above
  88. ..B1.2:
  89. cmpl $1024, %ecx                  # y is Inf or NaN
  90. je ..B1.5
  91. ..B1.3:
  92. testq %r8, %r8                    # x is +0 or -0
  93. je ..B1.5
  94. ..B1.4:
  95. testq %rsi, %rsi                  # y is +0 or -0
  96. jne ..B1.6                        # no special operand: leave the flag clear
  97. ..B1.5:
  98. movb $1, %bpl                     # mark: at least one special operand
  99. ..B1.6:
  100. movq $0xfffffffffffff, %r11      # 52-bit fraction mask
  101. movq %r8, %rax
  102. movq %rsi, %r10
  103. andq %r11, %rax                  # rax = fraction field of x
  104. andq %r11, %r10                  # r10 = fraction field of y
  105. testb %bpl, %bpl
  106. je ..B1.18                       # ordinary finite nonzero operands
  /* ---- Special operands: the stored quotient is always 0. ---- */
  107. ..B1.7:
  108. movl $0, (%rdi)                  # store 0 through the quotient pointer
  109. testq %rax, %rax
  110. je ..B1.9                        # fraction of x is zero: x is not a NaN
  111. ..B1.8:
  112. cmpl $1024, %r9d
  113. je ..B1.47                       # x is a NaN
  114. ..B1.9:
  115. testq %r10, %r10
  116. je ..B1.11                       # fraction of y is zero: y is not a NaN
  117. ..B1.10:
  118. cmpl $1024, %ecx
  119. je ..B1.48                       # y is a NaN
  120. ..B1.11:
  121. testq %rsi, %rsi
  122. je ..B1.49                       # y is zero: result is 0.0 / 0.0
  123. ..B1.12:
  124. testq %r8, %r8
  125. je ..B1.16                       # x is zero: result is x itself
  126. ..B1.14:
  127. testq %rax, %rax
  128. jne ..B1.16                      # x has a nonzero fraction and is not a NaN, so x is finite
  129. ..B1.15:
  130. cmpl $1024, %r9d
  131. je ..B1.49                       # x is infinite: result is 0.0 / 0.0
  132. ..B1.16:
  133. movsd -32(%rsp), %xmm0           # return the original x unchanged
  134. ..B1.17:
  135. .cfi_restore 6
  136. popq %rbp
  137. .cfi_def_cfa_offset 16
  138. .cfi_restore 3
  139. popq %rbx
  140. .cfi_def_cfa_offset 8
  141. ret
  142. .cfi_def_cfa_offset 24           # unwind state for the code below: both registers still pushed
  143. .cfi_offset 3, -16
  144. .cfi_offset 6, -24
  /* ---- Ordinary operands: order |x| against |y|. ---- */
  145. ..B1.18:
  146. comisd %xmm2, %xmm0              # compare |y| (xmm0) with |x| (xmm2)
  147. jb ..B1.25                       # |y| < |x|: full division needed
  148. ..B1.19:
  149. ucomisd %xmm0, %xmm2
  150. jp ..B1.20                       # unordered result is not treated as equal
  151. je ..B1.62                       # |x| == |y|
  152. ..B1.20:
  153. cmpl $1023, %r9d                 # here |x| < |y|; largest finite exponent
  154. je ..B1.22                       # skip the doubling test (2*|x| would overflow)
  155. ..B1.21:
  156. movaps %xmm2, %xmm1
  157. addsd %xmm2, %xmm1               # xmm1 = 2*|x|
  158. comisd %xmm1, %xmm0              # compare |y| with 2*|x|
  159. jae ..B1.24                      # |y| >= 2*|x|: quotient 0, result x
  160. ..B1.22:
  161. movl %ebx, (%rdi)                # quotient = +1 or -1
  162. ..B1.23:
  163. testl %edx, %edx                 # sign of x; the SSE ops below leave the flags alone
  164. subsd %xmm0, %xmm2               # xmm2 = |x| - |y|
  165. movaps %xmm2, %xmm0
  166. xorps .L_2il0floatpacket.1(%rip), %xmm0   # xmm0 = -(|x| - |y|)
  167. jne ..L14                        # x negative: return the negated value
  168. movaps %xmm2, %xmm0              # x non-negative: return |x| - |y|
  169. ..L14:
  170. .cfi_restore 6
  171. popq %rbp
  172. .cfi_def_cfa_offset 16
  173. .cfi_restore 3
  174. popq %rbx
  175. .cfi_def_cfa_offset 8
  176. ret
  177. .cfi_def_cfa_offset 24
  178. .cfi_offset 3, -16
  179. .cfi_offset 6, -24
  180. ..B1.24:
  181. movl $0, (%rdi)                  # quotient = 0
  182. movsd -32(%rsp), %xmm0           # result = original x
  183. .cfi_restore 6
  184. popq %rbp
  185. .cfi_def_cfa_offset 16
  186. .cfi_restore 3
  187. popq %rbx
  188. .cfi_def_cfa_offset 8
  189. ret
  190. .cfi_def_cfa_offset 24
  191. .cfi_offset 3, -16
  192. .cfi_offset 6, -24
  /* ---- |x| > |y|: build integer significands, then divide bit by bit. ---- */
  193. ..B1.25:
  194. cmpl $-1023, %r9d                # exponent field of x is zero
  195. je ..B1.58                       # go normalize a subnormal x
  196. ..B1.26:
  197. movq $0x10000000000000, %rbp
  198. orq %rbp, %rax                   # set bit 52 (implicit leading one) in the x significand
  199. ..B1.27:
  200. cmpl $-1023, %ecx                # exponent field of y is zero
  201. je ..B1.54                       # go normalize a subnormal y
  202. ..B1.28:
  203. movq $0x10000000000000, %rbp
  204. orq %rbp, %r10                   # set bit 52 in the y significand
  205. ..B1.29:
  206. subl %ecx, %r9d
  207. xorl %ebp, %ebp                  # ebp = quotient accumulator
  208. xorl %esi, %esi                  # esi = iteration counter
  209. incl %r9d                        # r9d = exponent difference + 1 = number of steps
  210. je ..B1.35                       # step count is zero: skip the loop
  211. ..B1.31:
  212. addl %ebp, %ebp                  # make room for the next quotient bit
  213. cmpq %r10, %rax
  214. jb ..B1.33                       # remainder < divisor: this bit stays 0
  215. ..B1.32:
  216. subq %r10, %rax                  # remainder -= divisor
  217. incl %ebp                        # this quotient bit is 1
  218. ..B1.33:
  219. incl %esi
  220. addq %rax, %rax                  # remainder <<= 1
  221. cmpl %r9d, %esi
  222. jb ..B1.31                       # unsigned compare: loop until esi reaches r9d
  223. ..B1.35:
  224. testq %rax, %rax
  225. je ..B1.63                       # remainder is exactly zero
  /* ---- Repack the integer remainder as a double with exponent based on %ecx. ---- */
  226. ..B1.36:
  227. shrq $1, %rax                    # undo the shift done by the last loop step
  228. movq $0xfffffffffffff, %rsi
  229. cmpq %rsi, %rax
  230. ja ..B1.40                       # bit 52 or above already set: no normalization
  231. ..B1.38:
  232. addq %rax, %rax                  # shift left ...
  233. decl %ecx                        # ... and lower the exponent to match
  234. cmpq %rsi, %rax
  235. jbe ..B1.38                      # repeat until the value exceeds the fraction mask
  236. ..B1.40:
  237. cmpl $-1022, %ecx
  238. jl ..B1.42                       # signed: exponent below -1022, encode as subnormal
  239. ..B1.41:
  240. movslq %ecx, %rcx
  241. movq $0xfffffffffffff, %rsi
  242. andq %rax, %rsi                  # rsi = fraction bits (leading one dropped)
  243. addq $1023, %rcx                 # rcx = biased exponent
  244. jmp ..B1.43
  245. ..B1.42:
  246. negl %ecx
  247. movq %rax, %rsi
  248. addl $2, %ecx                    # cl = 2 - e; x86 uses the count modulo 64, same as -1022 - e
  249. shrq %cl, %rsi                   # rsi = significand shifted down into the subnormal range
  250. xorl %ecx, %ecx                  # exponent field = 0
  251. ..B1.43:
  252. shlq $52, %rcx
  253. orq %rsi, %rcx                   # rcx = bits of the non-negative remainder r
  254. movq %rcx, -40(%rsp)             # reuses the slot that held the original y
  255. movsd -40(%rsp), %xmm2           # xmm2 = r
  256. movaps %xmm2, %xmm1
  257. addsd %xmm2, %xmm1               # xmm1 = 2*r
  258. comisd %xmm0, %xmm1              # compare 2*r with |y|
  259. jb ..B1.46                       # 2*r < |y|: r and the quotient stand as they are
  260. ..B1.44:
  261. ucomisd %xmm1, %xmm0
  262. jp ..B1.45
  263. je ..B1.52                       # 2*r == |y|: tie, resolved toward an even quotient
  264. ..B1.45:
  265. incl %ebp                        # 2*r > |y|: bump the quotient ...
  266. subsd %xmm0, %xmm2               # ... and use r - |y| (negative) as the remainder
  /* ---- Apply signs, store the quotient, return the remainder. ---- */
  267. ..B1.46:
  268. andl $2147483647, %ebp           # keep the low 31 bits of the quotient magnitude
  269. movaps %xmm2, %xmm0
  270. imull %ebp, %ebx                 # apply the +1 / -1 sign factor
  271. testl %edx, %edx                 # sign of x; xorps and movl below keep the flags
  272. xorps .L_2il0floatpacket.1(%rip), %xmm0   # xmm0 = negated remainder
  273. movl %ebx, (%rdi)                # store the signed quotient
  274. jne ..L29                        # x negative: return the negated remainder
  275. movaps %xmm2, %xmm0              # x non-negative: return the remainder as computed
  276. ..L29:
  277. .cfi_restore 6
  278. popq %rbp
  279. .cfi_def_cfa_offset 16
  280. .cfi_restore 3
  281. popq %rbx
  282. .cfi_def_cfa_offset 8
  283. ret
  284. .cfi_def_cfa_offset 24
  285. .cfi_offset 3, -16
  286. .cfi_offset 6, -24
  /* ---- x is a NaN: return x times the constant in .L_2il0floatpacket.0. ---- */
  287. ..B1.47:
  288. movsd -32(%rsp), %xmm0
  289. mulsd .L_2il0floatpacket.0(%rip), %xmm0   # presumably performed to quiet a signaling NaN -- confirm
  290. .cfi_restore 6
  291. popq %rbp
  292. .cfi_def_cfa_offset 16
  293. .cfi_restore 3
  294. popq %rbx
  295. .cfi_def_cfa_offset 8
  296. ret
  297. .cfi_def_cfa_offset 24
  298. .cfi_offset 3, -16
  299. .cfi_offset 6, -24
  /* ---- y is a NaN: return y times the same constant. ---- */
  300. ..B1.48:
  301. movsd -40(%rsp), %xmm0
  302. mulsd .L_2il0floatpacket.0(%rip), %xmm0
  303. .cfi_restore 6
  304. popq %rbp
  305. .cfi_def_cfa_offset 16
  306. .cfi_restore 3
  307. popq %rbx
  308. .cfi_def_cfa_offset 8
  309. ret
  310. .cfi_def_cfa_offset 24
  311. .cfi_offset 3, -16
  312. .cfi_offset 6, -24
  /* ---- y is zero or x is infinite: return 0.0 / 0.0. ---- */
  313. ..B1.49:
  314. movsd -24(%rsp), %xmm0           # +0.0
  315. movsd -24(%rsp), %xmm1           # +0.0
  316. divsd %xmm1, %xmm0               # division performed at run time, not a stored constant
  317. .cfi_restore 6
  318. popq %rbp
  319. .cfi_def_cfa_offset 16
  320. .cfi_restore 3
  321. popq %rbx
  322. .cfi_def_cfa_offset 8
  323. ret
  324. .cfi_def_cfa_offset 24
  325. .cfi_offset 3, -16
  326. .cfi_offset 6, -24
  /* ---- Tie (2*r == |y|): only an odd quotient is adjusted. ---- */
  327. ..B1.52:
  328. testl $1, %ebp
  329. je ..B1.46                       # quotient already even: keep r
  330. ..B1.53:
  331. xorps .L_2il0floatpacket.1(%rip), %xmm2   # r = -r, which equals r - |y| at a tie
  332. incl %ebp                        # quotient becomes even
  333. jmp ..B1.46
  /* ---- Subnormal y: shift its fraction up until bit 52 is set. ---- */
  334. ..B1.54:
  335. testq %r10, %r10
  336. je ..B1.28                       # zero fraction: take the normal-number path
  337. ..B1.55:
  338. movl $-1022, %ecx                # starting exponent for the normalization
  339. movq $0xfffffffffffff, %rbp
  340. ..B1.56:
  341. addq %r10, %r10
  342. decl %ecx                        # one less per left shift
  343. cmpq %rbp, %r10
  344. jbe ..B1.56
  345. jmp ..B1.29
  /* ---- Subnormal x: same normalization, on %rax and %r9d. ---- */
  346. ..B1.58:
  347. testq %rax, %rax
  348. je ..B1.26                       # zero fraction: take the normal-number path
  349. ..B1.59:
  350. movl $-1022, %r9d
  351. movq $0xfffffffffffff, %rbp
  352. ..B1.60:
  353. addq %rax, %rax
  354. decl %r9d
  355. cmpq %rbp, %rax
  356. jbe ..B1.60
  357. jmp ..B1.27
  /* ---- |x| == |y|: quotient is +1 or -1, result is 0.0 * x. ---- */
  358. ..B1.62:
  359. movl %ebx, (%rdi)
  360. movsd -24(%rsp), %xmm0           # +0.0
  361. mulsd -32(%rsp), %xmm0           # multiply by the original x
  362. .cfi_restore 6
  363. popq %rbp
  364. .cfi_def_cfa_offset 16
  365. .cfi_restore 3
  366. popq %rbx
  367. .cfi_def_cfa_offset 8
  368. ret
  369. .cfi_def_cfa_offset 24
  370. .cfi_offset 3, -16
  371. .cfi_offset 6, -24
  /* ---- Division left no remainder: store the signed quotient, return 0.0 * x. ---- */
  372. ..B1.63:
  373. andl $2147483647, %ebp           # keep the low 31 bits of the quotient magnitude
  374. imull %ebp, %ebx                 # apply the +1 / -1 sign factor
  375. movl %ebx, (%rdi)
  376. movsd -24(%rsp), %xmm0           # +0.0
  377. mulsd -32(%rsp), %xmm0           # multiply by the original x
  378. movq %rax, -40(%rsp)             # NOTE(review): this slot is not read again before ret
  379. .cfi_restore 6
  380. popq %rbp
  381. .cfi_def_cfa_offset 16
  382. .cfi_restore 3
  383. popq %rbx
  384. .cfi_def_cfa_offset 8
  385. ret
  386. .align 16,0x90
  387. .cfi_endproc
  388. .type remquo,@function
  389. .size remquo,.-remquo
  390. .data
  391. # -- End remquo
  /* Read-only constants referenced RIP-relative by remquo. */
  392. .section .rodata, "a"
  393. .align 16
  394. .align 16
  /*
   * 16-byte value whose only set bit is bit 63, the sign bit of the low
   * double. remquo XORs it into a register (xorps) to negate the low double.
   */
  395. .L_2il0floatpacket.1:
  396. .long 0x00000000,0x80000000,0x00000000,0x00000000
  397. .type .L_2il0floatpacket.1,@object
  398. .size .L_2il0floatpacket.1,16
  399. .align 8
  /*
   * One double with bit pattern 0x3ffb333333333333 (about 1.7). remquo
   * multiplies a NaN operand by it before returning that operand.
   */
  400. .L_2il0floatpacket.0:
  401. .long 0x33333333,0x3ffb3333
  402. .type .L_2il0floatpacket.0,@object
  403. .size .L_2il0floatpacket.0,8
  404. .data
  405. .section .note.GNU-stack, ""      # empty flag string: section is not marked executable
  406. // -- Begin DWARF2 SEGMENT .eh_frame
  407. .section .eh_frame,"a",@progbits  # only a label and an alignment follow; no data is emitted here
  408. .eh_frame_seg:
  409. .align 1
  410. # End