/* nearbyint_gen.S */
/*
 * Math library
 *
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * Author Name <jingwei.zhang@intel.com>
 * History:
 * 03-14-2016 Initial version. numerics svn rev. 12864
 */
  .file "nearbyint_gen.c"
	.text
..TXTST0:
# -- Begin nearbyint
/*
 * double nearbyint(double x)
 *
 * Rounds x to an integral value in the rounding direction selected by the
 * MXCSR rounding-control field.  The work is done with integer operations
 * on a stack copy of x; the only floating-point arithmetic is a final
 * add of +/-1.0 to an already-truncated value (or a multiply by 1.0 for
 * inputs that have no fraction bits).
 *
 * Syntax: GNU as, AT&T operand order.
 *
 * In:    %xmm0 = x
 * Out:   %xmm0 = result
 * Uses:  %rax, %rcx, %rdx, %rsi, %rdi, %r8, %r9, flags (none are saved;
 *        all are caller-saved under the System V AMD64 ABI), plus a
 *        24-byte stack frame.  Makes no calls.
 *
 * Frame layout:
 *   0(%rsp)  low  32 bits of x (low mantissa word)
 *   4(%rsp)  high 32 bits of x (sign, 11-bit exponent, top 20 mantissa bits)
 *   8(%rsp)  MXCSR snapshot
 *
 * Register roles (until overwritten inside a path):
 *   %esi  biased exponent E of x (0..2047)
 *   %eax  MXCSR & 0x6000 while the rounding mode is being dispatched
 *   %ecx  shift count; 32-bit shifts use only the low 5 bits of %cl
 *
 * Shift-count arithmetic.  Every path computes 19 - E and relies on the
 * 5-bit masking of %cl:
 *   1023 <= E <= 1043 : (19 - E) mod 32 = 1043 - E = number of fraction
 *                       bits held in the 20-bit high mantissa field
 *   1044 <= E <= 1074 : (19 - E) mod 32 = 1075 - E = number of fraction
 *                       bits held in the low word
 * Shifting a mask left by that count and ANDing clears the fraction bits.
 *
 * Constant tables (_ones, _pone_nzero, _zeros, _pzero_none) hold two
 * doubles each and are indexed by the sign bit of x.
 */
	.text
	.align 16,0x90
	.globl nearbyint
nearbyint:
# parameter 1: %xmm0
..B1.1:
	.cfi_startproc
..___tag_value_nearbyint.1:
..L2:
	subq	$24, %rsp
	.cfi_def_cfa_offset 32
	movsd	%xmm0, (%rsp)			/* spill x so its bits can be edited */
..B1.2:
	stmxcsr	8(%rsp)				/* snapshot MXCSR for the rounding mode */
..B1.3:
	movzwl	6(%rsp), %esi			/* top 16 bits of x */
	andl	$32752, %esi			/* 0x7FF0: isolate exponent field */
	shrl	$4, %esi			/* esi = biased exponent E */
	cmpl	$1075, %esi			/* 1075 = 1023 + 52: no fraction bits left */
	jge	..B1.62
..B1.4:
	movl	8(%rsp), %eax
	andl	$24576, %eax			/* 0x6000: MXCSR rounding-control bits 13..14 */
	cmpl	$8192, %eax			/* 0x2000: round toward -infinity */
	je	..B1.49
..B1.5:
	cmpl	$16384, %eax			/* 0x4000: round toward +infinity */
	jne	..B1.19

/* ---- Round toward +infinity ------------------------------------------ */
..B1.6:
	cmpl	$1023, %esi			/* E < 1023  <=>  |x| < 1.0 */
	jl	..B1.14
..B1.7:
	cmpl	$1043, %esi			/* E <= 1043: fraction reaches into high word */
	jle	..B1.9
..B1.8:
	/* Fraction bits live only in the low word. */
	negl	%esi
	movl	$-1, %edi
	addl	$19, %esi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1075 - E) */
	shll	%cl, %edi			/* edi = mask of integer bits in low word */
	movl	(%rsp), %eax
	andl	%eax, %edi
	movl	%edi, (%rsp)			/* store truncated low word */
	xorl	%eax, %edi			/* edi = discarded fraction bits */
	jmp	..B1.10
..B1.9:
	/* Fraction bits start in the high word; the whole low word is fraction. */
	negl	%esi
	movl	$1048575, %eax			/* 0x000FFFFF: high mantissa field */
	addl	$19, %esi
	movl	4(%rsp), %edx
	movl	%edx, %edi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1043 - E) */
	andl	$1048575, %edi			/* edi = original high mantissa bits */
	shll	%cl, %eax			/* eax = mask of integer bits (extra high bits dropped below) */
	andl	$-1048576, %edx			/* 0xFFF00000: keep sign and exponent */
	andl	%edi, %eax			/* eax = kept mantissa bits */
	orl	%eax, %edx
	movl	%edx, 4(%rsp)			/* store truncated high word */
	andl	$1048575, %edx
	xorl	%edx, %edi			/* discarded high-word fraction bits */
	orl	(%rsp), %edi			/* ... plus every low-word bit */
	movl	$0, (%rsp)			/* low word is entirely fraction: clear it */
..B1.10:
	testb	$-128, 7(%rsp)			/* sign bit set? truncation already rounds up */
	jne	..B1.13
..B1.11:
	testl	%edi, %edi			/* nothing discarded: x was already integral */
	je	..B1.13
..B1.12:
	lea	_ones(%rip), %rax
	movsd	(%rsp), %xmm0
	addsd	(%rax), %xmm0			/* positive with a fraction: trunc(x) + 1.0 */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/* ---- Shared exit: return the (possibly edited) stack copy of x -------- */
..B1.13:
	movsd	(%rsp), %xmm0
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/* Round toward +infinity, |x| < 1.0 */
..B1.14:
	testl	%esi, %esi			/* the next three tests detect x == +/-0 */
	jne	..B1.18
..B1.15:
	testl	$1048575, 4(%rsp)
	jne	..B1.18
..B1.16:
	cmpl	$0, (%rsp)
	je	..B1.13				/* +/-0 is returned unchanged */
..B1.18:
	movb	7(%rsp), %al
	lea	_pone_nzero(%rip), %rdx
	andb	$-128, %al
	shrb	$7, %al				/* al = sign bit (0 or 1) */
	movzbl	%al, %ecx
	movsd	(%rdx,%rcx,8), %xmm0		/* +1.0 for positive x, -0.0 for negative x */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

..B1.19:
	cmpl	$24576, %eax			/* 0x6000: round toward zero */
	jne	..B1.26

/* ---- Round toward zero ------------------------------------------------ */
..B1.20:
	cmpl	$1023, %esi			/* |x| < 1.0 -> signed zero */
	jl	..B1.25
..B1.21:
	cmpl	$1043, %esi
	jle	..B1.23
..B1.22:
	/* Fraction only in the low word: mask it off. */
	negl	%esi
	movl	$-1, %eax
	addl	$19, %esi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1075 - E) */
	shll	%cl, %eax
	andl	(%rsp), %eax			/* eax = truncated low word */
	jmp	..B1.24
..B1.23:
	/* Fraction starts in the high word: mask it there, zero the low word. */
	negl	%esi
	movl	$1048575, %eax
	addl	$19, %esi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1043 - E) */
	shll	%cl, %eax			/* mask of integer bits in high mantissa */
	movl	4(%rsp), %edx
	movl	%edx, %edi
	andl	%eax, %edx
	andl	$-1048576, %edi			/* sign and exponent */
	andl	$1048575, %edx			/* kept mantissa bits */
	xorl	%eax, %eax			/* new low word = 0 */
	orl	%edx, %edi
	movl	%edi, 4(%rsp)
..B1.24:
	movl	%eax, (%rsp)
	movsd	(%rsp), %xmm0
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/* Return a zero carrying the sign of x (shared by toward-zero and nearest). */
..B1.25:
	movb	7(%rsp), %al
	lea	_zeros(%rip), %rdx
	andb	$-128, %al
	shrb	$7, %al				/* al = sign bit */
	movzbl	%al, %ecx
	movsd	(%rdx,%rcx,8), %xmm0		/* +0.0 or -0.0 */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/* ---- Round to nearest, ties to even (remaining rounding-control value) -- */
..B1.26:
	cmpl	$1023, %esi			/* |x| < 1.0 handled separately */
	jl	..B1.42
..B1.27:
	movl	(%rsp), %eax			/* eax = low word */
	cmpl	$1043, %esi
	jl	..B1.29
..B1.28:
	/*
	 * E >= 1043: the first fraction bit is in the low word.
	 * (E + 13) mod 32 = E - 1043 moves it to bit 31 of edx; the bits
	 * below it are the rest of the fraction.
	 */
	movl	%eax, %edx
	lea	13(%rsi), %ecx
	shll	%cl, %edx
	xorl	%r9d, %r9d			/* no additional fraction bits elsewhere */
	jmp	..B1.30
..B1.29:
	/*
	 * E < 1043: the first fraction bit is in the high mantissa field.
	 * (E + 13) mod 32 = E - 1011 moves it to bit 31 of edx; the whole
	 * low word is further fraction and is kept in r9d.
	 */
	movl	4(%rsp), %edx
	lea	13(%rsi), %ecx
	andl	$1048575, %edx
	movl	%eax, %r9d
	shll	%cl, %edx
..B1.30:
	cmpl	$1043, %esi
	jle	..B1.32
..B1.31:
	/* Truncate in the low word. */
	negl	%esi
	movl	$-1, %edi
	addl	$19, %esi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1075 - E) */
	shll	%cl, %edi
	andl	%edi, %eax			/* eax = truncated low word */
	jmp	..B1.33
..B1.32:
	/* Truncate in the high word; the low word becomes zero. */
	negl	%esi
	movl	$1048575, %eax
	addl	$19, %esi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1043 - E) */
	shll	%cl, %eax
	movl	4(%rsp), %edi
	movl	%edi, %r8d
	andl	%eax, %edi
	andl	$-1048576, %r8d			/* sign and exponent */
	andl	$1048575, %edi			/* kept mantissa bits */
	xorl	%eax, %eax			/* new low word = 0 */
	orl	%edi, %r8d
	movl	%r8d, 4(%rsp)
..B1.33:
	movl	%eax, (%rsp)			/* stack copy now holds trunc(x) */
	testl	%edx, %edx			/* bit 31 = the 0.5 bit of the fraction */
	jge	..B1.13				/* fraction < 0.5: keep trunc(x) */
..B1.34:
	andl	$2147483647, %edx		/* drop the 0.5 bit ... */
	orl	%r9d, %edx			/* ... anything left means fraction > 0.5 */
	jne	..B1.37
..B1.35:
	/* Exact tie: round away only when trunc(x) is odd. */
	movzwl	6(%rsp), %ecx			/* esi was consumed above: re-extract E */
	andl	$32752, %ecx
	shrl	$4, %ecx
	cmpl	$1043, %ecx
	jle	..B1.38
..B1.36:
	negl	%ecx
	movl	$1, %edx
	addl	$19, %ecx
	shll	%cl, %edx			/* units bit within the low word */
	testl	%edx, %eax
	je	..B1.13				/* even: keep trunc(x) */
..B1.37:
	movb	7(%rsp), %al
	lea	_ones(%rip), %rdx
	andb	$-128, %al
	shrb	$7, %al				/* al = sign bit */
	movsd	(%rsp), %xmm0
	movzbl	%al, %ecx
	addsd	(%rdx,%rcx,8), %xmm0		/* trunc(x) + (+1.0 or -1.0) */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32
..B1.38:
	/*
	 * Units bit within the high word.  For E == 1023 the count is 20,
	 * which selects the lowest exponent bit; that bit is 1 for E == 1023,
	 * so the implicit leading 1 is treated as odd.
	 */
	negl	%ecx
	movl	$1, %eax
	addl	$19, %ecx
	shll	%cl, %eax
	testl	%eax, 4(%rsp)
	je	..B1.13				/* even: keep trunc(x) */
..B1.39:
	movb	7(%rsp), %al
	lea	_ones(%rip), %rdx
	andb	$-128, %al
	shrb	$7, %al				/* al = sign bit */
	movsd	(%rsp), %xmm0
	movzbl	%al, %ecx
	addsd	(%rdx,%rcx,8), %xmm0		/* trunc(x) + (+1.0 or -1.0) */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/*
 * Round to nearest, |x| < 1.0 (entered only with E <= 1022).
 * Result is +/-1.0 when |x| > 0.5, otherwise a signed zero (an exact 0.5
 * ties to the even neighbour, zero).
 */
..B1.42:
	cmpl	$1022, %esi
	jg	..B1.48				/* never taken: E <= 1022 on entry */
..B1.43:
	jne	..B1.25				/* flags from the cmpl above: E < 1022 -> |x| < 0.5 */
..B1.44:
	movl	4(%rsp), %eax
	andl	$1048575, %eax
	jg	..B1.48				/* high mantissa nonzero: |x| > 0.5 */
..B1.45:
	jne	..B1.25				/* never taken: result is zero when jg falls through */
..B1.46:
	cmpl	$0, (%rsp)
	jbe	..B1.25				/* low word == 0: exactly 0.5 -> signed zero */
..B1.48:
	movb	7(%rsp), %al
	lea	_ones(%rip), %rdx
	andb	$-128, %al
	shrb	$7, %al				/* al = sign bit */
	movzbl	%al, %ecx
	movsd	(%rdx,%rcx,8), %xmm0		/* +1.0 or -1.0 */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/* ---- Round toward -infinity ------------------------------------------- */
..B1.49:
	cmpl	$1023, %esi			/* |x| < 1.0 handled separately */
	jl	..B1.57
..B1.50:
	cmpl	$1043, %esi
	jle	..B1.52
..B1.51:
	/* Fraction bits live only in the low word. */
	negl	%esi
	movl	$-1, %eax
	addl	$19, %esi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1075 - E) */
	shll	%cl, %eax
	movl	(%rsp), %edi
	andl	%edi, %eax
	movl	%eax, (%rsp)			/* store truncated low word */
	xorl	%eax, %edi			/* edi = discarded fraction bits */
	jmp	..B1.53
..B1.52:
	/* Fraction bits start in the high word; the whole low word is fraction. */
	negl	%esi
	movl	$1048575, %eax
	addl	$19, %esi
	movl	4(%rsp), %edx
	movl	%edx, %edi
	movl	%esi, %ecx			/* cl = 19 - E (mod 32 = 1043 - E) */
	andl	$1048575, %edi			/* edi = original high mantissa bits */
	shll	%cl, %eax
	andl	$-1048576, %edx			/* sign and exponent */
	andl	%edi, %eax			/* kept mantissa bits */
	orl	%eax, %edx
	movl	%edx, 4(%rsp)			/* store truncated high word */
	andl	$1048575, %edx
	xorl	%edx, %edi			/* discarded high-word fraction bits */
	orl	(%rsp), %edi			/* ... plus every low-word bit */
	movl	$0, (%rsp)
..B1.53:
	testb	$-128, 7(%rsp)			/* sign bit clear? truncation already rounds down */
	je	..B1.13
..B1.54:
	testl	%edi, %edi			/* nothing discarded: x was already integral */
	je	..B1.13
..B1.55:
	lea	8+_ones(%rip), %rax		/* second entry of _ones: -1.0 */
	movsd	(%rsp), %xmm0
	addsd	(%rax), %xmm0			/* negative with a fraction: trunc(x) - 1.0 */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/* Round toward -infinity, |x| < 1.0 */
..B1.57:
	testl	%esi, %esi			/* the next three tests detect x == +/-0 */
	jne	..B1.61
..B1.58:
	testl	$1048575, 4(%rsp)
	jne	..B1.61
..B1.59:
	cmpl	$0, (%rsp)
	je	..B1.13				/* +/-0 is returned unchanged */
..B1.61:
	movb	7(%rsp), %al
	lea	_pzero_none(%rip), %rdx
	andb	$-128, %al
	shrb	$7, %al				/* al = sign bit */
	movzbl	%al, %ecx
	movsd	(%rdx,%rcx,8), %xmm0		/* +0.0 for positive x, -1.0 for negative x */
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.cfi_def_cfa_offset 32

/*
 * E >= 1075: no fraction bits remain (this range also contains E == 2047,
 * the encoding of infinities and NaNs).  Return x * 1.0.
 */
..B1.62:
	lea	_ones(%rip), %rax
	movsd	(%rsp), %xmm0
	mulsd	(%rax), %xmm0
	addq	$24, %rsp
	.cfi_def_cfa_offset 8
	ret
	.align 16,0x90
	.cfi_endproc
	.type nearbyint,@function
	.size nearbyint,.-nearbyint
	.data
# -- End nearbyint
  .section .rodata, "a"
/*
 * Constant tables used by nearbyint.  Each table holds two IEEE-754
 * doubles written as little-endian (low word, high word) pairs; the code
 * indexes them with the sign bit of its argument (entry 0 for a clear
 * sign bit, entry 1 for a set sign bit).  Every table is 16 bytes and is
 * aligned to 16 so that each double is naturally aligned and a pair never
 * straddles a cache line.
 */
	.align 16
/* { +1.0, -1.0 } */
_ones:
	.long	0
	.long	1072693248			/* 0x3FF00000 */
	.long	0
	.long	3220176896			/* 0xBFF00000 */
	.type	_ones,@object
	.size	_ones,16
	.align 16
/* { +1.0, -0.0 } */
_pone_nzero:
	.long	0
	.long	1072693248			/* 0x3FF00000 */
	.long	0
	.long	2147483648			/* 0x80000000 */
	.type	_pone_nzero,@object
	.size	_pone_nzero,16
	.align 16
/* { +0.0, -0.0 } */
_zeros:
	.long	0
	.long	0
	.long	0
	.long	2147483648			/* 0x80000000 */
	.type	_zeros,@object
	.size	_zeros,16
	.align 16
/* { +0.0, -1.0 } */
_pzero_none:
	.long	0
	.long	0
	.long	0
	.long	3220176896			/* 0xBFF00000 */
	.type	_pzero_none,@object
	.size	_pzero_none,16
	.data
	.section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
	.section .eh_frame,"a",@progbits
.eh_frame_seg:
	.align 1
# End