sinh_gen.S 13 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "sinh_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin sinh
  #
  # sinh: double-precision hyperbolic sine (AT&T syntax, x86-64, SSE2 scalar).
  #   In:   %xmm0 = x
  #   Out:  %xmm0 = result
  #   Uses: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r10, %xmm0-%xmm15, flags, and
  #         scratch slots at -8/-12/-16/-24/-32/-40(%rsp). %rsp is never
  #         adjusted and no call is made.
  #   Reads the external table __libm_exp_table_128 through the GOT.
  #
  # Dispatch is on hx = high 32 bits of x with the sign bit cleared:
  #   hx <  0x00100000            x == 0 -> return x unchanged;
  #                               otherwise -> x + (+/-2^-1000 * 2^-1000)
  #   hx <  0x3C600000 (2^-57)    return x * (1.0 + x)
  #   hx <  0x3F947AE1 (~0.02)    x + x * P(x^2), odd polynomial          (B1.10)
  #   hx <  0x4036DFB5 (~22.87)   table-driven, both exponentials         (B1.11)
  #   |x| <= 0x408633CE8FB9F87D   table-driven, growing exponential only  (B1.18)
  #   hx <  0x7FF00000            (+/-2^1000) * 2^1000, finite overflow   (B1.19)
  #   otherwise (Inf or NaN)      return x + x
  #
  41. .text
  42. .align 16,0x90
  43. .globl sinh
  44. sinh:
  45. # parameter 1: %xmm0
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_sinh.1:
  49. ..L2:
  # Spill x so its two 32-bit halves can be inspected with integer compares.
  50. movsd %xmm0, -8(%rsp)
  51. movl -4(%rsp), %eax    # eax = high word of x (sign, exponent, top mantissa bits)
  52. movl %eax, %ecx
  53. andl $2147483647, %ecx    # ecx = hx (mask 0x7FFFFFFF drops the sign bit)
  54. cmpl $1077338037, %ecx    # 0x4036DFB5
  55. jae ..B1.12    # large |x|, Inf or NaN
  56. ..B1.2:
  57. cmpl $1066695393, %ecx    # 0x3F947AE1
  58. jae ..B1.11    # mid-range: table-driven path
  59. ..B1.3:
  60. cmpl $1012924416, %ecx    # 0x3C600000
  61. jae ..B1.10    # small: polynomial path
  62. ..B1.4:
  63. shrl $31, %eax    # eax = sign bit of x (0 or 1); table index in B1.7
  64. cmpl $1048576, %ecx    # 0x00100000: below this the exponent field is zero
  65. jb ..B1.6
  66. ..B1.5:
  # Normal x with hx < 0x3C600000: result is x * (1.0 + x).
  67. movsd .L_2il0floatpacket.13(%rip), %xmm1    # 1.0
  68. addsd %xmm0, %xmm1
  69. movsd %xmm1, -24(%rsp)    # the sum makes a round trip through memory
  70. movsd -24(%rsp), %xmm2
  71. mulsd %xmm2, %xmm0
  72. ret
  73. ..B1.6:
  # Exponent field is zero: x is either zero or subnormal.
  74. movl -8(%rsp), %edx    # low word of x
  75. orl %edx, %ecx    # ZF set only when every non-sign bit of x is zero
  76. je ..B1.8
  77. ..B1.7:
  # Non-zero with zero exponent field: return x + s * 2^-1000 * 2^-1000, where
  # s carries the sign of x (_small_value_64 holds +2^-1000 then -2^-1000).
  # NOTE(review): the product looks intended only to raise underflow/inexact - confirm.
  78. lea _small_value_64(%rip), %rdx
  79. movsd (%rdx,%rax,8), %xmm0    # entry selected by the sign bit
  80. mulsd (%rdx), %xmm0    # times entry 0
  81. movsd %xmm0, -24(%rsp)
  82. movsd -24(%rsp), %xmm0
  83. addsd -8(%rsp), %xmm0    # + x
  84. ret
  85. ..B1.8:
  # x is +0 or -0: hand it back unchanged.
  86. movsd -8(%rsp), %xmm0
  87. ..B1.9:
  88. ret
  89. ..B1.10:
  # Polynomial path. With x2 = x*x and x4 = x2*x2 the code evaluates
  #   x + x * ( (c9*x4 + c10)*x4 + (c11*x4 + c12)*x2 )
  # where c9..c12 are .L_2il0floatpacket.9 .. .12.
  90. movsd -8(%rsp), %xmm4    # xmm4 = x
  91. movaps %xmm4, %xmm2
  92. mulsd %xmm4, %xmm2    # xmm2 = x2
  93. movaps %xmm2, %xmm1
  94. mulsd %xmm2, %xmm1    # xmm1 = x4
  95. movsd .L_2il0floatpacket.9(%rip), %xmm0
  96. movsd .L_2il0floatpacket.11(%rip), %xmm3
  97. mulsd %xmm1, %xmm0
  98. mulsd %xmm1, %xmm3
  99. addsd .L_2il0floatpacket.10(%rip), %xmm0
  100. addsd .L_2il0floatpacket.12(%rip), %xmm3
  101. mulsd %xmm1, %xmm0
  102. mulsd %xmm2, %xmm3
  103. addsd %xmm3, %xmm0
  104. mulsd %xmm4, %xmm0
  105. addsd %xmm4, %xmm0
  106. ret
  107. ..B1.11:
  # Table-driven path for 0x3F947AE1 <= hx < 0x4036DFB5.
  #   N = |x| * (128/ln2) rounded to an integer (2^52 add/subtract trick)
  #   j = low 7 bits of N, sign-extended;  k = (N - j) >> 7
  #   r = |x| - N*(ln2/128), with ln2/128 supplied as a high and a low part
  # Two products are formed - one from the table entry at +16*j and r, one
  # from the entry at -16*j and -r - scaled by sign*2^(k-1) and sign*2^(-k-1)
  # respectively, and then subtracted.
  # NOTE(review): __libm_exp_table_128 is defined elsewhere; the pair of
  # doubles read at byte offsets 1024+16*j and 1032+16*j presumably encodes
  # 2^(j/128) in two pieces - confirm against the table source.
  108. movsd -8(%rsp), %xmm1
  109. lea _TWO_52(%rip), %rdx
  110. andps .L_2il0floatpacket.14(%rip), %xmm1    # xmm1 = |x|
  111. lea _ptwo_32p1(%rip), %rsi
  112. movsd .L_2il0floatpacket.0(%rip), %xmm2    # 128/ln2
  113. lea _ntwo_32p1(%rip), %rdi
  114. mulsd %xmm1, %xmm2
  115. lea _two_32(%rip), %r8
  116. movsd .L_2il0floatpacket.1(%rip), %xmm3    # high part of ln2/128
  117. andl $-2147483648, %eax    # keep only the sign bit of x (mask 0x80000000)
  118. movsd %xmm1, -8(%rsp)
  119. movsd .L_2il0floatpacket.2(%rip), %xmm4    # low part of ln2/128
  120. movsd (%rsi), %xmm6    # 2^32 + 1
  121. movsd (%rdi), %xmm7    # -(2^32 - 1)
  122. movsd .L_2il0floatpacket.4(%rip), %xmm13
  123. movsd .L_2il0floatpacket.6(%rip), %xmm14
  124. movsd .L_2il0floatpacket.3(%rip), %xmm8    # -1.0
  125. movsd .L_2il0floatpacket.3(%rip), %xmm9    # -1.0
  126. addsd (%rdx), %xmm2    # + 2^52: the rounded integer lands in the low mantissa bits
  127. movsd %xmm2, -24(%rsp)
  128. movsd -24(%rsp), %xmm5
  129. movl -24(%rsp), %r10d    # r10d = N (low 32 bits of the biased value)
  130. movl %r10d, %ecx
  131. shll $25, %ecx
  132. subsd (%rdx), %xmm5    # xmm5 = N as a double
  133. mulsd %xmm5, %xmm3    # N * high part
  134. mulsd %xmm4, %xmm5    # N * low part
  135. subsd %xmm3, %xmm1
  136. movaps %xmm1, %xmm10
  137. sarl $25, %ecx    # ecx = j (shift pair sign-extends the low 7 bits)
  138. subsd %xmm5, %xmm10    # xmm10 = r
  139. movaps %xmm10, %xmm11
  140. movaps %xmm10, %xmm15
  141. movslq %ecx, %rcx
  142. subsd %xmm10, %xmm1
  143. addsd %xmm10, %xmm6    # (2^32 + 1) + r
  144. subsd %xmm10, %xmm7    # -(2^32 - 1) - r
  145. subsd %xmm5, %xmm1    # xmm1 = rounding error left over from forming r
  146. movsd %xmm6, -40(%rsp)
  147. subl %ecx, %r10d    # r10d = N - j, a multiple of 128
  148. movsd %xmm7, -32(%rsp)
  149. pxor %xmm5, %xmm5    # clear xmm5 ahead of the cvtss2sd that writes it below
  150. movsd -40(%rsp), %xmm2
  151. addsd %xmm1, %xmm11    # xmm11 = r plus its error term
  152. subsd (%r8), %xmm2    # minus 2^32: leaves 1 + r with its low-order bits rounded away
  153. movaps %xmm11, %xmm12
  154. addsd %xmm2, %xmm8    # xmm8 = leading part of r
  155. mulsd %xmm11, %xmm12    # xmm12 = r^2
  156. subsd %xmm8, %xmm15    # xmm15 = r minus its leading part
  157. mulsd %xmm12, %xmm13
  158. mulsd %xmm12, %xmm14
  159. addsd %xmm1, %xmm15
  160. addsd .L_2il0floatpacket.5(%rip), %xmm13
  161. addsd .L_2il0floatpacket.7(%rip), %xmm14
  162. mulsd %xmm12, %xmm13
  163. mulsd %xmm12, %xmm14
  164. mulsd %xmm11, %xmm13    # xmm13 = odd terms: (c4*r^2 + c5) * r^3
  165. addsd .L_2il0floatpacket.8(%rip), %xmm14
  166. addsd %xmm13, %xmm15
  167. mulsd %xmm12, %xmm14    # xmm14 = even terms: ((c6*r^2 + c7)*r^2 + 0.5) * r^2
  168. movsd -32(%rsp), %xmm0
  169. addsd %xmm14, %xmm15    # xmm15 = low-order tail for the +r product
  170. addsd (%r8), %xmm0    # plus 2^32: leaves 1 - r with its low-order bits rounded away
  171. shrl $7, %r10d    # r10d = k
  172. addsd %xmm0, %xmm9    # xmm9 = leading part of -r
  173. shll $23, %r10d    # k moved to the single-precision exponent position
  174. addsd %xmm9, %xmm10
  175. shlq $4, %rcx    # table entries are addressed at a 16-byte stride
  176. lea 1056964608(%r10), %r9d    # + 0x3F000000: float bit pattern of 2^(k-1)
  177. negl %r10d
  178. orl %eax, %r9d    # attach the sign of x
  179. addl $1056964608, %r10d    # 0x3F000000 - (k << 23): float bit pattern of 2^(-k-1)
  180. addsd %xmm10, %xmm1
  181. orl %r10d, %eax    # attach the sign of x
  182. movl %eax, -12(%rsp)
  183. movq __libm_exp_table_128@GOTPCREL(%rip), %rax
  184. xorps .L_2il0floatpacket.15(%rip), %xmm1    # flip the sign: tail now belongs to -r
  185. movl %r9d, -16(%rsp)
  186. subsd %xmm13, %xmm1    # odd terms change sign for -r
  187. cvtss2sd -16(%rsp), %xmm5    # xmm5 = sign * 2^(k-1) as a double
  188. addsd %xmm14, %xmm1    # even terms keep their sign
  189. movsd 1032(%rax,%rcx), %xmm8    # second double of the entry at +16*j
  190. movaps %xmm8, %xmm4
  191. mulsd %xmm15, %xmm4
  192. addsd %xmm2, %xmm15
  193. mulsd %xmm8, %xmm2
  194. mulsd 1024(%rax,%rcx), %xmm15    # first double of the entry at +16*j
  195. negq %rcx    # switch to the mirrored entry at -16*j
  196. addsd %xmm15, %xmm4
  197. movsd 1032(%rax,%rcx), %xmm15
  198. movaps %xmm15, %xmm3
  199. mulsd %xmm1, %xmm3
  200. addsd %xmm0, %xmm1
  201. mulsd %xmm15, %xmm0
  202. mulsd %xmm5, %xmm4
  203. mulsd 1024(%rax,%rcx), %xmm1
  204. mulsd %xmm2, %xmm5    # xmm5 = leading term of the +r product, scaled
  205. addsd %xmm1, %xmm3
  206. pxor %xmm1, %xmm1    # clear xmm1 ahead of the cvtss2sd below
  207. cvtss2sd -12(%rsp), %xmm1    # xmm1 = sign * 2^(-k-1) as a double
  208. mulsd %xmm1, %xmm3
  209. mulsd %xmm0, %xmm1    # xmm1 = leading term of the -r product, scaled
  210. subsd %xmm3, %xmm4    # difference of the two low-order parts
  # Subtract the leading terms, recover what that subtraction rounded off,
  # and fold it in together with the low-order difference.
  211. movaps %xmm5, %xmm0
  212. subsd %xmm1, %xmm0
  213. subsd %xmm0, %xmm5
  214. subsd %xmm1, %xmm5
  215. addsd %xmm4, %xmm5
  216. addsd %xmm5, %xmm0
  217. ret
  218. ..B1.12:
  # hx >= 0x4036DFB5. Compare |x| against the 64-bit pattern
  # 0x408633CE8FB9F87D, high word first and low word only on a tie.
  219. cmpl $1082536910, %ecx    # 0x408633CE
  220. jb ..B1.18
  221. ..B1.13:
  222. jne ..B1.15    # flags still come from the compare above: high word is greater
  223. ..B1.14:
  224. cmpl $-1883637635, -8(%rsp)    # low word against 0x8FB9F87D (unsigned compare)
  225. jbe ..B1.18
  226. ..B1.15:
  227. cmpl $2146435072, %ecx    # 0x7FF00000: exponent field all ones
  228. jb ..B1.19    # finite but above the limit
  229. ..B1.16:
  # Inf or NaN: return x + x.
  230. movsd -8(%rsp), %xmm0
  231. addsd %xmm0, %xmm0
  232. ..B1.17:
  233. ret
  234. ..B1.18:
  # Large-argument path: same reduction as B1.11 (N, j, k, r), but only the
  # table entry at +16*j is used. The result is scaled by a double whose high
  # word is sign | ((k << 20) + 0x1FE00000) and whose low word is 0, i.e.
  # sign * 2^(k-513), and then by 2^512.
  235. movsd -8(%rsp), %xmm12
  236. lea _TWO_52(%rip), %rdx
  237. andps .L_2il0floatpacket.14(%rip), %xmm12    # xmm12 = |x|
  238. lea _ptwo_32p1(%rip), %rcx
  239. movsd .L_2il0floatpacket.0(%rip), %xmm0    # 128/ln2
  240. lea _two_32(%rip), %rsi
  241. mulsd %xmm12, %xmm0
  242. andl $-2147483648, %eax    # keep only the sign bit of x
  243. movsd .L_2il0floatpacket.1(%rip), %xmm1    # high part of ln2/128
  244. lea _TWO_512(%rip), %r9
  245. movsd %xmm12, -8(%rsp)
  246. movsd .L_2il0floatpacket.2(%rip), %xmm2    # low part of ln2/128
  247. movsd .L_2il0floatpacket.6(%rip), %xmm11
  248. movsd .L_2il0floatpacket.4(%rip), %xmm8
  249. movsd (%rcx), %xmm4    # 2^32 + 1
  250. movsd .L_2il0floatpacket.3(%rip), %xmm5    # -1.0
  251. movsd .L_2il0floatpacket.8(%rip), %xmm10    # 0.5
  252. movl $0, -16(%rsp)    # low word of the scale factor assembled at -16(%rsp)
  253. addsd (%rdx), %xmm0    # + 2^52: the rounded integer lands in the low mantissa bits
  254. movsd %xmm0, -24(%rsp)
  255. movsd -24(%rsp), %xmm3
  256. movl -24(%rsp), %edi    # edi = N
  257. movl %edi, %r8d
  258. shll $25, %r8d
  259. subsd (%rdx), %xmm3    # xmm3 = N as a double
  260. mulsd %xmm3, %xmm1    # N * high part
  261. mulsd %xmm2, %xmm3    # N * low part
  262. subsd %xmm1, %xmm12
  263. movaps %xmm12, %xmm6
  264. sarl $25, %r8d    # r8d = j (shift pair sign-extends the low 7 bits)
  265. subsd %xmm3, %xmm6    # xmm6 = r
  266. movaps %xmm6, %xmm7
  267. subl %r8d, %edi
  268. shrl $7, %edi    # edi = k
  269. subsd %xmm6, %xmm12
  270. addsd %xmm6, %xmm4    # (2^32 + 1) + r
  271. subsd %xmm3, %xmm12    # xmm12 = rounding error left over from forming r
  272. movsd %xmm4, -24(%rsp)
  273. addsd %xmm12, %xmm7    # xmm7 = r plus its error term
  274. movaps %xmm7, %xmm9
  275. mulsd %xmm7, %xmm9    # xmm9 = r^2
  276. mulsd %xmm9, %xmm11
  277. mulsd %xmm9, %xmm8
  278. mulsd %xmm9, %xmm10    # 0.5 * r^2
  279. addsd .L_2il0floatpacket.7(%rip), %xmm11
  280. addsd .L_2il0floatpacket.5(%rip), %xmm8
  281. mulsd %xmm9, %xmm11
  282. mulsd %xmm7, %xmm8
  283. movsd -24(%rsp), %xmm0
  284. addsd %xmm8, %xmm11
  285. subsd (%rsi), %xmm0    # minus 2^32: leaves 1 + r with its low-order bits rounded away
  286. mulsd %xmm9, %xmm11
  287. addsd %xmm0, %xmm5    # xmm5 = leading part of r
  288. addsd %xmm10, %xmm11    # xmm11 = polynomial terms of degree 2 and above
  289. subsd %xmm5, %xmm6    # r minus its leading part
  290. shll $20, %edi    # k moved to the double-precision exponent position (high word)
  291. addsd %xmm6, %xmm12
  292. movslq %r8d, %r8
  293. addl $534773760, %edi    # + 0x1FE00000
  294. shlq $4, %r8    # table entries are addressed at a 16-byte stride
  295. orl %edi, %eax    # attach the sign of x
  296. movl %eax, -12(%rsp)    # high word of the scale factor
  297. addsd %xmm11, %xmm12    # xmm12 = low-order tail
  298. movq __libm_exp_table_128@GOTPCREL(%rip), %rax
  299. movsd 1032(%rax,%r8), %xmm13    # second double of the entry at +16*j
  300. movaps %xmm13, %xmm14
  301. mulsd %xmm12, %xmm14
  302. addsd %xmm0, %xmm12
  303. mulsd %xmm13, %xmm0
  304. mulsd 1024(%rax,%r8), %xmm12    # first double of the entry at +16*j
  305. addsd %xmm12, %xmm14
  306. addsd %xmm14, %xmm0
  307. mulsd -16(%rsp), %xmm0    # times sign * 2^(k-513)
  308. mulsd (%r9), %xmm0    # times 2^512
  309. ret
  310. ..B1.19:
  # Finite |x| above the limit: return (+/-2^1000) * 2^1000, with the sign of x
  # (_large_value_64 holds +2^1000 then -2^1000).
  311. lea _large_value_64(%rip), %rdx
  312. shrl $31, %eax    # eax = sign bit of x (0 or 1)
  313. movsd (%rdx,%rax,8), %xmm0
  314. mulsd (%rdx), %xmm0
  315. ret
  316. .align 16,0x90
  317. .cfi_endproc
  318. .type sinh,@function
  319. .size sinh,.-sinh
  320. .data
  321. # -- End sinh
  322. .section .rodata, "a"
  # Read-only floating-point constants referenced by sinh. Each value is
  # written as one 64-bit IEEE-754 bit pattern per .quad; on little-endian
  # x86-64 this emits the same bytes as a low-word/high-word .long pair.
  323. .align 16
  324. .align 16
  # 16-byte mask: the low 64-bit lane clears the sign bit of a double.
  325. .L_2il0floatpacket.14:
  326. .quad 0x7fffffffffffffff,0x0000000000000000
  327. .type .L_2il0floatpacket.14,@object
  328. .size .L_2il0floatpacket.14,16
  329. .align 16
  # 16-byte mask: the low 64-bit lane holds only the sign bit of a double.
  330. .L_2il0floatpacket.15:
  331. .quad 0x8000000000000000,0x0000000000000000
  332. .type .L_2il0floatpacket.15,@object
  333. .size .L_2il0floatpacket.15,16
  334. .align 8
  # 128/ln(2)
  335. .L_2il0floatpacket.0:
  336. .quad 0x40671547652b82fe
  337. .type .L_2il0floatpacket.0,@object
  338. .size .L_2il0floatpacket.0,8
  339. .align 8
  # ln(2)/128, leading bits only (low 32 bits of the pattern are zero)
  340. .L_2il0floatpacket.1:
  341. .quad 0x3f762e4200000000
  342. .type .L_2il0floatpacket.1,@object
  343. .size .L_2il0floatpacket.1,8
  344. .align 8
  # ln(2)/128, remaining low-order part
  345. .L_2il0floatpacket.2:
  346. .quad 0x3e2fdf473de6af28
  347. .type .L_2il0floatpacket.2,@object
  348. .size .L_2il0floatpacket.2,8
  349. .align 8
  # -1.0
  350. .L_2il0floatpacket.3:
  351. .quad 0xbff0000000000000
  352. .type .L_2il0floatpacket.3,@object
  353. .size .L_2il0floatpacket.3,8
  354. .align 8
  # approximately 1/120
  355. .L_2il0floatpacket.4:
  356. .quad 0x3f8111116887cd7c
  357. .type .L_2il0floatpacket.4,@object
  358. .size .L_2il0floatpacket.4,8
  359. .align 8
  # approximately 1/6
  360. .L_2il0floatpacket.5:
  361. .quad 0x3fc5555555555405
  362. .type .L_2il0floatpacket.5,@object
  363. .size .L_2il0floatpacket.5,8
  364. .align 8
  # approximately 1/720
  365. .L_2il0floatpacket.6:
  366. .quad 0x3f56c16c87372663
  367. .type .L_2il0floatpacket.6,@object
  368. .size .L_2il0floatpacket.6,8
  369. .align 8
  # approximately 1/24
  370. .L_2il0floatpacket.7:
  371. .quad 0x3fa555555555541d
  372. .type .L_2il0floatpacket.7,@object
  373. .size .L_2il0floatpacket.7,8
  374. .align 8
  # 0.5
  375. .L_2il0floatpacket.8:
  376. .quad 0x3fe0000000000000
  377. .type .L_2il0floatpacket.8,@object
  378. .size .L_2il0floatpacket.8,8
  379. .align 8
  # approximately 2.756e-6 (close to 1/362880); x^9 coefficient of the small-|x| polynomial
  380. .L_2il0floatpacket.9:
  381. .quad 0x3ec71e39b9ff12f0
  382. .type .L_2il0floatpacket.9,@object
  383. .size .L_2il0floatpacket.9,8
  384. .align 8
  # approximately 1/120; x^5 coefficient of the small-|x| polynomial
  385. .L_2il0floatpacket.10:
  386. .quad 0x3f811111111111c1
  387. .type .L_2il0floatpacket.10,@object
  388. .size .L_2il0floatpacket.10,8
  389. .align 8
  # approximately 1/5040; x^7 coefficient of the small-|x| polynomial
  390. .L_2il0floatpacket.11:
  391. .quad 0x3f2a01a01940cd88
  392. .type .L_2il0floatpacket.11,@object
  393. .size .L_2il0floatpacket.11,8
  394. .align 8
  # 1/6 rounded to double; x^3 coefficient of the small-|x| polynomial
  395. .L_2il0floatpacket.12:
  396. .quad 0x3fc5555555555555
  397. .type .L_2il0floatpacket.12,@object
  398. .size .L_2il0floatpacket.12,8
  399. .align 8
  # 1.0
  400. .L_2il0floatpacket.13:
  401. .quad 0x3ff0000000000000
  402. .type .L_2il0floatpacket.13,@object
  403. .size .L_2il0floatpacket.13,8
  # Named double-precision constants read by sinh with 8-byte movsd/addsd/
  # subsd/mulsd memory operands. Each double is stored as two 32-bit words,
  # low word first. They are aligned to 8 bytes so natural alignment does not
  # depend on what precedes them in the section; with the current layout this
  # moves no symbol. Words are written in hex so the sign/exponent fields of
  # the high word can be read directly.
  404. .align 8
  # Entry 0 = +2^-1000, entry 1 = -2^-1000 (indexed by the sign bit of x).
  405. _small_value_64:
  406. .long 0x00000000
  407. .long 0x01700000
  408. .long 0x00000000
  409. .long 0x81700000
  410. .type _small_value_64,@object
  411. .size _small_value_64,16
  412. .align 8
  # 2^52
  413. _TWO_52:
  414. .long 0x00000000
  415. .long 0x43300000
  416. .type _TWO_52,@object
  417. .size _TWO_52,8
  418. .align 8
  # 2^32 + 1
  419. _ptwo_32p1:
  420. .long 0x00100000
  421. .long 0x41f00000
  422. .type _ptwo_32p1,@object
  423. .size _ptwo_32p1,8
  424. .align 8
  # -(2^32 - 1)
  425. _ntwo_32p1:
  426. .long 0xffe00000
  427. .long 0xc1efffff
  428. .type _ntwo_32p1,@object
  429. .size _ntwo_32p1,8
  430. .align 8
  # 2^32
  431. _two_32:
  432. .long 0x00000000
  433. .long 0x41f00000
  434. .type _two_32,@object
  435. .size _two_32,8
  436. .align 8
  # 2^512
  437. _TWO_512:
  438. .long 0x00000000
  439. .long 0x5ff00000
  440. .type _TWO_512,@object
  441. .size _TWO_512,8
  442. .align 8
  # Entry 0 = +2^1000, entry 1 = -2^1000 (indexed by the sign bit of x).
  443. _large_value_64:
  444. .long 0x00000000
  445. .long 0x7e700000
  446. .long 0x00000000
  447. .long 0xfe700000
  448. .type _large_value_64,@object
  449. .size _large_value_64,16
  # Trailing section bookkeeping; nothing below emits code or data bytes
  # apart from alignment padding.
  450. .data    # superseded immediately by the .section directive that follows
  # Empty section with no flags. GNU toolchain convention: its presence tells
  # the linker this object does not require an executable stack.
  451. .section .note.GNU-stack, ""
  452. // -- Begin DWARF2 SEGMENT .eh_frame
  # Only a label and an alignment directive are written to .eh_frame here.
  # NOTE(review): unwind records are presumably produced by the assembler
  # from the .cfi_startproc/.cfi_endproc pair in sinh - confirm.
  453. .section .eh_frame,"a",@progbits
  454. .eh_frame_seg:
  455. .align 1
  456. # End