cosdl.S 17 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "cosdl.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin cosdl
  # Routine cosdl (GNU as, AT&T syntax, x86-64, x87 FPU).
  # Judging by the name and by the 90/180/360 reductions below, this evaluates
  # the cosine of an argument expressed in degrees.
  # NOTE(review): the C prototype is not visible in this file - confirm.
  # Input : 80-bit x87 value read from 144(%rsp) once the frame is allocated.
  # Output: 80-bit result left in st(0).
  # Frame : 136 bytes. 122(%rsp) = saved x87 control word, 120(%rsp) = modified
  #         control word, 104(%rsp) = result, 96(%rsp) = sign factor (float),
  #         0..95(%rsp) = temporaries.
  # Registers written: rax, rcx, rdx, rsi, rdi, r8-r11, xmm0.
  # Overview of the paths selected by the biased exponent of the argument:
  #   exponent == 32767            -> ..B1.26 (argument times 0.0)
  #   argument == 0                -> ..B1.25 (result 1.0)
  #   exponent <  10783            -> ..B1.10 (1.0 combined with tiny argument)
  #   exponent <  16446            -> ..B1.12 (x87 reduction by 90 + polynomial)
  #   otherwise                    -> ..B1.16 (integer reduction + table lookup)
  41. .text
  42. .align 16,0x90
  43. .globl cosdl
  44. cosdl:
  45. # parameter 1: 144 + %rsp
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_cosdl.1:
  49. ..L2:
  50. subq $136, %rsp                 # allocate the 136-byte local frame
  51. .cfi_def_cfa_offset 144
  52. xorb %r8b, %r8b                 # r8b = 0: control word not modified so far
  53. ..B1.2:
  54. fnstcw 122(%rsp)                # save the current x87 control word
  55. ..B1.3:
  56. movzwl 122(%rsp), %edx          # edx = saved control word
  57. movl %edx, %eax
  58. andl $3840, %eax                # isolate bits 8-11 (mask 0x0F00: precision + rounding control)
  59. cmpl $768, %eax                 # already 0x0300 (extended precision, round to nearest)?
  60. je ..B1.7                       # yes: leave the control word untouched
  61. ..B1.4:
  62. andl $-3841, %edx               # clear bits 8-11
  63. orl $-64768, %edx               # set 0x0300 there; only the low 16 bits are stored below
  64. movw %dx, 120(%rsp)             # modified control word
  65. ..B1.5:
  66. fldcw 120(%rsp)                 # activate the modified control word
  67. ..B1.6:
  68. movb $1, %r8b                   # remember to restore the saved word at ..B1.22
  69. ..B1.7:
  70. fldt 144(%rsp)                  # st0 = argument
  71. lea ones(%rip), %rdi            # rdi = address of the {+1.0f, -1.0f} table
  72. movzwl 152(%rsp), %eax          # eax = sign/exponent word of the argument
  73. movl %eax, %esi
  74. shrl $15, %eax                  # eax = sign bit (0 or 1)
  75. andl $32767, %esi               # esi = 15-bit biased exponent
  76. cmpl $32767, %esi               # exponent all ones? flags are consumed by the je below
  77. fmuls (%rdi,%rax,4)             # multiply by ones[sign]: strips the sign of the argument
  78. fstpt 144(%rsp)                 # overwrite the argument slot with the sign-stripped value
  79. fldt 144(%rsp)
  80. je ..B1.26                      # exponent field 0x7fff (Inf/NaN encodings)
  81. ..B1.8:
  82. fldt .L_2il0floatpacket.0(%rip) # 0.0
  83. fucomip %st(1), %st             # compare 0.0 with |x| and pop the 0.0
  84. jp ..B1.9                       # unordered: continue with the general path
  85. je ..B1.25                      # |x| == 0
  86. ..B1.9:
  87. cmpl $10783, %esi               # biased exponent below 10783 selects the tiny-argument path
  88. jge ..B1.11
  89. ..B1.10:
  90. fldt .L_2il0floatpacket.1(%rip) # 1.0
  91. fsubp %st, %st(1)               # difference of 1.0 and |x|; NOTE(review): presumably 1.0 - |x| given the AT&T fsubp convention - confirm
  92. fstpt 104(%rsp)                 # store result
  93. jmp ..B1.21
  94. ..B1.11:
  95. cmpl $16446, %esi               # 16446 = 16383 + 63, the biased exponent of 2^63
  96. jge ..B1.16                     # |x| >= 2^63: integer reduction path
  97. ..B1.12:
  98. fldt .L_2il0floatpacket.2(%rip) # 1.5 * 2^63, used as a round-to-integer constant
  99. lea _Rcp90(%rip), %rax
  100. fldt .L_2il0floatpacket.3(%rip) # 90.0
  101. fldt .L_2il0floatpacket.0(%rip) # 0.0
  102. fldt (%rax)                    # _Rcp90
  103. fmulp %st, %st(4)              # |x| * _Rcp90
  104. fxch %st(2)
  105. fadd %st, %st(3)               # add the rounding constant to the scaled argument
  106. fxch %st(3)
  107. fstpt 80(%rsp)                 # spill the sum so its significand can be read as an integer
  108. fldt 80(%rsp)
  109. movl 80(%rsp), %ecx            # ecx = low 32 bits of that significand (presumably the quadrant count k)
  110. fsubp %st, %st(3)              # remove the rounding constant again, leaving the integer as an x87 value
  111. fmulp %st, %st(2)              # times 90.0
  112. incl %ecx                      # ecx = k + 1
  113. fldt 144(%rsp)                 # reload |x|
  114. movl %ecx, %edx
  115. andl $2, %edx
  116. fsubp %st, %st(2)              # difference of |x| and 90*k: the reduced argument
  117. shrl $1, %edx                  # edx = bit 1 of (k + 1)
  118. movss (%rdi,%rdx,4), %xmm0     # xmm0 = ones[edx]: sign factor for the result
  119. fucomip %st(1), %st            # compare 0.0 with the reduced argument and pop the 0.0
  120. jp ..B1.13
  121. je ..B1.24                     # reduced argument is exactly zero
  122. ..B1.13:
  # Common setup for both polynomial paths: products and sums with the
  # constants 2^33 + 1 and _TWO_53H. NOTE(review): this looks like a high/low
  # splitting scheme for extra-precision arithmetic - confirm against the C source.
  123. fldt .L_2il0floatpacket.4(%rip) # 2^33 + 1
  124. lea _TWO_53H(%rip), %rax
  125. fmul %st(1), %st
  126. fld %st(1)
  127. movss %xmm0, (%rsp)            # spill the sign factor
  128. testb $1, %cl                  # ZF = (bit 0 of k + 1 is clear); consumed by the je at the end of this block (the x87/SSE moves in between do not write EFLAGS)
  129. fsubr %st(1), %st
  130. fsubrp %st, %st(1)
  131. fld %st(0)
  132. fmul %st(1), %st
  133. fld %st(2)
  134. fsub %st(2), %st
  135. fmul %st, %st(2)
  136. fld %st(3)
  137. fxch %st(1)
  138. fmul %st(4), %st
  139. faddp %st, %st(3)
  140. fld %st(1)
  141. fld %st(4)
  142. fldl (%rax)                    # _TWO_53H (8-byte value)
  143. fld %st(0)
  144. fmul %st(5), %st
  145. fadd %st, %st(3)
  146. fsubrp %st, %st(3)
  147. fxch %st(2)
  148. fsubr %st, %st(4)
  149. fxch %st(4)
  150. faddp %st, %st(5)
  151. fxch %st(1)
  152. fmul %st, %st(2)
  153. fld %st(5)
  154. fadd %st(3), %st
  155. fsubp %st, %st(3)
  156. fxch %st(1)
  157. fsub %st(2), %st
  158. fstpt 64(%rsp)                 # temporary, reloaded only on the ..B1.15 path
  159. fld %st(4)
  160. fmul %st(5), %st
  161. fld %st(0)
  162. fmul %st(1), %st
  163. flds (%rsp)                    # sign factor
  164. fstps 96(%rsp)                 # keep the sign factor at 96(%rsp) for the final scaling
  165. je ..B1.15                     # bit 0 of (k + 1) clear: use the _sindl tables; else fall through to the _cosdl tables
  166. ..B1.14:
  # Polynomial evaluation using _cosdl_poly_coeff (80-bit entries, fldt) and
  # _cosdl_mp_poly_coeff (8-byte entries, fldl).
  167. fstp %st(6)
  168. fstp %st(2)
  169. fld %st(2)
  170. lea 112+_cosdl_poly_coeff(%rip), %rax   # entry at byte offset 112
  171. fmul %st(3), %st
  172. lea 80+_cosdl_poly_coeff(%rip), %rdx    # entry at byte offset 80
  173. fmul %st, %st(1)
  174. fld %st(2)
  175. fmul %st(5), %st
  176. fxch %st(4)
  177. fstpt (%rsp)                   # temporary at 0(%rsp), reloaded below
  178. fldt (%rsp)
  179. fld %st(3)
  180. lea 96+_cosdl_poly_coeff(%rip), %rsi    # entry at byte offset 96
  181. lea 48+_cosdl_poly_coeff(%rip), %rcx    # entry at byte offset 48
  182. lea 64+_cosdl_poly_coeff(%rip), %rdi    # entry at byte offset 64 (rdi no longer points at ones on this path)
  183. lea 32+_cosdl_poly_coeff(%rip), %r9     # entry at byte offset 32
  184. lea 24+_cosdl_mp_poly_coeff(%rip), %r10 # 8-byte value at offset 24
  185. lea 8+_cosdl_mp_poly_coeff(%rip), %r11  # 8-byte value at offset 8
  186. fmul %st(7), %st
  187. fxch %st(1)
  188. fmul %st(6), %st
  189. faddp %st, %st(5)
  190. fld %st(1)
  191. fadd %st(3), %st
  192. fsubp %st, %st(3)
  193. fxch %st(2)
  194. fstpt 16(%rsp)                 # temporary at 16(%rsp), reloaded below
  195. fldt 16(%rsp)
  196. fsubrp %st, %st(1)
  197. faddp %st, %st(3)
  198. fldt (%rax)                    # _cosdl_poly_coeff + 112
  199. fmul %st(5), %st
  200. fldt (%rdx)                    # _cosdl_poly_coeff + 80
  201. lea 16+_cosdl_mp_poly_coeff(%rip), %rax # 8-byte value at offset 16
  202. lea _cosdl_mp_poly_coeff(%rip), %rdx    # 8-byte value at offset 0
  203. faddp %st, %st(1)
  204. fmul %st(5), %st
  205. fldt (%rcx)                    # _cosdl_poly_coeff + 48
  206. faddp %st, %st(1)
  207. fmul %st(2), %st
  208. fldt (%rsi)                    # _cosdl_poly_coeff + 96
  209. fmul %st(6), %st
  210. fldt (%rdi)                    # _cosdl_poly_coeff + 64
  211. faddp %st, %st(1)
  212. fmul %st(6), %st
  213. fldt (%r9)                     # _cosdl_poly_coeff + 32
  214. faddp %st, %st(1)
  215. faddp %st, %st(1)
  216. fmulp %st, %st(1)
  217. fldl (%r10)                    # _cosdl_mp_poly_coeff + 24
  218. fmulp %st, %st(5)
  219. faddp %st, %st(4)
  220. fldl (%r11)                    # _cosdl_mp_poly_coeff + 8
  221. fmulp %st, %st(1)
  222. faddp %st, %st(3)
  223. fldl (%rax)                    # _cosdl_mp_poly_coeff + 16
  224. fmul %st, %st(1)
  225. fxch %st(1)
  226. faddp %st, %st(3)
  227. fldl (%rdx)                    # _cosdl_mp_poly_coeff + 0
  228. fmul %st, %st(2)
  229. fxch %st(2)
  230. faddp %st, %st(3)
  231. fldt (%rsp)                    # reload temporary from 0(%rsp)
  232. fmulp %st, %st(2)
  233. fld %st(1)
  234. fldt 16(%rsp)                  # reload temporary from 16(%rsp)
  235. fmulp %st, %st(2)
  236. fadd %st(1), %st
  237. fsubr %st, %st(2)
  238. fxch %st(1)
  239. faddp %st, %st(2)
  240. fxch %st(2)
  241. faddp %st, %st(1)
  242. flds 96(%rsp)                  # sign factor
  243. fld %st(0)
  244. fldt .L_2il0floatpacket.1(%rip) # 1.0
  245. fadd %st(4), %st
  246. fmul %st, %st(1)
  247. fldt .L_2il0floatpacket.1(%rip) # 1.0 again
  248. fsubp %st, %st(1)
  249. faddp %st, %st(4)
  250. fxch %st(2)
  251. faddp %st, %st(3)
  252. fmulp %st, %st(2)
  253. faddp %st, %st(1)
  254. fstpt 104(%rsp)                # store result
  255. jmp ..B1.21
  256. ..B1.15:
  # Polynomial evaluation using _sindl_poly_coeff (80-bit entries, fldt) and
  # _sindl_mp_poly_coeff (8-byte entries, fldl).
  257. fld %st(6)
  258. lea 128+_sindl_poly_coeff(%rip), %rax   # entry at byte offset 128
  259. fmul %st(2), %st
  260. lea 96+_sindl_poly_coeff(%rip), %rdx    # entry at byte offset 96
  261. fstpt 32(%rsp)                 # temporary at 32(%rsp), reloaded below
  262. fld %st(4)
  263. fmul %st(4), %st
  264. lea 64+_sindl_poly_coeff(%rip), %rcx    # entry at byte offset 64
  265. fmul %st, %st(3)
  266. lea 32+_sindl_poly_coeff(%rip), %rsi    # entry at byte offset 32
  267. fxch %st(6)
  268. fmul %st(7), %st
  269. fxch %st(2)
  270. fstpt 16(%rsp)                 # temporary at 16(%rsp), reloaded below
  271. lea 112+_sindl_poly_coeff(%rip), %rdi   # entry at byte offset 112 (rdi no longer points at ones on this path)
  272. fxch %st(3)
  273. fstpt (%rsp)                   # temporary at 0(%rsp), reloaded below
  274. lea 80+_sindl_poly_coeff(%rip), %r9     # entry at byte offset 80
  275. fldt 64(%rsp)                  # value spilled in ..B1.13
  276. lea 48+_sindl_poly_coeff(%rip), %r10    # entry at byte offset 48
  277. lea 24+_sindl_mp_poly_coeff(%rip), %r11 # 8-byte value at offset 24
  278. fmul %st, %st(4)
  279. fxch %st(1)
  280. faddp %st, %st(4)
  281. fld %st(4)
  282. fadd %st(2), %st
  283. fsubp %st, %st(2)
  284. fxch %st(1)
  285. fstpt 48(%rsp)                 # temporary at 48(%rsp), reloaded below
  286. fldt 48(%rsp)
  287. fsubrp %st, %st(4)
  288. fxch %st(3)
  289. faddp %st, %st(2)
  290. fldt (%rax)                    # _sindl_poly_coeff + 128
  291. fmul %st(1), %st
  292. fldt (%rdx)                    # _sindl_poly_coeff + 96
  293. lea 8+_sindl_mp_poly_coeff(%rip), %rax  # 8-byte value at offset 8
  294. lea 16+_sindl_mp_poly_coeff(%rip), %rdx # 8-byte value at offset 16
  295. faddp %st, %st(1)
  296. fmul %st(1), %st
  297. fldt (%rcx)                    # _sindl_poly_coeff + 64
  298. lea _sindl_mp_poly_coeff(%rip), %rcx    # 8-byte value at offset 0
  299. faddp %st, %st(1)
  300. fmul %st(1), %st
  301. fldt (%rsi)                    # _sindl_poly_coeff + 32
  302. faddp %st, %st(1)
  303. fldt 16(%rsp)                  # reload temporary from 16(%rsp)
  304. fmulp %st, %st(1)
  305. fldt (%rdi)                    # _sindl_poly_coeff + 112
  306. fmul %st(2), %st
  307. fldt (%r9)                     # _sindl_poly_coeff + 80
  308. faddp %st, %st(1)
  309. fmul %st(2), %st
  310. fldt (%r10)                    # _sindl_poly_coeff + 48
  311. faddp %st, %st(1)
  312. fmulp %st, %st(2)
  313. faddp %st, %st(1)
  314. fldt 32(%rsp)                  # reload temporary from 32(%rsp)
  315. fmul %st, %st(1)
  316. fldl (%r11)                    # _sindl_mp_poly_coeff + 24
  317. fmulp %st, %st(1)
  318. faddp %st, %st(1)
  319. fldl (%rax)                    # _sindl_mp_poly_coeff + 8
  320. fmulp %st, %st(4)
  321. faddp %st, %st(3)
  322. fldl (%rdx)                    # _sindl_mp_poly_coeff + 16
  323. fmul %st, %st(1)
  324. fxch %st(1)
  325. faddp %st, %st(3)
  326. fldl (%rcx)                    # _sindl_mp_poly_coeff + 0
  327. fmul %st, %st(2)
  328. fxch %st(2)
  329. faddp %st, %st(3)
  330. fldt (%rsp)                    # reload temporary from 0(%rsp)
  331. fmulp %st, %st(2)
  332. fld %st(1)
  333. fldt 48(%rsp)                  # reload temporary from 48(%rsp)
  334. fmulp %st, %st(2)
  335. fadd %st(1), %st
  336. fsubr %st, %st(2)
  337. fxch %st(1)
  338. faddp %st, %st(2)
  339. fxch %st(2)
  340. faddp %st, %st(1)
  341. flds 96(%rsp)                  # sign factor
  342. fld %st(0)
  343. fmulp %st, %st(3)              # apply the sign factor to both partial results
  344. fmulp %st, %st(1)
  345. faddp %st, %st(1)
  346. fstpt 104(%rsp)                # store result
  347. jmp ..B1.21
  348. ..B1.16:
  # Large-argument path (biased exponent >= 16446): the value is reduced modulo
  # 360 with integer arithmetic on the 64-bit significand stored at 144(%rsp).
  349. fstp %st(0)                    # drop |x| from the x87 stack
  350. lea -16446(%rsi), %ecx         # ecx = biased exponent - 16446 (power of two scaling the significand)
  351. cmpl $14, %ecx
  352. jle ..B1.18                    # at most 14: use it directly as the shift count
  353. ..B1.17:
  # Fold the shift count into the range 3..14: ecx = ((exponent - 16449) mod 12) + 3.
  # NOTE(review): presumably valid because powers of two repeat modulo 360 with period 12 - confirm.
  354. addl $-16449, %esi             # esi = biased exponent - 16449
  355. movl $715827883, %eax          # 0x2AAAAAAB: reciprocal constant for the division below
  356. imull %esi                     # edx:eax = esi * constant
  357. movl %esi, %ecx
  358. sarl $1, %edx
  359. sarl $31, %ecx                 # ecx = sign mask of esi
  360. subl %ecx, %edx                # edx = esi / 12
  361. lea (,%rdx,8), %r9d            # r9d = 8 * quotient
  362. lea (%r9,%rdx,4), %r10d        # r10d = 12 * quotient
  363. subl %r10d, %esi               # esi = remainder
  364. lea 3(%rsi), %ecx              # ecx = remainder + 3
  365. ..B1.18:
  # Three partial remainders modulo 360 are formed with the reciprocal constant
  # 381774871 (0x16C16C17: (n >> 3) * constant >> 34 gives n / 360) and summed in esi.
  366. movl 148(%rsp), %r11d          # r11d = high 32 bits of the significand
  367. movl %r11d, %esi
  368. shll $8, %esi                  # esi = low 24 bits of the high half, shifted left by 8
  369. movl $381774871, %eax
  370. movl %esi, %r9d
  371. andl $-16777216, %r11d         # r11d = top 8 bits of the high half (mask 0xFF000000)
  372. shrl $3, %r9d
  373. movl %r11d, %r10d
  374. mull %r9d
  375. shrl $19, %r11d                # r11d = (top 8 bits << 8) >> 3
  376. shrl $2, %edx                  # edx = esi / 360
  377. imull $-360, %edx, %eax
  378. addl %eax, %esi                # esi = esi mod 360
  379. movl $381774871, %eax
  380. mull %r11d
  381. shrl $16, %r10d                # r10d = top 8 bits << 8
  382. movl $381774871, %eax
  383. shrl $2, %edx                  # edx = r10d / 360
  384. imull $-360, %edx, %r11d
  385. addl %r11d, %r10d              # r10d = r10d mod 360
  386. movl 144(%rsp), %r9d           # r9d = low 32 bits of the significand
  387. addl %r10d, %esi
  388. movl %r9d, %r10d
  389. shrl $3, %r10d
  390. mull %r10d
  391. shrl $2, %edx                  # edx = r9d / 360
  392. imull $-360, %edx, %eax
  393. addl %eax, %r9d                # r9d = r9d mod 360
  394. movl $-1240768329, %eax        # 0xB60B60B7: signed reciprocal constant for division by 360
  395. addl %r9d, %esi                # esi = sum of the three partial remainders
  396. shll %cl, %esi                 # scale by 2^ecx
  397. imull %esi
  398. movl %esi, %ecx
  399. addl %esi, %edx
  400. sarl $8, %edx
  401. sarl $31, %ecx
  402. subl %ecx, %edx                # edx = esi / 360
  403. movl $1, %ecx                  # ecx = 1: initial selector value
  404. imull $-360, %edx, %eax
  405. lea (%rsi,%rax), %edx          # edx = esi mod 360
  406. cmpl $179, %edx
  407. lea -180(%rsi,%rax), %eax      # eax = remainder - 180
  408. movl $3, %esi
  409. cmovg %eax, %edx               # remainder > 179: subtract 180 ...
  410. cmovg %esi, %ecx               # ... and use selector 3 instead of 1
  411. cmpl $89, %edx
  412. jle ..B1.20
  413. ..B1.19:
  414. incl %ecx                      # remainder > 89: bump the selector
  415. addl $-90, %edx                # and subtract 90, leaving edx in 0..89
  416. ..B1.20:
  # Table lookup: index = 4 * edx + 2 * (selector bit 0), in units of 8 bytes.
  # NOTE(review): __libm_sindl_cosdl_table is defined elsewhere; presumably it
  # holds, per degree, two (high, low) pairs of doubles - confirm.
  417. movl %ecx, %eax
  418. andl $2, %ecx
  419. andl $1, %eax
  420. addl %eax, %eax                # eax = 2 * (selector bit 0)
  421. movq __libm_sindl_cosdl_table@GOTPCREL(%rip), %rsi   # table address via the GOT
  422. shrl $1, %ecx                  # ecx = selector bit 1: index into ones
  423. lea (%rax,%rdx,4), %edx
  424. movslq %edx, %rdx
  425. fldl (%rsi,%rdx,8)             # first 8-byte table value
  426. faddl 8(%rsi,%rdx,8)           # plus the following 8-byte table value
  427. fmuls (%rdi,%rcx,4)            # times ones[ecx]; rdi still holds the address of ones on this path
  428. fstpt 104(%rsp)                # store result
  429. ..B1.21:
  # Common exit: restore the x87 control word when it was changed, return the result.
  430. testb %r8b, %r8b
  431. je ..B1.23                     # control word was never modified
  432. ..B1.22:
  433. fldcw 122(%rsp)                # restore the control word saved on entry
  434. ..B1.23:
  435. fldt 104(%rsp)                 # st0 = result
  436. addq $136, %rsp                # release the local frame
  437. .cfi_def_cfa_offset 8
  438. ret
  439. .cfi_def_cfa_offset 144
  440. ..B1.24:
  # Reduced argument is exactly zero: the result is 0.0 when bit 0 of ecx is
  # clear, otherwise the sign factor held in xmm0.
  441. fstp %st(0)                    # drop the reduced argument
  442. fldt .L_2il0floatpacket.0(%rip) # 0.0
  443. testb $1, %cl                  # ZF set when bit 0 of ecx is clear
  444. movss %xmm0, (%rsp)
  445. flds (%rsp)                    # st0 = sign factor, st1 = 0.0
  446. fcmove %st(1), %st             # ZF set: replace the sign factor with 0.0
  447. fstp %st(1)
  448. fstpt 104(%rsp)                # store result
  449. jmp ..B1.21
  450. ..B1.25:
  # Zero argument: the result is 1.0.
  451. fstp %st(0)
  452. fldt .L_2il0floatpacket.1(%rip) # 1.0
  453. fstpt 104(%rsp)                # store result
  454. jmp ..B1.21
  455. ..B1.26:
  # Exponent field 0x7fff: the result is the sign-stripped argument times 0.0.
  456. fldt .L_2il0floatpacket.0(%rip) # 0.0
  457. fmulp %st, %st(1)
  458. fstpt 104(%rsp)                # store result
  459. jmp ..B1.21
  460. .align 16,0x90
  461. .cfi_endproc
  462. .type cosdl,@function
  463. .size cosdl,.-cosdl
  464. .data
  465. # -- End cosdl
  466. .section .rodata, "a"
  # Read-only constants used by cosdl. Each packet below is a 16-byte slot whose
  # first 10 bytes are an x87 80-bit value (8-byte significand, then the 2-byte
  # sign/exponent word, little endian); cosdl reads them with fldt.
  467. .align 16
  468. .align 16
  469. .L_2il0floatpacket.0:
  470. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00   # 0.0
  471. .type .L_2il0floatpacket.0,@object
  472. .size .L_2il0floatpacket.0,16
  473. .align 16
  474. .L_2il0floatpacket.1:
  475. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xff,0x3f,0x00,0x00,0x00,0x00,0x00,0x00   # 1.0 (significand 0x8000000000000000, exponent word 0x3fff)
  476. .type .L_2il0floatpacket.1,@object
  477. .size .L_2il0floatpacket.1,16
  478. .align 16
  479. .L_2il0floatpacket.2:
  480. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x3e,0x40,0x00,0x00,0x00,0x00,0x00,0x00   # 1.5 * 2^63 (significand 0xc000000000000000, exponent word 0x403e)
  481. .type .L_2il0floatpacket.2,@object
  482. .size .L_2il0floatpacket.2,16
  483. .align 16
  484. .L_2il0floatpacket.3:
  485. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xb4,0x05,0x40,0x00,0x00,0x00,0x00,0x00,0x00   # 90.0 (significand 0xb400000000000000, exponent word 0x4005)
  486. .type .L_2il0floatpacket.3,@object
  487. .size .L_2il0floatpacket.3,16
  488. .align 16
  489. .L_2il0floatpacket.4:
  490. .byte 0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x80,0x20,0x40,0x00,0x00,0x00,0x00,0x00,0x00   # 2^33 + 1 (significand 0x8000000040000000, exponent word 0x4020)
  491. .type .L_2il0floatpacket.4,@object
  492. .size .L_2il0floatpacket.4,16
  # _cosdl_poly_coeff: eight 16-byte slots (128 bytes). Each slot holds five
  # 16-bit words (four significand words, then the sign/exponent word) followed
  # by three zero padding words. cosdl loads the slots at byte offsets 32..112
  # with fldt; the slots at offsets 0 and 16 are not referenced by cosdl.
  # NOTE(review): the sign/exponent words alternate in sign and shrink in
  # magnitude, consistent with series coefficients of cosine in degrees - confirm.
  493. .align 16
  494. _cosdl_poly_coeff:
  495. .word 52350                    # slot at offset 0
  496. .word 41213
  497. .word 3800
  498. .word 40885
  499. .word 49138                    # sign/exponent word 0xBFF2
  500. .word 0
  501. .word 0
  502. .word 0
  503. .word 28613                    # slot at offset 16
  504. .word 7908
  505. .word 35668
  506. .word 34008
  507. .word 16355                    # sign/exponent word 0x3FE3
  508. .word 0
  509. .word 0
  510. .word 0
  511. .word 19927                    # slot at offset 32
  512. .word 58728
  513. .word 49885
  514. .word 45261
  515. .word 49106                    # sign/exponent word 0xBFD2
  516. .word 0
  517. .word 0
  518. .word 0
  519. .word 32884                    # slot at offset 48
  520. .word 22035
  521. .word 23267
  522. .word 64541
  523. .word 16320                    # sign/exponent word 0x3FC0
  524. .word 0
  525. .word 0
  526. .word 0
  527. .word 36401                    # slot at offset 64
  528. .word 51897
  529. .word 8309
  530. .word 57265
  531. .word 49070                    # sign/exponent word 0xBFAE
  532. .word 0
  533. .word 0
  534. .word 0
  535. .word 34286                    # slot at offset 80
  536. .word 2728
  537. .word 41564
  538. .word 34642
  539. .word 16284                    # sign/exponent word 0x3F9C
  540. .word 0
  541. .word 0
  542. .word 0
  543. .word 63248                    # slot at offset 96
  544. .word 18030
  545. .word 35596
  546. .word 60796
  547. .word 49032                    # sign/exponent word 0xBF88
  548. .word 0
  549. .word 0
  550. .word 0
  551. .word 52149                    # slot at offset 112
  552. .word 21294
  553. .word 63985
  554. .word 40123
  555. .word 16245                    # sign/exponent word 0x3F75
  556. .word 0
  557. .word 0
  558. .word 0
  559. .type _cosdl_poly_coeff,@object
  560. .size _cosdl_poly_coeff,128
  # _cosdl_mp_poly_coeff: 32 bytes, read by cosdl as four 8-byte values (fldl at
  # byte offsets 0, 8, 16 and 24). Each value is written as two .long words,
  # low word first.
  # NOTE(review): the values at offsets 8 and 24 have much smaller exponents
  # than those at 0 and 16, so these are presumably (high, low) pairs - confirm.
  561. .align 16
  562. _cosdl_mp_poly_coeff:
  563. .long 3675529145               # offset 0, low word
  564. .long 3206805153               # offset 0, high word (0xBF23F6A1)
  565. .long 2134983071               # offset 8, low word
  566. .long 3151100167               # offset 8, high word
  567. .long 1787026573               # offset 16, low word
  568. .long 1043372817               # offset 16, high word (0x3E30BB11)
  569. .long 205083639                # offset 24, low word
  570. .long 988746860                # offset 24, high word
  571. .type _cosdl_mp_poly_coeff,@object
  572. .size _cosdl_mp_poly_coeff,32
  # _sindl_poly_coeff: nine 16-byte slots (144 bytes). Each slot holds five
  # 16-bit words (four significand words, then the sign/exponent word) followed
  # by three zero padding words. cosdl loads the slots at byte offsets 32..128
  # with fldt; the slots at offsets 0 and 16 are not referenced by cosdl.
  # NOTE(review): the name and the alternating signs suggest series coefficients
  # of sine in degrees - confirm.
  573. .align 16
  574. _sindl_poly_coeff:
  575. .word 51374                    # slot at offset 0
  576. .word 38121
  577. .word 13586
  578. .word 36602
  579. .word 16377                    # sign/exponent word 0x3FF9
  580. .word 0
  581. .word 0
  582. .word 0
  583. .word 50116                    # slot at offset 16
  584. .word 41339
  585. .word 4204
  586. .word 60892
  587. .word 49130                    # sign/exponent word 0xBFEA
  588. .word 0
  589. .word 0
  590. .word 0
  591. .word 33704                    # slot at offset 32
  592. .word 2155
  593. .word 42839
  594. .word 60780
  595. .word 16346                    # sign/exponent word 0x3FDA
  596. .word 0
  597. .word 0
  598. .word 0
  599. .word 21250                    # slot at offset 48
  600. .word 19076
  601. .word 27901
  602. .word 57780
  603. .word 49097                    # sign/exponent word 0xBFC9
  604. .word 0
  605. .word 0
  606. .word 0
  607. .word 9076                     # slot at offset 64
  608. .word 49244
  609. .word 613
  610. .word 64083
  611. .word 16311                    # sign/exponent word 0x3FB7
  612. .word 0
  613. .word 0
  614. .word 0
  615. .word 40572                    # slot at offset 80
  616. .word 30418
  617. .word 36251
  618. .word 46520
  619. .word 49061                    # sign/exponent word 0xBFA5
  620. .word 0
  621. .word 0
  622. .word 0
  623. .word 3227                     # slot at offset 96
  624. .word 25505
  625. .word 5540
  626. .word 47626
  627. .word 16274                    # sign/exponent word 0x3F92
  628. .word 0
  629. .word 0
  630. .word 0
  631. .word 60933                    # slot at offset 112
  632. .word 3300
  633. .word 57416
  634. .word 36218
  635. .word 49023                    # sign/exponent word 0xBF7F
  636. .word 0
  637. .word 0
  638. .word 0
  639. .word 45811                    # slot at offset 128
  640. .word 42646
  641. .word 37125
  642. .word 42185
  643. .word 16235                    # sign/exponent word 0x3F6B
  644. .word 0
  645. .word 0
  646. .word 0
  647. .type _sindl_poly_coeff,@object
  648. .size _sindl_poly_coeff,144
  # _sindl_mp_poly_coeff: 32 bytes, read by cosdl as four 8-byte values (fldl at
  # byte offsets 0, 8, 16 and 24). Each value is written as two .long words,
  # low word first.
  # NOTE(review): presumably double-precision companions of the leading
  # _sindl_poly_coeff terms, stored as (high, low) pairs - confirm.
  649. .align 16
  650. _sindl_mp_poly_coeff:
  651. .long 2723323193               # offset 0, low word
  652. .long 1066524486               # offset 0, high word
  653. .long 2863989530               # offset 8, low word
  654. .long 1008058840               # offset 8, high word
  655. .long 227815288                # offset 16, low word
  656. .long 3199056770               # offset 16, high word
  657. .long 3752327299               # offset 24, low word
  658. .long 3142458725               # offset 24, high word
  659. .type _sindl_mp_poly_coeff,@object
  660. .size _sindl_mp_poly_coeff,32
  # ones: two single-precision values indexed by a 0/1 sign selector in cosdl
  # (fmuls / movss with a scale of 4).
  661. .align 4
  662. ones:
  663. .long 0x3f800000               # index 0: +1.0f
  664. .long 0xbf800000               # index 1: -1.0f
  665. .type ones,@object
  666. .size ones,8
  # _TWO_53H: one 8-byte value read by cosdl with fldl. High word 0x43480000
  # with a zero low word encodes the double 1.5 * 2^53.
  667. .align 4
  668. _TWO_53H:
  669. .long 0                        # low word
  670. .long 1128792064               # high word (0x43480000)
  671. .type _TWO_53H,@object
  672. .size _TWO_53H,8
  # _Rcp90: one 16-byte slot holding an x87 80-bit value read by cosdl with fldt.
  # Significand 0xB60B60B60B60B60B with exponent word 0x3FF8 is approximately
  # 0.0111111, i.e. the reciprocal of 90 as the name indicates.
  673. .align 2
  674. _Rcp90:
  675. .word 46603                    # significand bits 0-15  (0xB60B)
  676. .word 2912                     # significand bits 16-31 (0x0B60)
  677. .word 24758                    # significand bits 32-47 (0x60B6)
  678. .word 46603                    # significand bits 48-63 (0xB60B)
  679. .word 16376                    # sign/exponent word 0x3FF8
  680. .word 0                        # padding
  681. .word 0
  682. .word 0
  683. .type _Rcp90,@object
  684. .size _Rcp90,16
  # Trailing section directives; no further code or data objects are defined in
  # the lines visible here.
  685. .data
  686. .section .note.GNU-stack, ""   # empty section with no flags; NOTE(review): conventionally marks the object as not needing an executable stack - confirm for the target toolchain
  687. // -- Begin DWARF2 SEGMENT .eh_frame
  688. .section .eh_frame,"a",@progbits
  689. .eh_frame_seg:
  690. .align 1
  691. # End