cosdl.S 17 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "cosdl.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin cosdl
  #
  # cosdl: takes one 80-bit x87 extended argument at 8(%ebp) and leaves its
  # result in st(0).  32-bit x86, AT&T syntax, position-independent (@GOTOFF).
  # NOTE(review): judging by the name and the constants used further down
  # (90.0, 1/90, modulo-360 integer arithmetic) this is the cosine of an
  # angle given in degrees -- confirm against the public header.
  #
  # Values established by this entry block:
  #   %edi      1 if the x87 control word was replaced and has to be restored
  #             before returning, 0 if the caller's word was left in place
  #   %esi      biased exponent of x with the sign bit masked off
  #   %ebx      GOT base used by every @GOTOFF reference
  #   30(%esp)  caller's x87 control word
  #   28(%esp)  modified control word (only written when %edi = 1)
  #
  41. .text
  42. .align 16,0x90
  43. .globl cosdl
  44. cosdl:
  45. # parameter 1: 8 + %ebp
  46. ..B1.1:
  47. ..L1:
  48. pushl %ebp
  49. movl %esp, %ebp
  50. andl $-16, %esp    # align the local frame to 16 bytes
  51. pushl %esi
  52. pushl %edi
  53. pushl %ebx
  54. subl $116, %esp    # 3 pushes (12 bytes) + 116 = 128, so %esp stays 16-byte aligned
  55. ..B1.2:
  56. fnstcw 30(%esp)    # save the caller's control word
  57. ..B1.3:
  58. movzwl 30(%esp), %edx
  59. movl %edx, %eax
  60. andl $3840, %eax    # 0x0F00: precision-control and rounding-control fields
  61. cmpl $768, %eax    # 0x0300: 64-bit precision, round to nearest
  62. je ..B1.38    # already in the required mode: ..B1.38 clears %edi
  63. ..B1.4:
  64. andl $-3841, %edx    # clear both fields (~0x0F00)
  65. orl $-64768, %edx    # low 16 bits are 0x0300; the high bits are dropped by movw
  66. movw %dx, 28(%esp)
  67. ..B1.5:
  68. fldcw 28(%esp)
  69. ..B1.6:
  70. movl $1, %edi    # remember that the control word must be restored
  71. ..B1.7:
  72. movzwl 16(%ebp), %esi    # bytes 8..9 of the 10-byte argument: sign + exponent
  73. andl $32767, %esi    # drop the sign bit
  # call/pop sequence: obtain the current address to form the GOT base in %ebx
  74. call ..L2
  75. ..L2:
  76. popl %ebx
  77. lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%ebx), %ebx
  78. cmpl $16446, %esi    # 0x403E = bias 16383 + 63
  79. jge ..B1.19    # |x| >= 2^63 (also Inf/NaN): handled at ..B1.19
  80. ..B1.8:
  # Tiny-argument path.  On entry %esi = biased exponent of x (sign masked),
  # %edi = "restore control word" flag, 30(%esp) = caller's control word.
  81. cmpl $16318, %esi    # 0x3FBE = bias 16383 - 65
  82. jge ..B1.12    # exponent at or above that threshold: regular path
  83. ..B1.9:
  84. fldt .L_2il0floatpacket.0@GOTOFF(%ebx)    # 1.0
  85. fldt 8(%ebp)    # x
  86. movzbl 17(%ebp), %eax
  87. andl $128, %eax
  88. shrl $7, %eax    # %eax = sign bit of x (0 or 1)
  89. testl %edi, %edi    # flags are consumed by the je below; the x87 ops in between do not write EFLAGS
  90. fmuls ones@GOTOFF(%ebx,%eax,4)    # multiply by +1.0f / -1.0f selected by the sign bit
  # NOTE(review): with GAS's AT&T operand convention for fsubrp this should
  # leave 1.0 - |x| -- confirm against the assembler documentation.
  91. fsubrp %st, %st(1)
  92. fstpt (%esp)    # park the result while the control word is restored
  93. je ..B1.11
  94. ..B1.10:
  95. fldcw 30(%esp)    # restore the caller's control word
  96. ..B1.11:
  97. fldt (%esp)    # return value in st(0)
  98. addl $116, %esp
  99. popl %ebx
  100. popl %edi
  101. popl %esi
  102. movl %ebp, %esp
  103. popl %ebp
  104. ret
  105. ..B1.12:
  # Regular path: reduce |x| by the nearest multiple of 90.
  #   t = |x| * _Rcp90 + 1.5*2^63 is stored to 92(%esp); the low 32 bits of
  #   its significand are then read back as an integer k.
  #   %edx = k + 1, %ecx = bit 1 of (k + 1) (index into `ones`).
  #   The remainder r is left in st(0) for ..B1.13; if r compares equal to
  #   0.0 control goes to ..B1.31 instead.
  # NOTE(review): the stack bookkeeping below was traced assuming GAS's
  # AT&T convention for fsubp/fsubrp with an %st(i) destination
  # (r = |x| - 90*k) -- confirm if this code is ever re-assembled elsewhere.
  106. fldt 8(%ebp)    # x
  107. fldt .L_2il0floatpacket.1@GOTOFF(%ebx)    # 1.5 * 2^63
  108. fldt _Rcp90@GOTOFF(%ebx)
  109. fldt .L_2il0floatpacket.2@GOTOFF(%ebx)    # 90.0
  110. fldt .L_2il0floatpacket.3@GOTOFF(%ebx)    # 0.0
  111. movzbl 17(%ebp), %edx
  112. andl $128, %edx
  113. shrl $7, %edx    # %edx = sign bit of x
  114. fxch %st(4)
  115. fmuls ones@GOTOFF(%ebx,%edx,4)    # |x|
  116. fmul %st, %st(2)    # |x| * _Rcp90
  117. fxch %st(2)
  118. fadd %st(3), %st    # + 1.5*2^63
  119. fstpt 92(%esp)    # t
  120. fxch %st(1)
  121. fstpt 8(%ebp)    # |x| overwrites the incoming argument slot
  122. fldt 8(%ebp)
  123. fldt 92(%esp)
  124. movl 92(%esp), %edx    # low 32 bits of t's significand
  125. fsubp %st, %st(3)
  126. fxch %st(2)
  127. fmulp %st, %st(1)
  128. incl %edx    # k + 1
  129. movl %edx, %ecx
  130. fsubrp %st, %st(1)
  131. andl $2, %ecx
  132. fucom %st(1)    # compare the remainder with 0.0
  133. fnstsw %ax
  134. fxch %st(1)
  135. fstp %st(0)    # drop the 0.0; only the remainder stays on the stack
  136. shrl $1, %ecx    # %ecx = 0 or 1
  137. sahf    # move the x87 condition codes into EFLAGS
  138. jp ..B1.13    # unordered: take the general path
  139. je ..B1.31    # remainder is exactly zero
  140. ..B1.13:
  # Shared preparation for both polynomial paths.
  # On entry: st(0) = reduced argument, %edx = k + 1, %ecx = 0/1 sign index.
  # NOTE(review): the sequence appears to split the reduced argument into a
  # high and a low part (multiplier 2^33 + 1) and to build a similarly split
  # square with the help of _TWO_53H -- the exact stack layout handed to
  # ..B1.14 / ..B1.15 was not re-derived here; treat as an assumption.
  # Side stores: 80(%esp) receives one intermediate (10 bytes) and 12(%esp)
  # receives the +1.0f / -1.0f factor selected by %ecx.
  141. fldt .L_2il0floatpacket.4@GOTOFF(%ebx)    # 2^33 + 1
  142. testb $1, %dl    # parity of k + 1; consumed by the je at the end of this block
  143. fmul %st(1), %st
  144. fld %st(1)
  145. fsubr %st(1), %st
  146. fsubrp %st, %st(1)
  147. fld %st(0)
  148. fmul %st(1), %st
  149. fld %st(2)
  150. fsub %st(2), %st
  151. fmul %st, %st(2)
  152. fld %st(3)
  153. fxch %st(1)
  154. fmul %st(4), %st
  155. faddp %st, %st(3)
  156. fld %st(1)
  157. fld %st(4)
  158. fldl _TWO_53H@GOTOFF(%ebx)    # 1.5 * 2^53 (double)
  159. fld %st(0)
  160. fmul %st(5), %st
  161. fadd %st, %st(3)
  162. fsubrp %st, %st(3)
  163. fxch %st(2)
  164. fsubr %st, %st(4)
  165. fxch %st(4)
  166. faddp %st, %st(5)
  167. fxch %st(1)
  168. fmul %st, %st(2)
  169. fld %st(5)
  170. fadd %st(3), %st
  171. fsubp %st, %st(3)
  172. fxch %st(1)
  173. fsub %st(2), %st
  174. fstpt 80(%esp)
  175. fld %st(4)
  176. fmul %st(5), %st
  177. fld %st(0)
  178. fmul %st(1), %st
  179. flds ones@GOTOFF(%ebx,%ecx,4)
  180. fstps 12(%esp)    # sign factor for the final result
  181. je ..B1.15    # bit 0 of %dl clear: use the _sindl_* coefficient path
  182. ..B1.14:
  # Evaluation with the _cosdl_* tables; reached when bit 0 of %dl is set.
  # Reads _cosdl_poly_coeff entries at byte offsets 84/60/36 and 72/48/24
  # (two separate multiply-add chains that are summed), then the four
  # doubles of _cosdl_mp_poly_coeff.  The sign factor saved at 12(%esp) and
  # the constant 1.0 are folded in at the end; the 80-bit result is stored
  # at 64(%esp).  Scratch slots used: (%esp) and 16(%esp), 10 bytes each.
  # The x87 stack layout is inherited from the preceding block; the
  # instruction order is significant and must not be changed.
  183. fstp %st(6)
  184. fstp %st(2)
  185. fld %st(2)
  186. fmul %st(3), %st
  187. fmul %st, %st(1)
  188. fld %st(2)
  189. fmul %st(5), %st
  190. fxch %st(4)
  191. fstpt (%esp)    # store/reload pair through a 10-byte scratch slot
  192. fldt (%esp)
  193. fld %st(3)
  194. fmul %st(7), %st
  195. fxch %st(1)
  196. fmul %st(6), %st
  197. faddp %st, %st(5)
  198. fld %st(1)
  199. fadd %st(3), %st
  200. fsubp %st, %st(3)
  201. fxch %st(2)
  202. fstpt 16(%esp)    # store/reload pair through a 10-byte scratch slot
  203. fldt 16(%esp)
  204. fsubrp %st, %st(1)
  205. faddp %st, %st(3)
  # first chain: entries at +84, +60, +36
  206. fldt 84+_cosdl_poly_coeff@GOTOFF(%ebx)
  207. fmul %st(5), %st
  208. fldt 60+_cosdl_poly_coeff@GOTOFF(%ebx)
  209. faddp %st, %st(1)
  210. fmul %st(5), %st
  211. fldt 36+_cosdl_poly_coeff@GOTOFF(%ebx)
  212. faddp %st, %st(1)
  213. fmul %st(2), %st
  # second chain: entries at +72, +48, +24
  214. fldt 72+_cosdl_poly_coeff@GOTOFF(%ebx)
  215. fmul %st(6), %st
  216. fldt 48+_cosdl_poly_coeff@GOTOFF(%ebx)
  217. faddp %st, %st(1)
  218. fmul %st(6), %st
  219. fldt 24+_cosdl_poly_coeff@GOTOFF(%ebx)
  220. faddp %st, %st(1)
  221. faddp %st, %st(1)    # sum of the two chains
  222. fmulp %st, %st(1)
  # double-precision coefficients, loaded in the order +24, +8, +16, +0
  223. fldl 24+_cosdl_mp_poly_coeff@GOTOFF(%ebx)
  224. fmulp %st, %st(5)
  225. faddp %st, %st(4)
  226. fldl 8+_cosdl_mp_poly_coeff@GOTOFF(%ebx)
  227. fmulp %st, %st(1)
  228. faddp %st, %st(3)
  229. fldl 16+_cosdl_mp_poly_coeff@GOTOFF(%ebx)
  230. fmul %st, %st(1)
  231. fxch %st(1)
  232. faddp %st, %st(3)
  233. fldl _cosdl_mp_poly_coeff@GOTOFF(%ebx)
  234. fmul %st, %st(2)
  235. fxch %st(2)
  236. faddp %st, %st(3)
  237. fldt (%esp)
  238. fmulp %st, %st(2)
  239. fld %st(1)
  240. fldt 16(%esp)
  241. fmulp %st, %st(2)
  242. fadd %st(1), %st
  243. fsubr %st, %st(2)
  244. fxch %st(2)
  245. faddp %st, %st(1)
  246. faddp %st, %st(2)
  247. flds 12(%esp)    # +1.0f / -1.0f sign factor
  248. fld %st(0)
  249. fldt .L_2il0floatpacket.0@GOTOFF(%ebx)    # 1.0
  250. fld %st(0)
  251. fadd %st(4), %st
  252. fmul %st, %st(2)
  253. fsubrp %st, %st(1)
  254. faddp %st, %st(3)
  255. fxch %st(3)
  256. faddp %st, %st(2)
  257. fmulp %st, %st(1)
  258. faddp %st, %st(1)
  259. fstpt 64(%esp)    # final result, reloaded by ..B1.18
  260. jmp ..B1.16
  261. ..B1.15:
  # Evaluation with the _sindl_* tables; reached when bit 0 of %dl is clear.
  # Reads _sindl_poly_coeff entries at byte offsets 96/72/48/24 and 84/60/36
  # (two separate multiply-add chains that are combined), then the four
  # doubles of _sindl_mp_poly_coeff.  Both result parts are multiplied by
  # the sign factor saved at 12(%esp) and the sum is stored as an 80-bit
  # value at 64(%esp).  Scratch slots used: (%esp), 16(%esp), 32(%esp) and
  # 48(%esp); 80(%esp) is read back from the preceding block.
  # The x87 stack layout is inherited from the preceding block; the
  # instruction order is significant and must not be changed.
  262. fld %st(6)
  263. fmul %st(2), %st
  264. fstpt 32(%esp)
  265. fld %st(4)
  266. fmul %st(4), %st
  267. fmul %st, %st(3)
  268. fxch %st(6)
  269. fmul %st(7), %st
  270. fxch %st(2)
  271. fstpt 16(%esp)
  272. fxch %st(3)
  273. fstpt (%esp)
  274. fldt 80(%esp)    # intermediate saved before the dispatch
  275. fmul %st, %st(4)
  276. fxch %st(1)
  277. faddp %st, %st(4)
  278. fld %st(4)
  279. fadd %st(2), %st
  280. fsubp %st, %st(2)
  281. fxch %st(1)
  282. fstpt 48(%esp)    # store/reload pair through a 10-byte scratch slot
  283. fldt 48(%esp)
  284. fsubrp %st, %st(4)
  285. fxch %st(3)
  286. faddp %st, %st(2)
  # first chain: entries at +96, +72, +48, +24
  287. fldt 96+_sindl_poly_coeff@GOTOFF(%ebx)
  288. fmul %st(1), %st
  289. fldt 72+_sindl_poly_coeff@GOTOFF(%ebx)
  290. faddp %st, %st(1)
  291. fmul %st(1), %st
  292. fldt 48+_sindl_poly_coeff@GOTOFF(%ebx)
  293. faddp %st, %st(1)
  294. fmul %st(1), %st
  295. fldt 24+_sindl_poly_coeff@GOTOFF(%ebx)
  296. faddp %st, %st(1)
  # second chain: entries at +84, +60, +36
  297. fldt 60+_sindl_poly_coeff@GOTOFF(%ebx)
  298. fldt 36+_sindl_poly_coeff@GOTOFF(%ebx)
  299. fldt 16(%esp)
  300. fmulp %st, %st(3)
  301. fldt 84+_sindl_poly_coeff@GOTOFF(%ebx)
  302. fmul %st(4), %st
  303. faddp %st, %st(2)
  304. fxch %st(1)
  305. fmul %st(3), %st
  306. faddp %st, %st(1)
  307. fmulp %st, %st(2)
  308. faddp %st, %st(1)
  309. fldt 32(%esp)
  310. fmul %st, %st(1)
  # double-precision coefficients, loaded in the order +24, +8, +16, +0
  311. fldl 24+_sindl_mp_poly_coeff@GOTOFF(%ebx)
  312. fmulp %st, %st(1)
  313. faddp %st, %st(1)
  314. fldl 8+_sindl_mp_poly_coeff@GOTOFF(%ebx)
  315. fmulp %st, %st(4)
  316. faddp %st, %st(3)
  317. fldl 16+_sindl_mp_poly_coeff@GOTOFF(%ebx)
  318. fmul %st, %st(1)
  319. fxch %st(1)
  320. faddp %st, %st(3)
  321. fldl _sindl_mp_poly_coeff@GOTOFF(%ebx)
  322. fmul %st, %st(2)
  323. fxch %st(2)
  324. faddp %st, %st(3)
  325. fldt (%esp)
  326. fmulp %st, %st(2)
  327. fld %st(1)
  328. fldt 48(%esp)
  329. fmulp %st, %st(2)
  330. fadd %st(1), %st
  331. fsubr %st, %st(2)
  332. fxch %st(2)
  333. faddp %st, %st(1)
  334. faddp %st, %st(2)
  335. flds 12(%esp)    # +1.0f / -1.0f sign factor
  336. fmul %st, %st(1)
  337. fmulp %st, %st(2)
  338. faddp %st, %st(1)
  339. fstpt 64(%esp)    # final result, reloaded by ..B1.18
  340. ..B1.16:
  # Common exit of the two polynomial paths: the 80-bit result is at 64(%esp).
  341. testl %edi, %edi    # was the control word replaced on entry?
  342. je ..B1.18
  343. ..B1.17:
  344. fldcw 30(%esp)    # restore the caller's control word
  345. ..B1.18:
  346. fldt 64(%esp)    # return value in st(0)
  347. addl $116, %esp
  # pops mirror the entry pushes (%esi, %edi, %ebx) in reverse order
  348. popl %ebx
  349. popl %edi
  350. popl %esi
  351. movl %ebp, %esp    # discard the alignment padding created on entry
  352. popl %ebp
  353. ret
  354. ..B1.19:
  # Huge-argument path: biased exponent >= 0x403E (|x| >= 2^63).
  # On entry %esi = biased exponent of x.  On exit %ecx = left-shift count
  # used by ..B1.22.
  355. cmpl $32767, %esi    # all-ones exponent: Inf or NaN
  356. je ..B1.37
  357. ..B1.20:
  358. lea -16446(%esi), %ecx    # e = exponent - 0x403E
  359. cmpl $14, %ecx
  360. jle ..B1.22    # e <= 14: shift by e directly
  361. ..B1.21:
  # e > 14: replace it by ((e - 3) mod 12) + 3, a value in 3..14.
  # NOTE(review): presumably valid because 2^12 == 1 (mod 45), so
  # 2^(n+12) == 2^n (mod 360) for n >= 3 -- confirm.
  362. addl $-16449, %esi    # e - 3
  363. movl $715827883, %eax    # 0x2AAAAAAB = ceil(2^32 / 6)
  364. imull %esi    # %edx = high half = (e - 3) / 6
  365. movl %esi, %ecx
  366. sarl $1, %edx    # (e - 3) / 12
  367. sarl $31, %ecx
  368. subl %ecx, %edx    # sign correction of the signed-division idiom
  369. lea (,%edx,8), %ecx
  370. lea (%ecx,%edx,4), %eax    # 12 * quotient
  371. subl %eax, %esi    # (e - 3) mod 12
  372. lea 3(%esi), %ecx
  373. ..B1.22:
  # Integer reduction of the 64-bit significand modulo 360, then table lookup.
  # On entry: %ecx = left-shift count, %edi = "restore control word" flag,
  # 8(%ebp) / 12(%ebp) = low / high 32 bits of the significand.
  # The high word is handled in two pieces (low 24 bits shifted left by 8,
  # top 8 bits shifted down to bits 8..15), each reduced mod 360 on its own;
  # the low word is reduced mod 360 as well, and the three residues are summed.
  # NOTE(review): scaling the high word by 2^8 presumably relies on
  # 2^32 == 256 (mod 360) -- confirm.
  # Division by 360 uses 381774871 = ceil(2^37 / 360) on a value pre-shifted
  # right by 3, taking the high product word and shifting right by 2.
  374. movl %edi, (%esp)    # free %edi; the flag is reloaded before the lookup
  375. movl $381774871, %eax
  376. movl 12(%ebp), %edi    # high 32 bits of the significand
  377. movl %edi, %esi
  378. shll $8, %esi    # low 24 bits of the high word, times 2^8
  379. andl $-16777216, %edi    # keep the top 8 bits (0xFF000000)
  380. movl %esi, %edx
  381. shrl $3, %edx
  382. mull %edx
  383. shrl $2, %edx    # quotient by 360
  384. imull $-360, %edx, %eax
  385. movl %edi, %edx
  386. shrl $19, %edi    # (top 8 bits << 8) >> 3
  387. addl %eax, %esi    # first residue
  388. movl $381774871, %eax
  389. shrl $16, %edx    # top 8 bits << 8
  390. movl %edx, 4(%esp)
  391. mull %edi
  392. shrl $2, %edx
  393. imull $-360, %edx, %edi
  394. movl 4(%esp), %eax
  395. addl %edi, %eax    # second residue
  396. movl 8(%ebp), %edi    # low 32 bits of the significand
  397. movl %edi, %edx
  398. shrl $3, %edx
  399. addl %eax, %esi
  400. movl $381774871, %eax
  401. mull %edx
  402. shrl $2, %edx
  403. imull $-360, %edx, %eax
  404. addl %eax, %edi    # third residue
  405. movl $-1240768329, %eax    # 0xB60B60B7 = ceil(2^40 / 360), used as a signed magic number
  406. addl %edi, %esi    # sum of the three residues
  407. shll %cl, %esi    # apply the power-of-two scale from the exponent
  408. imull %esi
  409. addl %esi, %edx
  410. movl %esi, %ecx
  411. sarl $8, %edx
  412. sarl $31, %ecx
  413. subl %ecx, %edx    # signed quotient by 360
  414. imull $-360, %edx, %eax
  415. addl %eax, %esi    # %esi = value mod 360
  416. movl (%esp), %edi    # reload the "restore control word" flag
  # Fold into 0..89 while building a selector in %eax: it starts at 1
  # (3 when 180 was subtracted) and is incremented when 90 is subtracted.
  417. cmpl $180, %esi
  418. jl ..B1.24
  419. ..B1.23:
  420. addl $-180, %esi
  421. movl $3, %eax
  422. jmp ..B1.25
  423. ..B1.24:
  424. movl $1, %eax
  425. ..B1.25:
  426. cmpl $90, %esi
  427. jl ..B1.27
  428. ..B1.26:
  429. incl %eax
  430. addl $-90, %esi
  431. ..B1.27:
  # Table offset = 32 * %esi + 16 * (selector & 1); bit 1 of the selector
  # picks +1.0f or -1.0f from `ones`.
  # NOTE(review): presumably each 32-byte row holds two (high, low) double
  # pairs for one whole degree; the table itself is defined elsewhere.
  432. movl %eax, %edx
  433. andl $1, %eax
  434. shll $4, %eax
  435. andl $2, %edx
  436. shll $5, %esi
  437. addl %esi, %eax
  438. shrl $1, %edx
  439. fldl __libm_sindl_cosdl_table@GOTOFF(%eax,%ebx)    # first double of the pair
  440. faddl 8+__libm_sindl_cosdl_table@GOTOFF(%ebx,%eax)    # plus the second double
  441. fmuls ones@GOTOFF(%ebx,%edx,4)    # apply the sign
  442. fstpt (%esp)    # result for ..B1.28 (the flag slot is no longer needed)
  443. ..B1.28:
  # Common exit for paths that leave an 80-bit result at (%esp).
  444. testl %edi, %edi    # was the control word replaced on entry?
  445. je ..B1.30
  446. ..B1.29:
  447. fldcw 30(%esp)    # restore the caller's control word
  448. ..B1.30:
  449. fldt (%esp)    # return value in st(0)
  450. addl $116, %esp
  # pops mirror the entry pushes (%esi, %edi, %ebx) in reverse order
  451. popl %ebx
  452. popl %edi
  453. popl %esi
  454. movl %ebp, %esp    # discard the alignment padding created on entry
  455. popl %ebp
  456. ret
  457. ..B1.31:
  # Reached when the reduced argument compared equal to 0.0.
  # On entry: st(0) = that remainder, %edx = k + 1, %ecx = 0/1 index into
  # `ones`.  The result is +1.0 / -1.0 when k + 1 is odd and 0.0 otherwise;
  # it is kept as a 32-bit float at (%esp).
  458. fstp %st(0)    # discard the remainder
  459. testb $1, %dl
  460. je ..B1.33
  461. ..B1.32:
  462. flds ones@GOTOFF(%ebx,%ecx,4)    # +1.0f or -1.0f
  463. fstps (%esp)
  464. jmp ..B1.34
  465. ..B1.33:
  466. fldz    # exact zero
  467. fstps (%esp)
  468. ..B1.34:
  469. testl %edi, %edi    # was the control word replaced on entry?
  470. je ..B1.36
  471. ..B1.35:
  472. fldcw 30(%esp)    # restore the caller's control word
  473. ..B1.36:
  474. flds (%esp)    # return value in st(0)
  475. addl $116, %esp
  476. popl %ebx
  477. popl %edi
  478. popl %esi
  479. movl %ebp, %esp
  480. popl %ebp
  481. ret
  482. ..B1.37:
  # Exponent field is all ones (Inf or NaN): return 0.0 * x through the
  # shared exit at ..B1.28.
  483. fldt .L_2il0floatpacket.3@GOTOFF(%ebx)    # 0.0
  484. fldt 8(%ebp)    # x
  485. fmulp %st, %st(1)
  486. fstpt (%esp)
  487. jmp ..B1.28
  488. ..B1.38:
  # The caller's control word already had the required precision/rounding
  # fields: no restore is needed on exit.
  489. xorl %edi, %edi
  490. jmp ..B1.7
  491. .align 16,0x90
  492. .type cosdl,@function
  493. .size cosdl,.-cosdl
  494. .data
  495. # -- End cosdl
  496. .section .rodata, "a"
  497. .align 16
  498. .align 16
  499. .L_2il0floatpacket.0:
  # Five x87 80-bit extended constants.  Each is stored little-endian as an
  # 8-byte significand plus a 2-byte sign/exponent word (bias 16383),
  # followed by 2 zero bytes (object size 12) and 4 bytes of padding.
  500. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xff,0x3f,0x00,0x00    # 1.0
  501. .type .L_2il0floatpacket.0,@object
  502. .size .L_2il0floatpacket.0,12
  503. .space 4, 0x00 # pad
  504. .align 16
  505. .L_2il0floatpacket.1:
  506. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x3e,0x40,0x00,0x00    # 1.5 * 2^63
  507. .type .L_2il0floatpacket.1,@object
  508. .size .L_2il0floatpacket.1,12
  509. .space 4, 0x00 # pad
  510. .align 16
  511. .L_2il0floatpacket.2:
  512. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xb4,0x05,0x40,0x00,0x00    # 90.0
  513. .type .L_2il0floatpacket.2,@object
  514. .size .L_2il0floatpacket.2,12
  515. .space 4, 0x00 # pad
  516. .align 16
  517. .L_2il0floatpacket.3:
  518. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00    # 0.0
  519. .type .L_2il0floatpacket.3,@object
  520. .size .L_2il0floatpacket.3,12
  521. .space 4, 0x00 # pad
  522. .align 16
  523. .L_2il0floatpacket.4:
  524. .byte 0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x80,0x20,0x40,0x00,0x00    # 2^33 + 1
  525. .type .L_2il0floatpacket.4,@object
  526. .size .L_2il0floatpacket.4,12
  527. .space 4, 0x00 # pad
  528. .align 16
  529. _cosdl_poly_coeff:
  # Eight entries of 12 bytes each (96 bytes total).  Every entry is five
  # 16-bit words -- four significand words, least significant first, then
  # the sign/exponent word -- followed by one zero padding word, i.e. an
  # 80-bit extended value as read by fldt.
  # The code in this file loads the entries at byte offsets 24 through 84.
  # entry at +0
  530. .word 52350
  531. .word 41213
  532. .word 3800
  533. .word 40885
  534. .word 49138
  535. .word 0
  # entry at +12
  536. .word 28613
  537. .word 7908
  538. .word 35668
  539. .word 34008
  540. .word 16355
  541. .word 0
  # entry at +24
  542. .word 19927
  543. .word 58728
  544. .word 49885
  545. .word 45261
  546. .word 49106
  547. .word 0
  # entry at +36
  548. .word 32884
  549. .word 22035
  550. .word 23267
  551. .word 64541
  552. .word 16320
  553. .word 0
  # entry at +48
  554. .word 36401
  555. .word 51897
  556. .word 8309
  557. .word 57265
  558. .word 49070
  559. .word 0
  # entry at +60
  560. .word 34286
  561. .word 2728
  562. .word 41564
  563. .word 34642
  564. .word 16284
  565. .word 0
  # entry at +72
  566. .word 63248
  567. .word 18030
  568. .word 35596
  569. .word 60796
  570. .word 49032
  571. .word 0
  # entry at +84
  572. .word 52149
  573. .word 21294
  574. .word 63985
  575. .word 40123
  576. .word 16245
  577. .word 0
  578. .type _cosdl_poly_coeff,@object
  579. .size _cosdl_poly_coeff,96
  580. .align 16
  581. _cosdl_mp_poly_coeff:
  # Four 64-bit doubles (32 bytes), each written as two 32-bit words with
  # the low word first.  They are read with fldl at byte offsets 0, 8, 16
  # and 24.
  # double at +0
  582. .long 3675529145
  583. .long 3206805153
  # double at +8
  584. .long 2134983071
  585. .long 3151100167
  # double at +16
  586. .long 1787026573
  587. .long 1043372817
  # double at +24
  588. .long 205083639
  589. .long 988746860
  590. .type _cosdl_mp_poly_coeff,@object
  591. .size _cosdl_mp_poly_coeff,32
  592. .align 16
  593. _sindl_poly_coeff:
  # Nine entries of 12 bytes each (108 bytes total).  Every entry is five
  # 16-bit words -- four significand words, least significant first, then
  # the sign/exponent word -- followed by one zero padding word, i.e. an
  # 80-bit extended value as read by fldt.
  # The code in this file loads the entries at byte offsets 24 through 96.
  # entry at +0
  594. .word 51374
  595. .word 38121
  596. .word 13586
  597. .word 36602
  598. .word 16377
  599. .word 0
  # entry at +12
  600. .word 50116
  601. .word 41339
  602. .word 4204
  603. .word 60892
  604. .word 49130
  605. .word 0
  # entry at +24
  606. .word 33704
  607. .word 2155
  608. .word 42839
  609. .word 60780
  610. .word 16346
  611. .word 0
  # entry at +36
  612. .word 21250
  613. .word 19076
  614. .word 27901
  615. .word 57780
  616. .word 49097
  617. .word 0
  # entry at +48
  618. .word 9076
  619. .word 49244
  620. .word 613
  621. .word 64083
  622. .word 16311
  623. .word 0
  # entry at +60
  624. .word 40572
  625. .word 30418
  626. .word 36251
  627. .word 46520
  628. .word 49061
  629. .word 0
  # entry at +72
  630. .word 3227
  631. .word 25505
  632. .word 5540
  633. .word 47626
  634. .word 16274
  635. .word 0
  # entry at +84
  636. .word 60933
  637. .word 3300
  638. .word 57416
  639. .word 36218
  640. .word 49023
  641. .word 0
  # entry at +96
  642. .word 45811
  643. .word 42646
  644. .word 37125
  645. .word 42185
  646. .word 16235
  647. .word 0
  648. .type _sindl_poly_coeff,@object
  649. .size _sindl_poly_coeff,108
  650. .space 4, 0x00 # pad
  651. .align 16
  652. _sindl_mp_poly_coeff:
  # Four 64-bit doubles (32 bytes), each written as two 32-bit words with
  # the low word first.  They are read with fldl at byte offsets 0, 8, 16
  # and 24.
  # double at +0
  653. .long 2723323193
  654. .long 1066524486
  # double at +8
  655. .long 2863989530
  656. .long 1008058840
  # double at +16
  657. .long 227815288
  658. .long 3199056770
  # double at +24
  659. .long 3752327299
  660. .long 3142458725
  661. .type _sindl_mp_poly_coeff,@object
  662. .size _sindl_mp_poly_coeff,32
  663. .align 4
  664. ones:
  # Two single-precision floats indexed by a 0/1 value (scale 4) to apply a
  # sign with fmuls/flds.
  665. .long 0x3f800000    # index 0: +1.0f
  666. .long 0xbf800000    # index 1: -1.0f
  667. .type ones,@object
  668. .size ones,8
  669. .align 4
  670. _TWO_53H:
  # One 64-bit double, low word first: bit pattern 0x4348000000000000,
  # i.e. 1.5 * 2^53.  Read with fldl.
  671. .long 0
  672. .long 1128792064
  673. .type _TWO_53H,@object
  674. .size _TWO_53H,8
  675. .align 2
  676. _Rcp90:
  # One 80-bit extended value: significand 0xB60B60B60B60B60B (four words,
  # least significant first), sign/exponent word 0x3FF8, then one zero
  # padding word.  That bit pattern is 1/90 rounded to 64 significand bits.
  # Read with fldt.
  677. .word 46603
  678. .word 2912
  679. .word 24758
  680. .word 46603
  681. .word 16376
  682. .word 0
  683. .type _Rcp90,@object
  684. .size _Rcp90,12
  685. .data
  686. .hidden __libm_sindl_cosdl_table
  687. .section .note.GNU-stack, ""
  688. # End