sincosf_gen.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "sincosf_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin sincosf
  # sincosf: single-precision sine and cosine of one argument, stored through
  # two pointers. AT&T syntax; arguments arrive in %xmm0, %rdi and %rsi.
  # The exponent of x selects one of three paths:
  #   2^-126 <= |x| < 8192 : range reduction by multiples of pi/32, a 64-entry
  #                          sin/cos table and short polynomials, all in double
  #   |x| < 2^-126         : sin = x*(1 - 2^-30), cos = 1 - x*x
  #   |x| >= 8192          : Inf/NaN give x*0 for both results; finite values
  #                          are widened to double and passed to __libm_sse2_sincos
  # Registers written here: %rax, %rcx, %rdx, %xmm0-%xmm7, flags.
  # Stack: 40 bytes (two saved pointers plus two 8-byte scratch slots).
  41. .text
  42. .align 16,0x90
  43. .globl sincosf
  44. sincosf:
  45. # parameter 1: %xmm0 - x, the single-precision input
  46. # parameter 2: %rdi - address that receives the sine
  47. # parameter 3: %rsi - address that receives the cosine
  48. ..B1.1:
  49. .cfi_startproc
  50. ..___tag_value_sincosf.1:
  51. ..L2:
  52. subq $40, %rsp # 40 bytes of locals; 40 + 8 (return address) is a multiple of 16
  53. .cfi_def_cfa_offset 48
  54. movq %rdi, (%rsp) # (%rsp) = sine destination
  55. movq %rsi, 8(%rsp) # 8(%rsp) = cosine destination
  56. ..B1.2:
  57. pextrw $1, %xmm0, %eax # eax = bits 16..31 of x: sign, exponent, top 7 mantissa bits
  58. andw $32767, %ax # drop the sign bit
  59. subw $128, %ax # smallest normal exponent (field value 1) now maps to 0
  60. cmpw $17791, %ax # 17791 = 0x457F: in range iff 2^-126 <= |x| < 8192
  61. ja .L_2TAG_PACKET_0.0.1 # unsigned test catches both too small (wrapped) and too large
  # ---- main path: k = round(x * 32/pi), r = x - k*pi/32 ----
  62. movq PI_32_RECIP(%rip), %xmm1 # xmm1 = 32/pi as a float (upper dword of the load is 0)
  63. mulss %xmm0, %xmm1 # xmm1 = x * 32/pi
  64. movapd BIT_31(%rip), %xmm3 # xmm3 = mask with only bit 31 set
  65. cvtss2si %xmm1, %edx # edx = k, x*32/pi rounded to an integer in the current rounding mode
  66. movl %edx, %ecx # ecx = k as well (becomes the cosine-side table index)
  67. andpd %xmm0, %xmm3 # xmm3 = sign bit of x, still at bit 31
  68. cvtss2sd %xmm0, %xmm0 # widen x to double
  69. movlhps %xmm0, %xmm0 # xmm0 = {x, x}
  70. addss SHIFTER(%rip), %xmm1 # add then subtract 1.5*2^23 to round x*32/pi
  71. subss SHIFTER(%rip), %xmm1 # to an integer-valued float: xmm1 = (float) k
  72. movapd %xmm3, %xmm4
  73. psllq $32, %xmm3 # sign bit of x moved to bit 63
  74. psllq $29, %xmm4 # sign bit of x moved to bit 60
  75. xorpd %xmm4, %xmm3 # xmm3 = bits 63 and 60 set iff x is negative
  76. addl $1865216, %edx # bias is a multiple of 64, so it vanishes under the mask below
  77. shll $4, %edx # scale by 16 bytes per table entry
  78. addl $1865232, %ecx # same bias plus 16 entries, i.e. 16 * pi/32 = pi/2 ahead
  79. shll $4, %ecx
  80. movq P_1(%rip), %xmm2 # xmm2 = P_1
  81. xorpd %xmm3, %xmm2 # negative x: flip sign and exponent bit 60 of P_1
  82. xorpd P_2(%rip), %xmm3 # xmm3 = P_2 with the same conditional flips
  83. movlhps %xmm2, %xmm2 # broadcast to both lanes
  84. movlhps %xmm3, %xmm3 # broadcast to both lanes
  85. lea Ctable(%rip), %rax
  86. andq $1008, %rdx # rdx = (k mod 64) * 16
  87. andq $1008, %rcx # rcx = ((k + 16) mod 64) * 16
  88. addq %rax, %rcx # rcx -> table entry for the cosine result
  89. addq %rdx, %rax # rax -> table entry for the sine result
  90. psllq $29, %xmm1 # float bits of k shifted into double field positions, bias not adjusted
  # NOTE(review): the unadjusted bias scales k by 2^-896 and a negative sign
  # lands on exponent bit 60; P_1 and P_2 look pre-scaled by 2^896 and the
  # bit 63/60 flips above appear to compensate for negative k - confirm.
  91. movlhps %xmm1, %xmm1 # broadcast to both lanes
  92. mulpd %xmm1, %xmm2 # xmm2 = k * P_1 (after the scaling cancels)
  93. mulpd %xmm1, %xmm3 # xmm3 = k * P_2 (after the scaling cancels)
  94. subpd %xmm2, %xmm0 # xmm0 = x - k*P_1
  95. addpd %xmm0, %xmm3 # xmm3 = r = (x - k*P_1) + k*P_2, the reduced argument
  96. mulpd %xmm0, %xmm0 # xmm0 = (x - k*P_1)^2, called r2 below
  # ---- polynomials: xmm4 ~ cos(r), xmm5 ~ sin(r)/r ----
  97. movapd C_1(%rip), %xmm4 # C_1 = -1/2
  98. movapd S_1(%rip), %xmm5 # S_1 ~ -1/6
  99. movapd ONE(%rip), %xmm1 # 1.0 in both lanes
  100. mulpd %xmm0, %xmm4 # C_1 * r2
  101. mulpd %xmm0, %xmm5 # S_1 * r2
  102. movapd C_2(%rip), %xmm6 # C_2 ~ 1/24
  103. movapd S_2(%rip), %xmm7 # S_2 ~ 1/120
  104. mulpd %xmm0, %xmm0 # xmm0 = r2^2 = r4
  105. addpd %xmm1, %xmm4 # 1 + C_1*r2
  106. mulpd %xmm0, %xmm6 # C_2 * r4
  107. addpd %xmm1, %xmm5 # 1 + S_1*r2
  108. mulpd %xmm0, %xmm7 # S_2 * r4
  109. addpd %xmm6, %xmm4 # xmm4 = 1 + C_1*r2 + C_2*r4
  110. addpd %xmm7, %xmm5 # xmm5 = 1 + S_1*r2 + S_2*r4
  # ---- recombine with table values: S*cos(r) + C*sin(r) per lane ----
  111. movq (%rax), %xmm2 # low lane S: first double of the sine-side entry
  112. movq 8(%rax), %xmm1 # low lane C: second double of the sine-side entry
  113. movhpd (%rcx), %xmm2 # high lane S: first double of the cosine-side entry
  114. movhpd 8(%rcx), %xmm1 # high lane C: second double of the cosine-side entry
  115. mulpd %xmm1, %xmm3 # xmm3 = C * r
  116. mulpd %xmm4, %xmm2 # xmm2 = S * (cos polynomial)
  117. mulpd %xmm5, %xmm3 # xmm3 = C * r * (sin(r)/r polynomial)
  118. addpd %xmm3, %xmm2 # low lane = sine result, high lane = cosine result
  119. cvtpd2ps %xmm2, %xmm0 # narrow both lanes to float: element 0 = sin, element 1 = cos
  120. movq (%rsp), %rax
  121. movss %xmm0, (%rax) # store sine through parameter 2
  122. movq 8(%rsp), %rax
  123. pshufd $1, %xmm0, %xmm0 # bring element 1 (cosine) down to element 0
  124. movss %xmm0, (%rax) # store cosine through parameter 3
  125. jmp ..B1.7
  # ---- out-of-range inputs ----
  126. .L_2TAG_PACKET_0.0.1:
  127. jg .L_2TAG_PACKET_1.0.1 # flags still from the cmpw above: signed greater means |x| >= 8192
  # tiny path: x is zero or subnormal
  128. cvtss2sd %xmm0, %xmm0 # widen x to double
  129. movq ONE(%rip), %xmm1 # xmm1 = 1.0
  130. movq %xmm0, %xmm2 # xmm2 = x
  131. mulsd ONE_M230(%rip), %xmm0 # xmm0 = x * (1 - 2^-30)
  132. mulsd %xmm2, %xmm2 # xmm2 = x*x
  133. subsd %xmm2, %xmm1 # xmm1 = 1 - x*x
  134. cvtsd2ss %xmm0, %xmm0 # sine result as float
  135. cvtsd2ss %xmm1, %xmm1 # cosine result as float
  136. movq (%rsp), %rax
  137. movss %xmm0, (%rax) # store sine through parameter 2
  138. movq 8(%rsp), %rax
  139. movss %xmm1, (%rax) # store cosine through parameter 3
  140. jmp ..B1.7
  # large path: |x| >= 8192, Inf or NaN
  141. .L_2TAG_PACKET_1.0.1:
  142. movd %xmm0, %eax # raw bits of x
  143. andl $2139095040, %eax # keep the exponent field (0x7F800000)
  144. cmpl $2139095040, %eax # all ones means Inf or NaN
  145. je .L_2TAG_PACKET_2.0.1
  146. cvtss2sd %xmm0, %xmm0 # finite large x: widen to double for the external routine
  147. movq %xmm0, 16(%rsp) # spill the argument to the scratch slot
  148. ..B1.3:
  149. movsd 16(%rsp), %xmm0 # reload the argument into %xmm0
  150. ..___tag_value_sincosf.4:
  151. call __libm_sse2_sincos@PLT # NOTE(review): assumed to return sin in %xmm0 and cos in %xmm1 - confirm
  152. ..___tag_value_sincosf.5:
  153. ..B1.4:
  154. movsd %xmm0, 24(%rsp) # spill the first returned double
  155. ..B1.5:
  156. movq 24(%rsp), %xmm0 # and reload it
  157. cvtsd2ss %xmm0, %xmm0 # first result as float
  158. cvtsd2ss %xmm1, %xmm1 # second result (%xmm1 as left by the callee) as float
  159. movq (%rsp), %rax
  160. movss %xmm0, (%rax) # store through parameter 2 (sine destination)
  161. movq 8(%rsp), %rax
  162. movss %xmm1, (%rax) # store through parameter 3 (cosine destination)
  163. jmp ..B1.7
  # Inf or NaN input
  164. .L_2TAG_PACKET_2.0.1:
  165. pxor %xmm1, %xmm1 # xmm1 = 0.0
  166. mulss %xmm1, %xmm0 # x * 0: NaN for both Inf and NaN inputs
  167. movq (%rsp), %rax
  168. movss %xmm0, (%rax) # same value stored as the sine
  169. movq 8(%rsp), %rax
  170. movss %xmm0, (%rax) # and as the cosine
  171. .L_2TAG_PACKET_3.0.1:
  172. ..B1.7: # common exit for every path
  173. addq $40, %rsp # release the locals
  174. .cfi_def_cfa_offset 8
  175. ret
  176. .align 16,0x90
  177. .cfi_endproc
  178. .type sincosf,@function
  179. .size sincosf,.-sincosf
  180. .data
  181. # -- End sincosf
  182. .section .rodata, "a" # read-only constants referenced by sincosf
  183. .align 16
  184. .align 16
  185. BIT_31: # 16-byte mask with only bit 31 set; sincosf ANDs it with x to isolate the float sign bit
  186. .long 2147483648 # 0x80000000
  187. .long 0
  188. .long 0
  189. .long 0
  190. .type BIT_31,@object
  191. .size BIT_31,16
  192. .align 16
  193. P_1: # one double, bits 0x77B921FB54444000 (low dword first): about (pi/32) * 2^896 with the low mantissa bits zero
  # NOTE(review): the 2^896 factor appears to cancel the unadjusted exponent
  # bias left by the psllq $29 float-to-double shift in sincosf - confirm.
  194. .long 1413758976 # low dword 0x54444000
  195. .long 2008621563 # high dword 0x77B921FB
  196. .type P_1,@object
  197. .size P_1,8
  198. .space 8, 0x00 # pad to 16 bytes
  199. .align 16
  200. P_2: # one double, bits 0x7532E7B9676733AF (low dword first): about 1.18 * 2^852
  # sincosf computes r = (x - k*P_1) + k*P_2, so P_2 acts as the correction to P_1.
  # NOTE(review): value matches P_1 - (pi/32) * 2^896 - confirm.
  201. .long 1734816687 # low dword 0x676733AF
  202. .long 1966270393 # high dword 0x7532E7B9
  203. .type P_2,@object
  204. .size P_2,8
  205. .space 8, 0x00 # pad to 16 bytes
  206. .align 16
  207. Ctable: # 64 entries of 16 bytes, indexed by sincosf with (k mod 64) * 16
  # Each entry is two doubles, each written as low dword then high dword.
  # The first double is used as the sine term, the second as the cosine term.
  # Entries 0, 1, 8, 16, 32 and 48 decode to {sin(j*pi/32), cos(j*pi/32)};
  # NOTE(review): the remaining entries are presumed to follow the same rule.
  208. .long 0 # entry 0: {0.0, 1.0}
  209. .long 0
  210. .long 0
  211. .long 1072693248
  212. .long 3156849708
  213. .long 1069094822
  214. .long 2748392742
  215. .long 1072683149
  216. .long 1013556747
  217. .long 1070135480
  218. .long 3489094832
  219. .long 1072652951
  220. .long 785751814
  221. .long 1070765062
  222. .long 1455828442
  223. .long 1072602945
  224. .long 2796464483
  225. .long 1071152610
  226. .long 3476196678
  227. .long 1072533611
  228. .long 939980347
  229. .long 1071524701
  230. .long 4051746225
  231. .long 1072445618
  232. .long 967731400
  233. .long 1071761211
  234. .long 688824739
  235. .long 1072339814
  236. .long 621354454
  237. .long 1071926515
  238. .long 1796544321
  239. .long 1072217216
  240. .long 1719614413
  241. .long 1072079006
  242. .long 1719614413
  243. .long 1072079006
  244. .long 1796544321
  245. .long 1072217216
  246. .long 621354454
  247. .long 1071926515
  248. .long 688824739
  249. .long 1072339814
  250. .long 967731400
  251. .long 1071761211
  252. .long 4051746225
  253. .long 1072445618
  254. .long 939980347
  255. .long 1071524701
  256. .long 3476196678
  257. .long 1072533611
  258. .long 2796464483
  259. .long 1071152610
  260. .long 1455828442
  261. .long 1072602945
  262. .long 785751814
  263. .long 1070765062
  264. .long 3489094832
  265. .long 1072652951
  266. .long 1013556747
  267. .long 1070135480
  268. .long 2748392742
  269. .long 1072683149
  270. .long 3156849708
  271. .long 1069094822
  272. .long 0 # entry 16: {1.0, 0.0}
  273. .long 1072693248
  274. .long 0
  275. .long 0
  276. .long 2748392742
  277. .long 1072683149
  278. .long 3156849708
  279. .long 3216578470
  280. .long 3489094832
  281. .long 1072652951
  282. .long 1013556747
  283. .long 3217619128
  284. .long 1455828442
  285. .long 1072602945
  286. .long 785751814
  287. .long 3218248710
  288. .long 3476196678
  289. .long 1072533611
  290. .long 2796464483
  291. .long 3218636258
  292. .long 4051746225
  293. .long 1072445618
  294. .long 939980347
  295. .long 3219008349
  296. .long 688824739
  297. .long 1072339814
  298. .long 967731400
  299. .long 3219244859
  300. .long 1796544321
  301. .long 1072217216
  302. .long 621354454
  303. .long 3219410163
  304. .long 1719614413
  305. .long 1072079006
  306. .long 1719614413
  307. .long 3219562654
  308. .long 621354454
  309. .long 1071926515
  310. .long 1796544321
  311. .long 3219700864
  312. .long 967731400
  313. .long 1071761211
  314. .long 688824739
  315. .long 3219823462
  316. .long 939980347
  317. .long 1071524701
  318. .long 4051746225
  319. .long 3219929266
  320. .long 2796464483
  321. .long 1071152610
  322. .long 3476196678
  323. .long 3220017259
  324. .long 785751814
  325. .long 1070765062
  326. .long 1455828442
  327. .long 3220086593
  328. .long 1013556747
  329. .long 1070135480
  330. .long 3489094832
  331. .long 3220136599
  332. .long 3156849708
  333. .long 1069094822
  334. .long 2748392742
  335. .long 3220166797
  336. .long 0 # entry 32: {0.0, -1.0}
  337. .long 0
  338. .long 0
  339. .long 3220176896
  340. .long 3156849708
  341. .long 3216578470
  342. .long 2748392742
  343. .long 3220166797
  344. .long 1013556747
  345. .long 3217619128
  346. .long 3489094832
  347. .long 3220136599
  348. .long 785751814
  349. .long 3218248710
  350. .long 1455828442
  351. .long 3220086593
  352. .long 2796464483
  353. .long 3218636258
  354. .long 3476196678
  355. .long 3220017259
  356. .long 939980347
  357. .long 3219008349
  358. .long 4051746225
  359. .long 3219929266
  360. .long 967731400
  361. .long 3219244859
  362. .long 688824739
  363. .long 3219823462
  364. .long 621354454
  365. .long 3219410163
  366. .long 1796544321
  367. .long 3219700864
  368. .long 1719614413
  369. .long 3219562654
  370. .long 1719614413
  371. .long 3219562654
  372. .long 1796544321
  373. .long 3219700864
  374. .long 621354454
  375. .long 3219410163
  376. .long 688824739
  377. .long 3219823462
  378. .long 967731400
  379. .long 3219244859
  380. .long 4051746225
  381. .long 3219929266
  382. .long 939980347
  383. .long 3219008349
  384. .long 3476196678
  385. .long 3220017259
  386. .long 2796464483
  387. .long 3218636258
  388. .long 1455828442
  389. .long 3220086593
  390. .long 785751814
  391. .long 3218248710
  392. .long 3489094832
  393. .long 3220136599
  394. .long 1013556747
  395. .long 3217619128
  396. .long 2748392742
  397. .long 3220166797
  398. .long 3156849708
  399. .long 3216578470
  400. .long 0 # entry 48: {-1.0, 0.0}
  401. .long 3220176896
  402. .long 0
  403. .long 0
  404. .long 2748392742
  405. .long 3220166797
  406. .long 3156849708
  407. .long 1069094822
  408. .long 3489094832
  409. .long 3220136599
  410. .long 1013556747
  411. .long 1070135480
  412. .long 1455828442
  413. .long 3220086593
  414. .long 785751814
  415. .long 1070765062
  416. .long 3476196678
  417. .long 3220017259
  418. .long 2796464483
  419. .long 1071152610
  420. .long 4051746225
  421. .long 3219929266
  422. .long 939980347
  423. .long 1071524701
  424. .long 688824739
  425. .long 3219823462
  426. .long 967731400
  427. .long 1071761211
  428. .long 1796544321
  429. .long 3219700864
  430. .long 621354454
  431. .long 1071926515
  432. .long 1719614413
  433. .long 3219562654
  434. .long 1719614413
  435. .long 1072079006
  436. .long 621354454
  437. .long 3219410163
  438. .long 1796544321
  439. .long 1072217216
  440. .long 967731400
  441. .long 3219244859
  442. .long 688824739
  443. .long 1072339814
  444. .long 939980347
  445. .long 3219008349
  446. .long 4051746225
  447. .long 1072445618
  448. .long 2796464483
  449. .long 3218636258
  450. .long 3476196678
  451. .long 1072533611
  452. .long 785751814
  453. .long 3218248710
  454. .long 1455828442
  455. .long 1072602945
  456. .long 1013556747
  457. .long 3217619128
  458. .long 3489094832
  459. .long 1072652951
  460. .long 3156849708
  461. .long 3216578470
  462. .long 2748392742
  463. .long 1072683149
  464. .type Ctable,@object
  465. .size Ctable,1024
  466. .align 16
  467. C_1: # two identical doubles, bits 0xBFE0000000000000 = -0.5; r^2 coefficient of the cosine polynomial in sincosf
  468. .long 0
  469. .long 3219128320 # high dword 0xBFE00000
  470. .long 0
  471. .long 3219128320
  472. .type C_1,@object
  473. .size C_1,16
  474. .align 16
  475. S_1: # two identical doubles, bits 0xBFC5555555555555, about -1/6; r^2 coefficient of the sin(r)/r polynomial in sincosf
  476. .long 1431655765 # low dword 0x55555555
  477. .long 3217380693 # high dword 0xBFC55555
  478. .long 1431655765
  479. .long 3217380693
  480. .type S_1,@object
  481. .size S_1,16
  482. .align 16
  483. ONE: # two identical doubles, bits 0x3FF0000000000000 = 1.0; constant term of both polynomials, also read as a scalar on the tiny-input path
  484. .long 0
  485. .long 1072693248 # high dword 0x3FF00000
  486. .long 0
  487. .long 1072693248
  488. .type ONE,@object
  489. .size ONE,16
  490. .align 16
  491. C_2: # two identical doubles, bits 0x3FA5555555555555, about 1/24; r^4 coefficient of the cosine polynomial in sincosf
  492. .long 1431655765 # low dword 0x55555555
  493. .long 1067799893 # high dword 0x3FA55555
  494. .long 1431655765
  495. .long 1067799893
  496. .type C_2,@object
  497. .size C_2,16
  498. .align 16
  499. S_2: # two identical doubles, bits 0x3F81111111111111, about 1/120; r^4 coefficient of the sin(r)/r polynomial in sincosf
  500. .long 286331153 # low dword 0x11111111
  501. .long 1065423121 # high dword 0x3F811111
  502. .long 286331153
  503. .long 1065423121
  504. .type S_2,@object
  505. .size S_2,16
  506. .align 8
  507. PI_32_RECIP: # single-precision 32/pi (about 10.1859) followed by a zero dword; sincosf loads all 8 bytes with movq
  508. .long 1092811139 # 0x4122F983
  509. .long 0 # upper half of the 8-byte load
  510. .type PI_32_RECIP,@object
  511. .size PI_32_RECIP,8
  512. .align 8
  513. SHIFTER: # single-precision 1.5 * 2^23 = 12582912.0; sincosf adds then subtracts it to round a float to an integer value
  514. .long 1262485504 # 0x4B400000
  515. .type SHIFTER,@object
  516. .size SHIFTER,4
  517. .space 4, 0x00 # pad to 8 bytes
  518. .align 8
  519. ONE_M230: # one double, bits 0x3FEFFFFFFF800000 = 1 - 2^-30; multiplies x on the tiny-input path of sincosf
  520. .long 4286578688 # low dword 0xFF800000
  521. .long 1072693247 # high dword 0x3FEFFFFF
  522. .type ONE_M230,@object
  523. .size ONE_M230,8
  524. .data
  525. .section .note.GNU-stack, "" # empty marker section; GNU toolchain convention for an object that does not need an executable stack
  526. // -- Begin DWARF2 SEGMENT .eh_frame
  527. .section .eh_frame,"a",@progbits # no data is emitted here by hand; unwind info comes from the .cfi_ directives in sincosf
  528. .eh_frame_seg:
  529. .align 1
  530. # End