exp_gen.S 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  # Assembler preamble: records the name of the originating source file and
  # switches to the code section. The ..TXTST0 label marks the start of
  # .text; no instruction or directive in the visible file refers to it.
  37. .file "exp_gen.c"
  38. .text
  39. ..TXTST0:
  # ----------------------------------------------------------------------
  # exp -- double-precision e**x for x86-64 (SSE2, AT&T operand order).
  #
  # In:    %xmm0 low lane = x
  # Out:   %xmm0 low lane = result
  # Uses:  %eax, %rcx, %edx, %r8, %xmm0-%xmm7, flags, 24 bytes of stack:
  #          8(%rsp)  copy of x, reread by the special-case paths
  #          16(%rsp) result spill used by .L_2TAG_PACKET_6.0.2
  #          (%rsp)   status word (14 or 15) written on the overflow and
  #                   underflow paths; nothing in this file reads it back
  #                   (presumably left over from an external handler call,
  #                   TODO confirm)
  #
  # Notation: cv[k] is the 16-byte pair at byte offset 16*k in cv, and
  # .lo / .hi are the low and high 64-bit lanes of a pair.
  #
  # Main path, taken when the top 16 bits of |x| lie in 0x3C90..0x408F
  # (2^-54 <= |x| < 2^10):
  #   n = x * cv[0], rounded to an integer by adding then subtracting Shifter
  #   r = x - n*cv[1] - n*cv[2]
  #   j = n & 63 picks a 16-byte Tbl_addr entry, m = n >> 6
  #   p = r + r^2*cv[3] + r^3*(cv[5].hi + r*cv[4].hi)
  #         + r^5*(cv[5].lo + r*cv[4].lo) + Tbl_addr[j].lo
  #   T = Tbl_addr[j].hi with (m + 1023) OR-ed into the exponent field
  #   result = T + T*p
  # With m outside -894..1022 the power-of-two scale is applied in two
  # steps (.L_2TAG_PACKET_1.0.2). Arguments outside the main range (tiny,
  # huge, infinite, NaN) are handled from .L_2TAG_PACKET_0.0.2.
  # ----------------------------------------------------------------------
  40. # -- Begin exp
  41. .text
  42. .align 16,0x90
  43. .globl exp
  44. exp:
  45. # parameter 1: %xmm0
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_exp.1:
  49. ..L2:
  50. subq $24, %rsp                       # open the 24-byte local frame
  51. .cfi_def_cfa_offset 32
  52. movsd %xmm0, 8(%rsp)                 # keep x for the special-case paths
  53. ..B1.2:
  54. unpcklpd %xmm0, %xmm0                # xmm0 = {x, x}
  55. movapd cv(%rip), %xmm1               # cv[0]
  56. movapd Shifter(%rip), %xmm6
  57. movapd 16+cv(%rip), %xmm2            # cv[1]
  58. movapd 32+cv(%rip), %xmm3            # cv[2]
  # Range check on the top 16 bits of x (sign, exponent, 4 fraction bits).
  59. pextrw $3, %xmm0, %eax
  60. andl $32767, %eax                    # drop the sign bit (mask 0x7FFF)
  61. movl $16527, %edx
  62. subl %eax, %edx                      # edx = 0x408F - top16
  63. subl $15504, %eax                    # eax = top16 - 0x3C90
  64. orl %eax, %edx                       # bit 31 set when either difference is negative
  65. cmpl $-2147483648, %edx
  66. jae .L_2TAG_PACKET_0.0.2             # unsigned >= 0x80000000: outside the main range
  # Argument reduction, interleaved with constant loads.
  67. lea Tbl_addr(%rip), %r8
  68. mulpd %xmm0, %xmm1                   # x * cv[0]
  69. addpd %xmm6, %xmm1                   # adding Shifter leaves integer n in the low bits
  70. movapd %xmm1, %xmm7                  # keep that bit pattern for the integer work below
  71. subpd %xmm6, %xmm1                   # xmm1 = n as a double
  72. mulpd %xmm1, %xmm2                   # n * cv[1]
  73. movapd 64+cv(%rip), %xmm4            # cv[4]
  74. mulpd %xmm1, %xmm3                   # n * cv[2]
  75. movapd 80+cv(%rip), %xmm5            # cv[5]
  76. subpd %xmm2, %xmm0                   # x - n*cv[1]
  77. movd %xmm7, %eax                     # eax = n (low 32 bits of the shifted sum)
  78. movl %eax, %ecx
  79. andl $63, %ecx                       # j = n & 63
  80. shll $4, %ecx                        # byte offset of table entry j (16 bytes each)
  81. sarl $6, %eax                        # m = n >> 6 (arithmetic shift)
  82. movl %eax, %edx
  # Build the exponent field for 2^m in xmm7.
  83. movdqa mmask(%rip), %xmm6
  84. pand %xmm6, %xmm7                    # keep n with its low 6 bits and upper dword cleared
  85. movdqa bias(%rip), %xmm6
  86. paddq %xmm6, %xmm7                   # + 1023*64
  87. psllq $46, %xmm7                     # (m + 1023) << 52, modulo 2^64
  88. subpd %xmm3, %xmm0                   # xmm0 = r = x - n*cv[1] - n*cv[2]
  89. movapd (%rcx,%r8), %xmm2             # xmm2 = Tbl_addr[j] = {lo, hi}
  # Polynomial p in r; the two lanes carry different partial sums.
  90. mulpd %xmm0, %xmm4                   # r * cv[4]
  91. movapd %xmm0, %xmm6
  92. movapd %xmm0, %xmm1                  # xmm1 = r
  93. mulpd %xmm6, %xmm6                   # r^2
  94. mulpd %xmm6, %xmm0                   # r^3
  95. addpd %xmm4, %xmm5                   # cv[5] + r*cv[4]
  96. mulsd %xmm6, %xmm0                   # low lane becomes r^5, high lane stays r^3
  97. mulpd 48+cv(%rip), %xmm6             # r^2 * cv[3]
  98. addsd %xmm2, %xmm1                   # r + Tbl_addr[j].lo
  99. unpckhpd %xmm2, %xmm2                # xmm2 = {entry.hi, entry.hi}
  100. mulpd %xmm5, %xmm0                  # {r^5*(cv[5].lo + r*cv[4].lo), r^3*(cv[5].hi + r*cv[4].hi)}
  101. addsd %xmm0, %xmm1                  # accumulate the r^5 term
  102. orpd %xmm7, %xmm2                   # T = entry.hi | exponent field
  103. unpckhpd %xmm0, %xmm0               # bring the r^3 term down to the low lane
  104. addsd %xmm1, %xmm0
  105. addsd %xmm6, %xmm0                  # xmm0 = p
  106. addl $894, %edx
  107. cmpl $1916, %edx
  108. ja .L_2TAG_PACKET_1.0.2             # unsigned test: m outside -894..1022
  109. mulsd %xmm2, %xmm0                  # T * p
  110. addsd %xmm2, %xmm0                  # result = T + T*p
  111. jmp ..B1.5
  # ---- m outside -894..1022: scale in two steps. eax still holds m. ----
  112. .L_2TAG_PACKET_1.0.2:
  113. xorpd %xmm3, %xmm3
  114. movapd ALLONES(%rip), %xmm4
  115. movl $-1022, %edx
  116. subl %eax, %edx                     # edx = -1022 - m
  117. movd %edx, %xmm5                    # shift count (zero-extended)
  118. psllq %xmm5, %xmm4                  # mask with the low (-1022 - m) bits clear; all zero
                                          # once the count exceeds 63 (a negative edx is seen
                                          # as a very large unsigned count)
  119. movl %eax, %ecx                     # ecx = m
  120. sarl $1, %eax                       # eax = m >> 1
  121. pinsrw $3, %eax, %xmm3              # low 16 bits of m >> 1 into bits 48..63
  122. movapd ebias(%rip), %xmm6
  123. psllq $4, %xmm3                     # m >> 1 now sits in the exponent field (bits 52..63)
  124. psubd %xmm3, %xmm2                  # T1 = T with m >> 1 taken out of its exponent
  125. mulsd %xmm2, %xmm0                  # p * T1
  126. cmpl $52, %edx
  127. jg .L_2TAG_PACKET_2.0.2             # -1022 - m > 52, that is m < -1074
  128. andpd %xmm2, %xmm4                  # xmm4 = T1 with the masked low bits cleared
  129. paddd %xmm6, %xmm3                  # add the bias: xmm3 is now the double 2^(m >> 1)
  130. subsd %xmm4, %xmm2                  # xmm2 = the low bits that the mask removed
  131. addsd %xmm2, %xmm0                  # p*T1 + low part
  132. cmpl $1023, %ecx
  133. jge .L_2TAG_PACKET_3.0.2            # m >= 1023: overflow side
  134. pextrw $3, %xmm0, %ecx
  135. andl $32768, %ecx                   # sign bit of the partial sum
  136. orl %ecx, %edx
  137. cmpl $0, %edx
  138. je .L_2TAG_PACKET_4.0.2             # m == -1022 and the partial sum is not negative
  139. movapd %xmm0, %xmm6                 # keep p*T1 + low part for .L_2TAG_PACKET_5.0.2
  140. addsd %xmm4, %xmm0                  # add the high part of T1
  141. mulsd %xmm3, %xmm0                  # final scale by 2^(m >> 1)
  142. pextrw $3, %xmm0, %ecx
  143. andl $32752, %ecx                   # exponent field of the result (mask 0x7FF0)
  144. cmpl $0, %ecx
  145. je .L_2TAG_PACKET_5.0.2             # exponent field zero: result is subnormal or zero
  146. jmp ..B1.5
  # ---- Result has a zero exponent field: recombine the two scaled parts
  #      with integer arithmetic on their bit patterns. ----
  147. .L_2TAG_PACKET_5.0.2:
  148. mulsd %xmm3, %xmm6                  # scaled (p*T1 + low part)
  149. mulsd %xmm3, %xmm4                  # scaled high part of T1
  150. movsd %xmm6, %xmm0
  151. pxor %xmm4, %xmm6                   # sign bit set where the two signs differ
  152. psrad $31, %xmm6                    # spread each dword sign across the dword
  153. pshufd $85, %xmm6, %xmm6            # broadcast dword 1: all ones when signs differ
  154. psllq $1, %xmm0
  155. psrlq $1, %xmm0                     # clear bit 63 of each qword
  156. pxor %xmm6, %xmm0                   # conditional bitwise complement
  157. psrlq $63, %xmm6                    # 1 when signs differ, otherwise 0
  158. paddq %xmm6, %xmm0                  # completes the conditional negation
  159. paddq %xmm4, %xmm0                  # integer add of the high-part bit pattern
  160. movl $15, (%rsp)                    # status 15 (never read in this file)
  161. jmp .L_2TAG_PACKET_6.0.2
  # ---- m == -1022 with a non-negative partial sum ----
  162. .L_2TAG_PACKET_4.0.2:
  163. addsd %xmm4, %xmm0
  164. mulsd %xmm3, %xmm0
  165. jmp ..B1.5
  # ---- m >= 1023: finish the scaling and detect overflow ----
  166. .L_2TAG_PACKET_3.0.2:
  167. addsd %xmm4, %xmm0
  168. mulsd %xmm3, %xmm0
  169. pextrw $3, %xmm0, %ecx
  170. andl $32752, %ecx
  171. cmpl $32752, %ecx
  172. jnb .L_2TAG_PACKET_7.0.2            # exponent field all ones: result overflowed
  173. jmp ..B1.5
  # ---- m < -1074 ----
  174. .L_2TAG_PACKET_2.0.2:
  175. paddd %xmm6, %xmm3                  # xmm3 = the double 2^(m >> 1)
  176. addpd %xmm2, %xmm0                  # p*T1 + T1
  177. mulsd %xmm3, %xmm0
  178. movl $15, (%rsp)                    # status 15 (never read in this file)
  179. jmp .L_2TAG_PACKET_6.0.2
  # ---- |x| >= 1024, infinite or NaN. eax = high dword of x, sign cleared. ----
  180. .L_2TAG_PACKET_8.0.2:
  181. cmpl $2146435072, %eax
  182. jae .L_2TAG_PACKET_9.0.2            # >= 0x7FF00000: infinity or NaN
  183. movl 12(%rsp), %eax                 # high dword of x, sign included
  184. cmpl $-2147483648, %eax
  185. jae .L_2TAG_PACKET_10.0.2           # sign bit set: large negative x
  186. movsd XMAX(%rip), %xmm0
  187. mulsd %xmm0, %xmm0                  # XMAX * XMAX overflows
  188. .L_2TAG_PACKET_7.0.2:
  189. movl $14, (%rsp)                    # status 14 (never read in this file)
  190. jmp .L_2TAG_PACKET_6.0.2
  191. .L_2TAG_PACKET_10.0.2:
  192. movsd XMIN(%rip), %xmm0
  193. mulsd %xmm0, %xmm0                  # XMIN * XMIN underflows
  194. movl $15, (%rsp)                    # status 15 (never read in this file)
  195. jmp .L_2TAG_PACKET_6.0.2
  # ---- Infinity or NaN ----
  196. .L_2TAG_PACKET_9.0.2:
  197. movl 8(%rsp), %edx                  # low dword of x
  198. cmpl $2146435072, %eax
  199. ja .L_2TAG_PACKET_11.0.2            # high dword above 0x7FF00000: NaN
  200. cmpl $0, %edx
  201. jne .L_2TAG_PACKET_11.0.2           # nonzero low fraction bits: NaN
  202. movl 12(%rsp), %eax
  203. cmpl $2146435072, %eax
  204. jne .L_2TAG_PACKET_12.0.2           # sign bit set: negative infinity
  205. movsd INF(%rip), %xmm0              # positive infinity in, INF out
  206. jmp ..B1.5
  207. .L_2TAG_PACKET_12.0.2:
  208. movsd ZERO(%rip), %xmm0             # negative infinity in, ZERO out
  209. jmp ..B1.5
  210. .L_2TAG_PACKET_11.0.2:
  211. movsd 8(%rsp), %xmm0
  212. addsd %xmm0, %xmm0                  # NaN in: return x + x
  213. jmp ..B1.5
  # ---- Entry for arguments outside the main range ----
  214. .L_2TAG_PACKET_0.0.2:
  215. movl 12(%rsp), %eax
  216. andl $2147483647, %eax              # high dword of x with the sign cleared
  217. cmpl $1083179008, %eax
  218. jae .L_2TAG_PACKET_8.0.2            # >= 0x40900000 (high dword of 1024.0)
  219. movsd 8(%rsp), %xmm0
  220. addsd ONE_val(%rip), %xmm0          # |x| < 2^-54: return x + ONE_val
  221. jmp ..B1.5
  # ---- Common tail of the status-setting paths: the result goes through
  #      16(%rsp) and back into xmm0 before the shared epilogue. ----
  222. .L_2TAG_PACKET_6.0.2:
  223. movsd %xmm0, 16(%rsp)
  224. ..B1.3:
  225. movq 16(%rsp), %xmm0
  226. .L_2TAG_PACKET_13.0.2:
  227. ..B1.5:
  228. addq $24, %rsp                      # release the local frame
  229. .cfi_def_cfa_offset 8
  230. ret
  231. .align 16,0x90
  232. .cfi_endproc
  233. .type exp,@function
  234. .size exp,.-exp
  235. .data
  236. # -- End exp
  # cv -- 96 bytes of read-only constants, read by exp as six 16-byte pairs
  # at byte offsets 0, 16, 32, 48, 64 and 80 with movapd/mulpd (hence the
  # 16-byte alignment). Each double is written as two .long values, low
  # dword first.
  237. .section .rodata, "a"
  238. .align 16
  239. .align 16
  240. cv:
  # +0: 0x40571547652B82FE in both lanes, about 64/ln(2); multiplied by x.
  241. .long 1697350398
  242. .long 1079448903
  243. .long 1697350398
  244. .long 1079448903
  # +16: 0x3F862E42FEFA0000 in both lanes, the leading bits of ln(2)/64
  # with the low 16 bits zero; n times this is subtracted from x first.
  245. .long 4277796864
  246. .long 1065758274
  247. .long 4277796864
  248. .long 1065758274
  # +32: 0x3D1CF79ABC9E3B3A in both lanes, about 2.6e-14; the remainder of
  # ln(2)/64 left out of the +16 value, subtracted second.
  249. .long 3164486458
  250. .long 1025308570
  251. .long 3164486458
  252. .long 1025308570
  # +48: 0x3FDFFFFFFFFFFFFE in both lanes, just under 0.5; multiplies r^2.
  253. .long 4294967294
  254. .long 1071644671
  255. .long 4294967294
  256. .long 1071644671
  # +64: low lane high dword 0x3F56C15C (near 1/720), high lane high dword
  # 0x3FA55555 (near 1/24); multiplied by r and added to the +80 pair.
  257. .long 3811088480
  258. .long 1062650204
  259. .long 1432067621
  260. .long 1067799893
  # +80: low lane high dword 0x3F811115 (near 1/120), high lane high dword
  # 0x3FC55555 (near 1/6).
  261. .long 3230715663
  262. .long 1065423125
  263. .long 1431604129
  264. .long 1069897045
  265. .type cv,@object
  266. .size cv,96
  # Shifter -- 0x4338000000000000 (1.5 * 2^52) in both lanes. exp adds it to
  # x*cv[0] and subtracts it again; the intermediate sum carries the rounded
  # integer in its low bits, which exp extracts with movd.
  267. .align 16
  268. Shifter:
  269. .long 0
  270. .long 1127743488
  271. .long 0
  272. .long 1127743488
  273. .type Shifter,@object
  274. .size Shifter,16
  # Tbl_addr -- 64 entries of 16 bytes (1024 bytes), indexed by exp with
  # (n & 63) * 16 and loaded with movapd, hence the 16-byte alignment.
  # Each entry is four .long values:
  #   longs 0-1: low qword, a small double (high dwords fall in the range
  #              0x3C3xxxxx..0x3CAxxxxx, roughly 2^-60..2^-53) that exp adds
  #              to the reduced argument r;
  #   longs 2-3: high qword, fraction bits only (sign and exponent zero);
  #              exp ORs an exponent field into it. Entries 1 and 63 match
  #              the fraction bits of 2^(1/64) and 2^(63/64), so entry j
  #              presumably holds 2^(j/64) -- spot-checked, not verified
  #              for every entry.
  275. .align 16
  276. Tbl_addr:
  # entry 0
  277. .long 0
  278. .long 0
  279. .long 0
  280. .long 0
  # entry 1
  281. .long 235107661
  282. .long 1018002367
  283. .long 1048019040
  284. .long 11418
  285. .long 896005651
  286. .long 1015861842
  287. .long 3541402996
  288. .long 22960
  289. .long 1642514529
  290. .long 1012987726
  291. .long 410360776
  292. .long 34629
  293. .long 1568897900
  294. .long 1016568486
  295. .long 1828292879
  296. .long 46424
  297. .long 1882168529
  298. .long 1010744893
  299. .long 852742562
  300. .long 58348
  301. .long 509852888
  302. .long 1017336174
  303. .long 3490863952
  304. .long 70401
  305. .long 653277307
  306. .long 1017431380
  307. .long 2930322911
  308. .long 82586
  309. .long 1649557430
  310. .long 1017729363
  311. .long 1014845818
  312. .long 94904
  313. .long 1058231231
  314. .long 1015777676
  315. .long 3949972341
  316. .long 107355
  317. .long 1044000607
  318. .long 1016786167
  319. .long 828946858
  320. .long 119943
  321. .long 1151779725
  322. .long 1015705409
  323. .long 2288159958
  324. .long 132667
  325. .long 3819481236
  326. .long 1016499965
  327. .long 1853186616
  328. .long 145530
  329. .long 2552227826
  330. .long 1015039787
  331. .long 1709341917
  332. .long 158533
  333. .long 1829350193
  334. .long 1015216097
  335. .long 4112506593
  336. .long 171677
  337. .long 1913391795
  338. .long 1015756674
  339. .long 2799960843
  340. .long 184965
  341. .long 1303423926
  342. .long 1015238005
  343. .long 171030293
  344. .long 198398
  345. .long 1574172746
  346. .long 1016061241
  347. .long 2992903935
  348. .long 211976
  349. .long 3424156969
  350. .long 1017196428
  351. .long 926591434
  352. .long 225703
  353. .long 1938513547
  354. .long 1017631273
  355. .long 887463926
  356. .long 239579
  357. .long 2804567149
  358. .long 1015390024
  359. .long 1276261410
  360. .long 253606
  361. .long 631083525
  362. .long 1017690182
  363. .long 569847337
  364. .long 267786
  365. .long 1623370770
  366. .long 1011049453
  367. .long 1617004845
  368. .long 282120
  369. .long 3667985273
  370. .long 1013894369
  371. .long 3049340112
  372. .long 296610
  373. .long 3145379760
  374. .long 1014403278
  375. .long 3577096743
  376. .long 311258
  377. .long 2603100681
  378. .long 1017152460
  379. .long 1990012070
  380. .long 326066
  381. .long 3249202951
  382. .long 1017448880
  383. .long 1453150081
  384. .long 341035
  385. .long 419288974
  386. .long 1016280325
  387. .long 917841882
  388. .long 356167
  389. .long 3793507337
  390. .long 1016095713
  391. .long 3712504873
  392. .long 371463
  393. .long 728023093
  394. .long 1016345318
  395. .long 363667784
  396. .long 386927
  397. .long 2582678538
  398. .long 1017123460
  399. .long 2956612996
  400. .long 402558
  401. .long 7592966
  402. .long 1016721543
  403. .long 2186617380
  404. .long 418360
  405. .long 228611441
  406. .long 1016696141
  407. .long 1719614412
  408. .long 434334
  409. .long 2261665670
  410. .long 1017457593
  411. .long 1013258798
  412. .long 450482
  413. .long 544148907
  414. .long 1017323666
  415. .long 3907805043
  416. .long 466805
  417. .long 2383914918
  418. .long 1017143586
  419. .long 1447192520
  420. .long 483307
  421. .long 1176412038
  422. .long 1017267372
  423. .long 1944781190
  424. .long 499988
  425. .long 2882956373
  426. .long 1013312481
  427. .long 919555682
  428. .long 516851
  429. .long 3154077648
  430. .long 1016528543
  431. .long 2571947538
  432. .long 533897
  433. .long 348651999
  434. .long 1016405780
  435. .long 2604962540
  436. .long 551129
  437. .long 3253791412
  438. .long 1015920431
  439. .long 1110089947
  440. .long 568549
  441. .long 1509121860
  442. .long 1014756995
  443. .long 2568320822
  444. .long 586158
  445. .long 2617649212
  446. .long 1017340090
  447. .long 2966275556
  448. .long 603959
  449. .long 553214634
  450. .long 1016457425
  451. .long 2682146383
  452. .long 621954
  453. .long 730975783
  454. .long 1014083580
  455. .long 2191782032
  456. .long 640145
  457. .long 1486499517
  458. .long 1016818996
  459. .long 2069751140
  460. .long 658534
  461. .long 2595788928
  462. .long 1016407932
  463. .long 2990417244
  464. .long 677123
  465. .long 1853053619
  466. .long 1015310724
  467. .long 1434058175
  468. .long 695915
  469. .long 2462790535
  470. .long 1015814775
  471. .long 2572866477
  472. .long 714911
  473. .long 3693944214
  474. .long 1017259110
  475. .long 3092190714
  476. .long 734114
  477. .long 2979333550
  478. .long 1017188654
  479. .long 4076559942
  480. .long 753526
  481. .long 174054861
  482. .long 1014300631
  483. .long 2420883922
  484. .long 773150
  485. .long 816778419
  486. .long 1014197934
  487. .long 3716502172
  488. .long 792987
  489. .long 3507050924
  490. .long 1015341199
  491. .long 777507147
  492. .long 813041
  493. .long 1821514088
  494. .long 1013410604
  495. .long 3706687593
  496. .long 833312
  497. .long 920623539
  498. .long 1016295433
  499. .long 1242007931
  500. .long 853805
  501. .long 2789017511
  502. .long 1014276997
  503. .long 3707479175
  504. .long 874520
  505. .long 3586233004
  506. .long 1015962192
  507. .long 64696965
  508. .long 895462
  509. .long 474650514
  510. .long 1016642419
  511. .long 863738718
  512. .long 916631
  513. .long 1614448851
  514. .long 1014281732
  515. .long 3884662774
  516. .long 938030
  517. .long 2450082086
  518. .long 1016164135
  519. .long 2728693977
  520. .long 959663
  521. .long 1101668360
  522. .long 1015989180
  523. .long 3999357479
  524. .long 981531
  525. .long 835814894
  526. .long 1015702697
  527. .long 1533953344
  528. .long 1003638
  # entry 63
  529. .long 1301400989
  530. .long 1014466875
  531. .long 2174652632
  532. .long 1025985
  533. .type Tbl_addr,@object
  534. .size Tbl_addr,1024
  # mmask -- per 64-bit lane: low dword 0xFFFFFFC0, high dword 0. exp ANDs
  # it with the shifted sum, which clears the low 6 bits of n (the table
  # index) and the whole upper dword.
  535. .align 16
  536. mmask:
  537. .long 4294967232
  538. .long 0
  539. .long 4294967232
  540. .long 0
  541. .type mmask,@object
  542. .size mmask,16
  # bias -- 65472 (0xFFC0 = 1023 * 64) in each 64-bit lane. exp adds it to
  # the masked n and then shifts left by 46, which puts (n >> 6) + 1023 in
  # the exponent field of a double.
  543. .align 16
  544. bias:
  545. .long 65472
  546. .long 0
  547. .long 65472
  548. .long 0
  549. .type bias,@object
  550. .size bias,16
  # ALLONES -- 128 bits, all set. exp shifts it left with psllq by a
  # run-time count to build a mask that clears low-order bits.
  551. .align 16
  552. ALLONES:
  553. .long 4294967295
  554. .long 4294967295
  555. .long 4294967295
  556. .long 4294967295
  557. .type ALLONES,@object
  558. .size ALLONES,16
  # ebias -- 0x3FF0000000000000 (the bit pattern of 1.0) in both lanes. exp
  # adds it with paddd to a register holding only an exponent-field value,
  # which turns that value into a biased power-of-two double.
  559. .align 16
  560. ebias:
  561. .long 0
  562. .long 1072693248
  563. .long 0
  564. .long 1072693248
  565. .type ebias,@object
  566. .size ebias,16
  # XMAX -- 0x7FEFFFFFFFFFFFFF, the largest finite double. exp multiplies it
  # by itself on the large-positive-argument path so the product overflows.
  567. .align 4
  568. XMAX:
  569. .long 4294967295
  570. .long 2146435071
  571. .type XMAX,@object
  572. .size XMAX,8
  # XMIN -- 0x0010000000000000 (2^-1022, the smallest normal double). exp
  # multiplies it by itself on the large-negative-argument path so the
  # product underflows.
  573. .align 4
  574. XMIN:
  575. .long 0
  576. .long 1048576
  577. .type XMIN,@object
  578. .size XMIN,8
  # INF -- 0x7FF0000000000000, positive infinity; returned by exp when the
  # argument is positive infinity.
  579. .align 4
  580. INF:
  581. .long 0
  582. .long 2146435072
  583. .type INF,@object
  584. .size INF,8
  # ZERO -- 0x0000000000000000, positive zero; returned by exp when the
  # argument is negative infinity.
  585. .align 4
  586. ZERO:
  587. .long 0
  588. .long 0
  589. .type ZERO,@object
  590. .size ZERO,8
  # ONE_val -- 0x3FF0000000000000, the double 1.0; exp adds it to x on the
  # tiny-argument path (|x| < 2^-54).
  591. .align 4
  592. ONE_val:
  593. .long 0
  594. .long 1072693248
  595. .type ONE_val,@object
  596. .size ONE_val,8
  # File trailer. The empty .note.GNU-stack section is the GNU toolchain
  # marker for an object that does not need an executable stack. The
  # .eh_frame section opened afterwards receives only a label and an
  # alignment directive here; the unwind data for exp presumably comes from
  # the .cfi_* directives inside the function (TODO confirm that this
  # explicit section is still needed).
  597. .data
  598. .section .note.GNU-stack, ""
  599. // -- Begin DWARF2 SEGMENT .eh_frame
  600. .section .eh_frame,"a",@progbits
  601. .eh_frame_seg:
  602. .align 1
  603. # End