expm1_wmt.S 17 KB


  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  /*
   * static_func - return the run-time address of static_const_table.
   *
   * Syntax: GNU as, AT&T operand order, 32-bit x86.
   * In:     nothing
   * Out:    %eax = address of static_const_table
   * Clobb:  %eax (the stack pointer is balanced on return)
   *
   * The call/pop pair yields the address of the anchor label, the first
   * lea converts it to the address of _GLOBAL_OFFSET_TABLE_, and the second
   * lea adds the @GOTOFF displacement of the table.  No absolute address is
   * therefore encoded in the instruction stream.
   */
          .file   "expm1_wmt.c"
          .text
  ..TXTST0:
  # -- Begin static_func
          .text
          .align  16,0x90
  static_func:
          call    .Lstatic_func_anchor            # pushes the address of the anchor
  .Lstatic_func_anchor:
          popl    %eax                            # eax = run-time address of the anchor
          lea     _GLOBAL_OFFSET_TABLE_+[. - .Lstatic_func_anchor](%eax), %eax    # eax = GOT address
          lea     static_const_table@GOTOFF(%eax), %eax                           # eax = table address
          ret
          .align  16,0x90
          .type   static_func,@function
          .size   static_func,.-static_func
          .data
  # -- End static_func
  57. .text
  58. # -- Begin expm1
  59. .text
  60. .align 16,0x90
  61. .globl expm1
  62. expm1:
  63. # parameter 1: 8 + %ebp
  64. ..B2.1:
  65. ..L3:
  66. ..B2.2:
  67. pushl %ebp
  68. movl %esp, %ebp
  69. subl $120, %esp
  70. movl %ebx, 64(%esp)
  71. call static_func
  72. movl %eax, %ebx
  73. movsd 128(%esp), %xmm0
  74. unpcklpd %xmm0, %xmm0
  75. movapd 64(%ebx), %xmm1
  76. movapd 48(%ebx), %xmm6
  77. movapd 80(%ebx), %xmm2
  78. movapd 96(%ebx), %xmm3
  79. pextrw $3, %xmm0, %eax
  80. andl $32767, %eax
  81. movl $16527, %edx
  82. subl %eax, %edx
  83. subl $16304, %eax
  84. orl %eax, %edx
  85. cmpl $-2147483648, %edx
  86. jae .L_2TAG_PACKET_0.0.3
  87. mulpd %xmm0, %xmm1
  88. addpd %xmm6, %xmm1
  89. movapd %xmm1, %xmm7
  90. subpd %xmm6, %xmm1
  91. mulpd %xmm1, %xmm2
  92. movapd 112(%ebx), %xmm4
  93. mulpd %xmm1, %xmm3
  94. movapd 128(%ebx), %xmm5
  95. subpd %xmm2, %xmm0
  96. movd %xmm7, %eax
  97. movl %eax, %ecx
  98. andl $63, %ecx
  99. shll $4, %ecx
  100. sarl $6, %eax
  101. movl %eax, %edx
  102. subpd %xmm3, %xmm0
  103. movapd 160(%ebx,%ecx), %xmm2
  104. movsd 144(%ebx), %xmm3
  105. mulpd %xmm0, %xmm4
  106. movapd %xmm0, %xmm1
  107. mulpd %xmm0, %xmm0
  108. mulsd %xmm0, %xmm3
  109. addpd %xmm4, %xmm5
  110. mulsd %xmm0, %xmm0
  111. movapd %xmm2, %xmm4
  112. unpckhpd %xmm2, %xmm2
  113. movdqa 16(%ebx), %xmm6
  114. pand %xmm6, %xmm7
  115. movdqa 32(%ebx), %xmm6
  116. paddq %xmm6, %xmm7
  117. psllq $46, %xmm7
  118. mulsd %xmm0, %xmm3
  119. mulpd %xmm5, %xmm0
  120. addsd %xmm3, %xmm0
  121. addl $894, %edx
  122. cmpl $1916, %edx
  123. ja .L_2TAG_PACKET_1.0.3
  124. xorpd %xmm3, %xmm3
  125. movl $16368, %eax
  126. pinsrw $3, %eax, %xmm3
  127. orpd %xmm7, %xmm2
  128. mulsd %xmm4, %xmm7
  129. movapd %xmm3, %xmm6
  130. addsd %xmm1, %xmm3
  131. pextrw $3, %xmm2, %edx
  132. pshufd $238, %xmm0, %xmm5
  133. psrlq $38, %xmm3
  134. psllq $38, %xmm3
  135. movapd %xmm2, %xmm4
  136. subsd %xmm3, %xmm6
  137. addsd %xmm5, %xmm0
  138. addsd %xmm6, %xmm1
  139. addsd %xmm7, %xmm4
  140. mulsd %xmm3, %xmm7
  141. mulsd %xmm2, %xmm3
  142. xorpd %xmm5, %xmm5
  143. movl $16368, %eax
  144. pinsrw $3, %eax, %xmm5
  145. addsd %xmm1, %xmm0
  146. movl $17184, %ecx
  147. subl %edx, %ecx
  148. subl $16256, %edx
  149. orl %edx, %ecx
  150. jl .L_2TAG_PACKET_2.0.3
  151. mulsd %xmm4, %xmm0
  152. subsd %xmm5, %xmm3
  153. addsd %xmm7, %xmm0
  154. addsd %xmm3, %xmm0
  155. .L_2TAG_PACKET_3.0.3:
  156. jmp .L_2TAG_PACKET_4.0.3
  157. .L_2TAG_PACKET_2.0.3:
  158. cmpl $0, %edx
  159. jl .L_2TAG_PACKET_5.0.3
  160. mulsd %xmm4, %xmm0
  161. subsd %xmm5, %xmm7
  162. addsd %xmm7, %xmm0
  163. addsd %xmm3, %xmm0
  164. jmp .L_2TAG_PACKET_3.0.3
  165. .L_2TAG_PACKET_5.0.3:
  166. mulsd %xmm4, %xmm0
  167. addsd %xmm7, %xmm0
  168. addsd %xmm3, %xmm0
  169. subsd %xmm5, %xmm0
  170. jmp .L_2TAG_PACKET_3.0.3
  171. .L_2TAG_PACKET_1.0.3:
  172. movl 132(%esp), %ecx
  173. addsd %xmm0, %xmm1
  174. unpckhpd %xmm0, %xmm0
  175. addsd %xmm1, %xmm0
  176. cmpl $0, %ecx
  177. jl .L_2TAG_PACKET_6.0.3
  178. fstcw 24(%esp)
  179. movzwl 24(%esp), %edx
  180. orl $768, %edx
  181. movw %dx, 28(%esp)
  182. fldcw 28(%esp)
  183. movl %eax, %edx
  184. sarl $1, %eax
  185. subl %eax, %edx
  186. movdqa (%ebx), %xmm6
  187. pandn %xmm2, %xmm6
  188. addl $1023, %eax
  189. movd %eax, %xmm3
  190. psllq $52, %xmm3
  191. orpd %xmm3, %xmm6
  192. mulsd %xmm3, %xmm4
  193. movsd %xmm0, 8(%esp)
  194. fldl 8(%esp)
  195. movsd %xmm6, 16(%esp)
  196. fldl 16(%esp)
  197. movsd %xmm4, 16(%esp)
  198. fldl 16(%esp)
  199. addl $1023, %edx
  200. movd %edx, %xmm4
  201. psllq $52, %xmm4
  202. faddp %st, %st(1)
  203. fmul %st, %st(1)
  204. faddp %st, %st(1)
  205. movsd %xmm4, 8(%esp)
  206. fldl 8(%esp)
  207. fmulp %st, %st(1)
  208. fstpl 8(%esp)
  209. movsd 8(%esp), %xmm0
  210. fldcw 24(%esp)
  211. pextrw $3, %xmm0, %ecx
  212. andl $32752, %ecx
  213. cmpl $32752, %ecx
  214. jae .L_2TAG_PACKET_7.0.3
  215. jmp .L_2TAG_PACKET_4.0.3
  216. cmpl $-2147483648, %ecx
  217. jb .L_2TAG_PACKET_7.0.3
  218. jmp .L_2TAG_PACKET_4.0.3
  219. .L_2TAG_PACKET_7.0.3:
  220. movl $41, %edx
  221. .L_2TAG_PACKET_8.0.3:
  222. movsd %xmm0, (%esp)
  223. movsd 128(%esp), %xmm0
  224. subl $32, %esp
  225. lea 128(%esp), %eax
  226. movl %eax, (%esp)
  227. lea 128(%esp), %eax
  228. movl %eax, 4(%esp)
  229. lea 32(%esp), %eax
  230. movl %eax, 8(%esp)
  231. movl %edx, %eax
  232. movl %eax, 12(%esp)
  233. call __libm_error_support
  234. addl $32, %esp
  235. fldl (%esp)
  236. jmp .L_2TAG_PACKET_9.0.3
  237. .L_2TAG_PACKET_10.0.3:
  238. cmpl $2146435072, %eax
  239. jae .L_2TAG_PACKET_11.0.3
  240. movsd 1272(%ebx), %xmm0
  241. mulsd %xmm0, %xmm0
  242. movl $41, %edx
  243. jmp .L_2TAG_PACKET_8.0.3
  244. .L_2TAG_PACKET_11.0.3:
  245. movl 132(%esp), %eax
  246. movl 128(%esp), %edx
  247. movl %eax, %ecx
  248. andl $2147483647, %eax
  249. cmpl $2146435072, %eax
  250. ja .L_2TAG_PACKET_12.0.3
  251. cmpl $0, %edx
  252. jne .L_2TAG_PACKET_12.0.3
  253. cmpl $0, %ecx
  254. jl .L_2TAG_PACKET_13.0.3
  255. movsd 1256(%ebx), %xmm0
  256. jmp .L_2TAG_PACKET_4.0.3
  257. .L_2TAG_PACKET_13.0.3:
  258. jmp .L_2TAG_PACKET_6.0.3
  259. .L_2TAG_PACKET_12.0.3:
  260. movsd 128(%esp), %xmm0
  261. addsd %xmm0, %xmm0
  262. jmp .L_2TAG_PACKET_4.0.3
  263. .L_2TAG_PACKET_14.0.3:
  264. addl $16304, %eax
  265. cmpl $15504, %eax
  266. jb .L_2TAG_PACKET_15.0.3
  267. movapd 1184(%ebx), %xmm2
  268. pshufd $68, %xmm0, %xmm1
  269. movapd 1200(%ebx), %xmm3
  270. movapd 1216(%ebx), %xmm4
  271. movsd 1232(%ebx), %xmm5
  272. mulsd %xmm1, %xmm1
  273. xorpd %xmm6, %xmm6
  274. movl $16352, %eax
  275. pinsrw $3, %eax, %xmm6
  276. mulpd %xmm0, %xmm2
  277. xorpd %xmm7, %xmm7
  278. movl $16368, %edx
  279. pinsrw $3, %edx, %xmm7
  280. addpd %xmm3, %xmm2
  281. mulsd %xmm1, %xmm5
  282. pshufd $228, %xmm1, %xmm3
  283. mulpd %xmm1, %xmm1
  284. mulsd %xmm0, %xmm6
  285. mulpd %xmm0, %xmm2
  286. addpd %xmm4, %xmm2
  287. movapd %xmm7, %xmm4
  288. addsd %xmm6, %xmm7
  289. mulpd %xmm3, %xmm1
  290. psrlq $27, %xmm7
  291. psllq $27, %xmm7
  292. movsd 1288(%ebx), %xmm3
  293. subsd %xmm7, %xmm4
  294. mulpd %xmm1, %xmm2
  295. addsd %xmm4, %xmm6
  296. pshufd $238, %xmm2, %xmm1
  297. addsd %xmm2, %xmm6
  298. andpd %xmm0, %xmm3
  299. movapd %xmm0, %xmm4
  300. addsd %xmm6, %xmm1
  301. subsd %xmm3, %xmm0
  302. addsd %xmm5, %xmm1
  303. mulsd %xmm7, %xmm3
  304. mulsd %xmm7, %xmm0
  305. mulsd %xmm1, %xmm4
  306. addsd %xmm4, %xmm0
  307. addsd %xmm3, %xmm0
  308. jmp .L_2TAG_PACKET_4.0.3
  309. .L_2TAG_PACKET_15.0.3:
  310. cmpl $16, %eax
  311. jae .L_2TAG_PACKET_3.0.3
  312. movapd %xmm0, %xmm2
  313. movd %xmm0, %eax
  314. psrlq $31, %xmm2
  315. movd %xmm2, %ecx
  316. orl %ecx, %eax
  317. je .L_2TAG_PACKET_3.0.3
  318. movl $16, %edx
  319. xorpd %xmm1, %xmm1
  320. pinsrw $3, %edx, %xmm1
  321. mulsd %xmm1, %xmm1
  322. movl $42, %edx
  323. jmp .L_2TAG_PACKET_8.0.3
  324. .L_2TAG_PACKET_0.0.3:
  325. cmpl $0, %eax
  326. jl .L_2TAG_PACKET_14.0.3
  327. movl 132(%esp), %eax
  328. cmpl $1083179008, %eax
  329. jge .L_2TAG_PACKET_10.0.3
  330. cmpl $-1048576, %eax
  331. jae .L_2TAG_PACKET_11.0.3
  332. .L_2TAG_PACKET_6.0.3:
  333. xorpd %xmm0, %xmm0
  334. movl $49136, %eax
  335. pinsrw $3, %eax, %xmm0
  336. jmp .L_2TAG_PACKET_4.0.3
  337. .L_2TAG_PACKET_4.0.3:
  338. movsd %xmm0, 48(%esp)
  339. fldl 48(%esp)
  340. .L_2TAG_PACKET_9.0.3:
  341. movl 64(%esp), %ebx
  342. movl %ebp, %esp
  343. popl %ebp
  344. ret
  345. ..B2.3:
  346. .align 16,0x90
  347. .type expm1,@function
  348. .size expm1,.-expm1
  349. .data
  350. # -- End expm1
  351. .text
  352. # -- Begin __libm_error_support
  353. .text
  354. .align 16,0x90
  355. __libm_error_support:
  356. # parameter 1: 4 + %esp
  357. # parameter 2: 8 + %esp
  358. # parameter 3: 12 + %esp
  359. # parameter 4: 16 + %esp
  360. ..B3.1:
  361. ..L4:
  362. ret
  363. .align 16,0x90
  364. .type __libm_error_support,@function
  365. .size __libm_error_support,.-__libm_error_support
  366. .data
  367. # -- End __libm_error_support
  368. .section .rodata, "a"
  369. .align 16
  370. .align 16
  371. static_const_table:
  372. .long 0
  373. .long 4293918720
  374. .long 0
  375. .long 4293918720
  376. .long 4294967232
  377. .long 0
  378. .long 4294967232
  379. .long 0
  380. .long 65472
  381. .long 0
  382. .long 65472
  383. .long 0
  384. .long 0
  385. .long 1127743488
  386. .long 0
  387. .long 1127743488
  388. .long 1697350398
  389. .long 1079448903
  390. .long 1697350398
  391. .long 1079448903
  392. .long 4277796864
  393. .long 1065758274
  394. .long 4277796864
  395. .long 1065758274
  396. .long 3164486458
  397. .long 1025308570
  398. .long 3164486458
  399. .long 1025308570
  400. .long 1963358694
  401. .long 1065423121
  402. .long 1431655765
  403. .long 1069897045
  404. .long 1431655765
  405. .long 1067799893
  406. .long 0
  407. .long 1071644672
  408. .long 381774871
  409. .long 1062650220
  410. .long 381774871
  411. .long 1062650220
  412. .long 0
  413. .long 0
  414. .long 0
  415. .long 0
  416. .long 1000070955
  417. .long 1042145304
  418. .long 1040187392
  419. .long 11418
  420. .long 988267849
  421. .long 1039500660
  422. .long 3539992576
  423. .long 22960
  424. .long 36755401
  425. .long 1042114290
  426. .long 402653184
  427. .long 34629
  428. .long 3634769483
  429. .long 1042178627
  430. .long 1820327936
  431. .long 46424
  432. .long 2155991225
  433. .long 1041560680
  434. .long 847249408
  435. .long 58348
  436. .long 2766913307
  437. .long 1039293264
  438. .long 3489660928
  439. .long 70401
  440. .long 3651174602
  441. .long 1040488175
  442. .long 2927624192
  443. .long 82586
  444. .long 3073892131
  445. .long 1042240606
  446. .long 1006632960
  447. .long 94904
  448. .long 1328391742
  449. .long 1042019037
  450. .long 3942645760
  451. .long 107355
  452. .long 2650893825
  453. .long 1041903210
  454. .long 822083584
  455. .long 119943
  456. .long 2397289153
  457. .long 1041802037
  458. .long 2281701376
  459. .long 132667
  460. .long 430997175
  461. .long 1042110606
  462. .long 1845493760
  463. .long 145530
  464. .long 1230936525
  465. .long 1041801015
  466. .long 1702887424
  467. .long 158533
  468. .long 740675935
  469. .long 1040178913
  470. .long 4110417920
  471. .long 171677
  472. .long 3489810261
  473. .long 1041825986
  474. .long 2793406464
  475. .long 184965
  476. .long 2532600530
  477. .long 1040767882
  478. .long 167772160
  479. .long 198398
  480. .long 3542557060
  481. .long 1041827263
  482. .long 2986344448
  483. .long 211976
  484. .long 1401563777
  485. .long 1041061093
  486. .long 922746880
  487. .long 225703
  488. .long 3129406026
  489. .long 1041852413
  490. .long 880803840
  491. .long 239579
  492. .long 900993572
  493. .long 1039283234
  494. .long 1275068416
  495. .long 253606
  496. .long 2115029358
  497. .long 1042140042
  498. .long 562036736
  499. .long 267786
  500. .long 1086643152
  501. .long 1041785419
  502. .long 1610612736
  503. .long 282120
  504. .long 82864366
  505. .long 1041256244
  506. .long 3045064704
  507. .long 296610
  508. .long 2392968152
  509. .long 1040913683
  510. .long 3573547008
  511. .long 311258
  512. .long 2905856183
  513. .long 1040002214
  514. .long 1988100096
  515. .long 326066
  516. .long 3742008261
  517. .long 1040011137
  518. .long 1451229184
  519. .long 341035
  520. .long 863393794
  521. .long 1040880621
  522. .long 914358272
  523. .long 356167
  524. .long 1446136837
  525. .long 1041372426
  526. .long 3707764736
  527. .long 371463
  528. .long 927855201
  529. .long 1040617636
  530. .long 360710144
  531. .long 386927
  532. .long 1492679939
  533. .long 1041050306
  534. .long 2952790016
  535. .long 402558
  536. .long 608827001
  537. .long 1041582217
  538. .long 2181038080
  539. .long 418360
  540. .long 606260204
  541. .long 1042271987
  542. .long 1711276032
  543. .long 434334
  544. .long 3163044019
  545. .long 1041843851
  546. .long 1006632960
  547. .long 450482
  548. .long 4148747325
  549. .long 1041962972
  550. .long 3900702720
  551. .long 466805
  552. .long 802924201
  553. .long 1041275378
  554. .long 1442840576
  555. .long 483307
  556. .long 3052749833
  557. .long 1041940577
  558. .long 1937768448
  559. .long 499988
  560. .long 2216116399
  561. .long 1041486744
  562. .long 914358272
  563. .long 516851
  564. .long 2729697836
  565. .long 1041445764
  566. .long 2566914048
  567. .long 533897
  568. .long 540608356
  569. .long 1041310907
  570. .long 2600468480
  571. .long 551129
  572. .long 2916344493
  573. .long 1040535661
  574. .long 1107296256
  575. .long 568549
  576. .long 731391814
  577. .long 1039497014
  578. .long 2566914048
  579. .long 586158
  580. .long 1024722704
  581. .long 1041461625
  582. .long 2961178624
  583. .long 603959
  584. .long 3806831748
  585. .long 1041732499
  586. .long 2675965952
  587. .long 621954
  588. .long 238953304
  589. .long 1040316488
  590. .long 2189426688
  591. .long 640145
  592. .long 749123235
  593. .long 1041725785
  594. .long 2063597568
  595. .long 658534
  596. .long 1168187977
  597. .long 1041175214
  598. .long 2986344448
  599. .long 677123
  600. .long 3506096399
  601. .long 1042186095
  602. .long 1426063360
  603. .long 695915
  604. .long 1470221620
  605. .long 1041675499
  606. .long 2566914048
  607. .long 714911
  608. .long 3182425146
  609. .long 1041483134
  610. .long 3087007744
  611. .long 734114
  612. .long 3131698208
  613. .long 1042208657
  614. .long 4068474880
  615. .long 753526
  616. .long 2300504125
  617. .long 1041428596
  618. .long 2415919104
  619. .long 773150
  620. .long 2290297931
  621. .long 1037388400
  622. .long 3716153344
  623. .long 792987
  624. .long 3532148223
  625. .long 1041626194
  626. .long 771751936
  627. .long 813041
  628. .long 1161884404
  629. .long 1042015258
  630. .long 3699376128
  631. .long 833312
  632. .long 876383176
  633. .long 1037968878
  634. .long 1241513984
  635. .long 853805
  636. .long 3379986796
  637. .long 1042213153
  638. .long 3699376128
  639. .long 874520
  640. .long 1545797737
  641. .long 1041681569
  642. .long 58720256
  643. .long 895462
  644. .long 2925146801
  645. .long 1042212567
  646. .long 855638016
  647. .long 916631
  648. .long 1316627971
  649. .long 1038516204
  650. .long 3883925504
  651. .long 938030
  652. .long 3267869137
  653. .long 1040337004
  654. .long 2726297600
  655. .long 959663
  656. .long 3720868999
  657. .long 1041782409
  658. .long 3992977408
  659. .long 981531
  660. .long 433316142
  661. .long 1041994064
  662. .long 1526726656
  663. .long 1003638
  664. .long 781232103
  665. .long 1040093400
  666. .long 2172649472
  667. .long 1025985
  668. .long 2773927732
  669. .long 1053236707
  670. .long 381774871
  671. .long 1062650220
  672. .long 379653899
  673. .long 1056571845
  674. .long 286331153
  675. .long 1065423121
  676. .long 436314138
  677. .long 1059717536
  678. .long 1431655765
  679. .long 1067799893
  680. .long 1431655765
  681. .long 1069897045
  682. .long 0
  683. .long 1071644672
  684. .long 0
  685. .long 1072693248
  686. .long 0
  687. .long 2146435072
  688. .long 0
  689. .long 0
  690. .long 4294967295
  691. .long 2146435071
  692. .long 0
  693. .long 1048576
  694. .long 4227858432
  695. .long 4294967295
  696. .type static_const_table,@object
  697. .size static_const_table,1296
  698. .data
  699. .section .note.GNU-stack, ""
  700. # End