scalbl.S 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "scalbl.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin scalbl
  /*
   * scalbl -- IA-32, GNU as AT&T syntax, position independent
   * (constants are reached through the GOT base kept in %esi).
   *
   * Inputs are two stack arguments, each read with fldt as an 80-bit
   * extended value:
   *   x at  8(%ebp): significand 8..15(%ebp),  sign/exponent word 16(%ebp)
   *   y at 20(%ebp): significand 20..27(%ebp), sign/exponent word 28(%ebp)
   * The result is returned in st(0).
   *
   * Main path: when y is an integer, the biased exponent of x is replaced
   * by exponent(x) + y (y clamped to +/-65536). Overflow, underflow and
   * denormal results are produced by explicit multiplications. For finite
   * x, a non-integer y yields inf * 0 (NaN). Zero, infinite and NaN
   * operands take the separate paths commented below.
   *
   * Frame after the prologue:
   *    0(%esp)  8-byte scratch slot
   *    8(%esp)  10-byte temporary holding the result before the final fldt
   *   20(%esp)  control word with the precision-control field forced to 11b
   *   22(%esp)  control word found on entry, reloaded before returning
   *
   * Register roles:
   *   %esi       GOT base for the @GOTOFF constant loads
   *   %edi       biased exponent of x
   *   %ecx       biased exponent of y, later the biased exponent of the result
   *   %edx       y converted to a 32-bit integer (then clamped)
   *   %ebx/%eax  1 when the control word was changed and must be reloaded
   */
  41. .text
  42. .align 16,0x90                     # pad to a 16-byte boundary with 0x90 (nop) bytes
  43. .globl scalbl
  44. scalbl:
  45. # parameter 1: 8 + %ebp
  46. # parameter 2: 20 + %ebp
  47. ..B1.1:                            # prologue
  48. ..L1:
  49. pushl %ebp
  50. movl %esp, %ebp
  51. andl $-16, %esp                    # round %esp down to a 16-byte boundary
  52. pushl %esi                         # callee-saved registers used below
  53. pushl %edi
  54. pushl %ebx
  55. subl $36, %esp                     # locals (layout in the header comment)
  56. ..B1.2:
  57. fnstcw 22(%esp)                    # remember the x87 control word found on entry
  58. ..B1.3:
  59. movzwl 16(%ebp), %edi              # sign/exponent word of x
  60. andl $32767, %edi                  # drop the sign bit: %edi = biased exponent of x
  61. movzwl 28(%ebp), %ecx              # sign/exponent word of y
  62. andl $32767, %ecx                  # %ecx = biased exponent of y
  63. call ..L2                          # call/pop pair: obtain the current address for PIC
  64. ..L2:
  65. popl %esi
  66. lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%esi), %esi   # %esi = GOT base
  67. cmpl $32767, %edi                  # exponent of x all ones: x is infinity or NaN
  68. je ..B1.107
  69. ..B1.4:
  70. cmpl $32767, %ecx                  # exponent of y all ones: y is infinity or NaN
  71. je ..B1.74
  72. ..B1.5:
  73. cmpl $16383, %ecx                  # 16383 is the exponent bias: below it means |y| < 1
  74. jl ..B1.33
  75. ..B1.6:                            # finite x, finite y with |y| >= 1
  76. movzwl 22(%esp), %edx
  77. movl %edx, %eax
  78. andl $768, %eax                    # 768 = 0x300, the precision-control field
  79. cmpl $768, %eax                    # already 11b (64-bit significand)?
  80. je ..B1.57                         # yes: nothing to change, %ebx = 0
  81. ..B1.7:
  82. orl $-64768, %edx                  # low 16 bits of -64768 are 0x0300: force PC = 11b
  83. movw %dx, 20(%esp)
  84. ..B1.8:
  85. fldcw 20(%esp)                     # switch to the modified control word
  86. ..B1.9:
  87. movl $1, %ebx                      # control word must be reloaded before returning
  88. ..B1.10:
  89. cmpl $16414, %ecx                  # 16414 = 16383 + 31: |y| >= 2**31
  90. jge ..B1.15
  91. ..B1.11:                           # 1 <= |y| < 2**31: convert y to an integer
  92. fldt 20(%ebp)                      # st0 = y
  93. fldl _TWO_63H@GOTOFF(%esi)         # st0 = 1.5 * 2**63, st1 = y
  94. fadd %st(1), %st                   # st0 = y + 1.5 * 2**63
  95. fstpt 8(%esp)                      # spill the sum, st0 = y
  96. fildl 8(%esp)                      # low dword of the sum significand as a signed integer
  97. movl 8(%esp), %edx                 # %edx = the same 32-bit integer
  98. fucompp                            # compare the integer with y, pop both
  99. fnstsw %ax
  100. sahf                              # x87 condition bits into EFLAGS
  101. jp ..B1.12                        # unordered
  102. je ..B1.20                        # equal: y is an integer, its value is in %edx
  103. ..B1.12:                          # y is not an integer: result is inf * 0
  104. fldl _infs@GOTOFF(%esi)
  105. testl %ebx, %ebx                  # flags are consumed by the je after the x87 ops
  106. fmull _zeros@GOTOFF(%esi)
  107. fstpt 8(%esp)
  108. je ..B1.14                        # control word untouched: skip the reload
  109. ..B1.13:
  110. fldcw 22(%esp)                    # reload the control word found on entry
  111. ..B1.14:
  112. fldt 8(%esp)                      # result in st(0)
  113. addl $36, %esp
  114. popl %ebx
  115. popl %edi
  116. popl %esi
  117. movl %ebp, %esp
  118. popl %ebp
  119. ret
  120. ..B1.15:                          # |y| >= 2**31
  121. movzbl 29(%ebp), %eax             # high byte of the sign/exponent word of y
  122. andl $128, %eax
  123. shrl $7, %eax                     # %eax = sign bit of y (0 or 1)
  124. cmpl $16446, %ecx                 # 16446 = 16383 + 63, flags used by the jge below
  125. movl large_integers@GOTOFF(%esi,%eax,4), %edx   # %edx = +65536 or -65536 by sign of y
  126. jge ..B1.20                       # |y| >= 2**63: every significand bit is an integer bit
  127. ..B1.16:                          # 2**31 <= |y| < 2**63: look for fraction bits
  128. addl $2, %ecx                     # shift count is taken from the low 5 bits of %cl
  129. movl 20(%ebp), %eax               # low dword of the significand of y
  130. shll %cl, %eax                    # shift the integer bits out, keep the fraction bits
  131. testl %eax, %eax
  132. je ..B1.20                        # no fraction bits: y is an integer
  133. ..B1.17:                          # y is not an integer: result is inf * 0
  134. fldl _infs@GOTOFF(%esi)
  135. testl %ebx, %ebx
  136. fmull _zeros@GOTOFF(%esi)
  137. fstpt 8(%esp)
  138. je ..B1.19
  139. ..B1.18:
  140. fldcw 22(%esp)                    # reload the control word found on entry
  141. ..B1.19:
  142. fldt 8(%esp)
  143. addl $36, %esp
  144. popl %ebx
  145. popl %edi
  146. popl %esi
  147. movl %ebp, %esp
  148. popl %ebp
  149. ret
  150. ..B1.20:                          # y is an integer, %edx holds it (or +/-65536)
  151. testl %edi, %edi
  152. jne ..B1.27                       # exponent of x is nonzero
  153. ..B1.21:                          # exponent of x is zero: x is zero or denormal
  154. cmpl $0, 12(%ebp)                 # high dword of the significand of x
  155. jne ..B1.26
  156. ..B1.22:
  157. cmpl $0, 8(%ebp)                  # low dword of the significand of x
  158. jne ..B1.26
  159. ..B1.23:                          # x is zero: return x unchanged
  160. testl %ebx, %ebx
  161. je ..B1.25
  162. ..B1.24:
  163. fldcw 22(%esp)
  164. ..B1.25:
  165. fldt 8(%ebp)
  166. addl $36, %esp
  167. popl %ebx
  168. popl %edi
  169. popl %esi
  170. movl %ebp, %esp
  171. popl %ebp
  172. ret
  173. ..B1.26:                          # x is denormal: scale it up by 2**75 first
  174. fldt 8(%ebp)
  175. fmull _TWO_75@GOTOFF(%esi)        # first table entry, 2**75
  176. fstpt 8(%ebp)                     # overwrite the argument slot with the scaled x
  177. movzwl 16(%ebp), %edi
  178. andl $32767, %edi                 # exponent of the scaled x
  179. addl $-75, %edi                   # compensate for the 2**75 factor
  180. ..B1.27:                          # clamp %edx to the range -65536 .. 65536
  181. cmpl $65536, %edx
  182. jle ..L3
  183. movl $65536, %edx
  184. ..L3:
  185. cmpl $-65536, %edx
  186. jge ..L4
  187. movl $-65536, %edx
  188. ..L4:
  189. lea (%edi,%edx), %ecx             # %ecx = exponent of x + y = biased result exponent
  190. testl %ecx, %ecx
  191. jle ..B1.51                       # not a normal exponent: denormal or underflow
  192. ..B1.28:
  193. cmpl $32767, %ecx
  194. jge ..B1.50                       # exponent too large: overflow
  195. ..B1.29:                          # normal result: patch the exponent field of x
  196. movzwl 16(%ebp), %eax
  197. andl $32767, %ecx
  198. andl $-32768, %eax                # keep the sign bit of x
  199. orl %ecx, %eax
  200. movw %ax, 16(%ebp)                # write back the new sign/exponent word
  201. fldt 8(%ebp)
  202. ..B1.30:                          # common exit for the normal and overflow results
  203. fstpt 8(%esp)
  204. testl %ebx, %ebx
  205. je ..B1.32
  206. ..B1.31:
  207. fldcw 22(%esp)
  208. ..B1.32:
  209. fldt 8(%esp)
  210. addl $36, %esp
  211. popl %ebx
  212. popl %edi
  213. popl %esi
  214. movl %ebp, %esp
  215. popl %ebp
  216. ret
  217. ..B1.33:                          # reached with |y| < 1, or from B1.109 with x infinite and y finite
  218. testl %ecx, %ecx
  219. jne ..B1.42
  220. ..B1.34:
  221. cmpl $0, 24(%ebp)                 # high dword of the significand of y
  222. jne ..B1.42
  223. ..B1.35:
  224. cmpl $0, 20(%ebp)                 # low dword of the significand of y
  225. jne ..B1.42
  226. ..B1.36:                          # y is zero: result is x * 1.0
  227. movzwl 22(%esp), %edx
  228. movl %edx, %eax
  229. andl $768, %eax
  230. cmpl $768, %eax
  231. je ..B1.58                        # precision control already 11b
  232. ..B1.37:
  233. orl $-64768, %edx
  234. movw %dx, 20(%esp)
  235. ..B1.38:
  236. fldcw 20(%esp)
  237. ..B1.39:
  238. fldt 8(%ebp)
  239. fmull _ones@GOTOFF(%esi)          # first table entry, +1.0
  240. fstpt 8(%esp)
  241. ..B1.40:
  242. fldcw 22(%esp)
  243. ..B1.41:
  244. fldt 8(%esp)
  245. addl $36, %esp
  246. popl %ebx
  247. popl %edi
  248. popl %esi
  249. movl %ebp, %esp
  250. popl %ebp
  251. ret
  252. ..B1.42:                          # y is nonzero
  253. cmpl $32767, %edi
  254. je ..B1.60                        # x is infinite (NaN x was diverted at B1.107)
  255. ..B1.43:                          # finite x, 0 < |y| < 1: result is inf * 0
  256. movzwl 22(%esp), %edx
  257. movl %edx, %eax
  258. andl $768, %eax
  259. cmpl $768, %eax
  260. je ..B1.59
  261. ..B1.44:
  262. orl $-64768, %edx
  263. movw %dx, 20(%esp)
  264. ..B1.45:
  265. fldcw 20(%esp)
  266. ..B1.46:
  267. movl $1, %eax                     # on this path %eax is the reload flag
  268. ..B1.47:
  269. fldl _infs@GOTOFF(%esi)
  270. testl %eax, %eax
  271. fmull _zeros@GOTOFF(%esi)
  272. fstpt 8(%esp)
  273. je ..B1.49
  274. ..B1.48:
  275. fldcw 22(%esp)
  276. ..B1.49:
  277. fldt 8(%esp)
  278. addl $36, %esp
  279. popl %ebx
  280. popl %edi
  281. popl %esi
  282. movl %ebp, %esp
  283. popl %ebp
  284. ret
  285. ..B1.50:                          # overflow: multiply two huge values
  286. fldt _large_value_80@GOTOFF(%esi) # entry 0, +2**10000
  287. movzbl 17(%ebp), %edx             # high byte of the sign/exponent word of x
  288. andl $128, %edx
  289. shrl $7, %edx                     # %edx = sign bit of x
  290. lea (,%edx,8), %eax
  291. lea (%eax,%edx,4), %ecx           # %ecx = 12 * sign: table entries are 12 bytes apart
  292. fldt _large_value_80@GOTOFF(%esi,%ecx)   # entry selected by the sign of x
  293. fmulp %st, %st(1)
  294. jmp ..B1.30
  295. ..B1.51:                          # result exponent <= 0
  296. cmpl $-63, %ecx
  297. jge ..B1.53                       # still reachable as a denormal
  298. ..B1.52:                          # underflow: multiply two tiny values
  299. fldt _small_value_80@GOTOFF(%esi) # entry 0, +2**-10000
  300. movzbl 17(%ebp), %edx
  301. andl $128, %edx
  302. shrl $7, %edx                     # %edx = sign bit of x
  303. lea (,%edx,8), %eax
  304. lea (%eax,%edx,4), %ecx           # %ecx = 12 * sign
  305. fldt _small_value_80@GOTOFF(%esi,%ecx)   # entry selected by the sign of x
  306. fmulp %st, %st(1)
  307. jmp ..B1.54
  308. ..B1.53:                          # denormal result: build x with exponent + 75, then scale by 2**-75
  309. fldt 8(%ebp)
  310. lea 75(%edi,%edx), %edx           # biased exponent raised by 75
  311. movzwl 16(%ebp), %eax
  312. andl $32767, %edx
  313. andl $-32768, %eax                # keep the sign bit of x
  314. fstpt 8(%esp)                     # copy x into the temporary
  315. orl %edx, %eax
  316. movw %ax, 16(%esp)                # patch the sign/exponent word of the temporary
  317. fldt 8(%esp)
  318. fmull 8+_TWO_75@GOTOFF(%esi)      # second table entry, 2**-75
  319. ..B1.54:                          # common exit for the denormal and underflow results
  320. fstpt 8(%esp)
  321. testl %ebx, %ebx
  322. je ..B1.56
  323. ..B1.55:
  324. fldcw 22(%esp)
  325. ..B1.56:
  326. fldt 8(%esp)
  327. addl $36, %esp
  328. popl %ebx
  329. popl %edi
  330. popl %esi
  331. movl %ebp, %esp
  332. popl %ebp
  333. ret
  334. ..B1.57:                          # precision control already 11b: no reload needed
  335. xorl %ebx, %ebx
  336. jmp ..B1.10
  337. ..B1.58:                          # y is zero, control word left alone: x * 1.0
  338. fldt 8(%ebp)
  339. fmull _ones@GOTOFF(%esi)
  340. fstpt 8(%esp)
  341. jmp ..B1.41
  342. ..B1.59:                          # control word left alone: reload flag = 0
  343. xorl %eax, %eax
  344. jmp ..B1.47
  345. ..B1.60:                          # x is infinite, y is finite and nonzero
  346. testl %ecx, %ecx
  347. jg ..B1.68                        # exponent of y is nonzero
  348. ..B1.61:                          # result is inf * 0
  349. movzwl 22(%esp), %edx
  350. movl %edx, %eax
  351. andl $768, %eax
  352. cmpl $768, %eax
  353. je ..B1.70
  354. ..B1.62:
  355. orl $-64768, %edx
  356. movw %dx, 20(%esp)
  357. ..B1.63:
  358. fldcw 20(%esp)
  359. ..B1.64:
  360. movl $1, %eax
  361. ..B1.65:
  362. fldl _infs@GOTOFF(%esi)
  363. testl %eax, %eax
  364. fmull _zeros@GOTOFF(%esi)
  365. fstpt 8(%esp)
  366. je ..B1.67
  367. ..B1.66:
  368. fldcw 22(%esp)
  369. ..B1.67:
  370. fldt 8(%esp)
  371. addl $36, %esp
  372. popl %ebx
  373. popl %edi
  374. popl %esi
  375. movl %ebp, %esp
  376. popl %ebp
  377. ret
  378. ..B1.68:
  379. cmpl $1, %ecx                     # exponent of y equal to 1 gets a significand check
  380. je ..B1.71
  381. ..B1.69:                          # return infinity with the sign of x
  382. movzbl 17(%ebp), %eax
  383. andl $128, %eax
  384. shrl $7, %eax                     # %eax = sign bit of x
  385. fldl _infs@GOTOFF(%esi,%eax,8)    # doubles are 8 bytes apart: [0] = +inf, [1] = -inf
  386. addl $36, %esp                    # no fldcw ran on the way here, so nothing to reload
  387. popl %ebx
  388. popl %edi
  389. popl %esi
  390. movl %ebp, %esp
  391. popl %ebp
  392. ret
  393. ..B1.70:
  394. xorl %eax, %eax
  395. jmp ..B1.65
  396. ..B1.71:                          # exponent of y is 1: compare its significand with 0x80000000:00000000
  397. movl 24(%ebp), %eax               # high dword of the significand of y
  398. cmpl $-2147483648, %eax
  399. jb ..B1.61                        # unsigned below 0x80000000: take the inf * 0 path
  400. ..B1.72:
  401. jne ..B1.69                       # above 0x80000000: return infinity
  402. ..B1.73:
  403. cmpl $0, 20(%ebp)                 # low dword of the significand of y
  404. jbe ..B1.61                       # against 0, jbe is taken only when the dword is zero
  405. jmp ..B1.69
  406. ..B1.74:                          # exponent of y is all ones
  407. cmpl $-2147483648, 24(%ebp)       # significand 0x80000000:00000000 encodes infinity
  408. jne ..B1.76
  409. ..B1.75:
  410. cmpl $0, 20(%ebp)
  411. je ..B1.82                        # y is infinite
  412. ..B1.76:                          # an operand is NaN: result is x + y
  413. movzwl 22(%esp), %edx
  414. movl %edx, %eax
  415. andl $768, %eax
  416. cmpl $768, %eax
  417. je ..B1.102
  418. ..B1.77:
  419. orl $-64768, %edx
  420. movw %dx, 20(%esp)
  421. ..B1.78:
  422. fldcw 20(%esp)
  423. ..B1.79:
  424. fldt 8(%ebp)
  425. fldt 20(%ebp)
  426. faddp %st, %st(1)
  427. fstpt 8(%esp)
  428. ..B1.80:
  429. fldcw 22(%esp)
  430. ..B1.81:
  431. fldt 8(%esp)
  432. addl $36, %esp
  433. popl %ebx
  434. popl %edi
  435. popl %esi
  436. movl %ebp, %esp
  437. popl %ebp
  438. ret
  439. ..B1.82:                          # y is +infinity or -infinity
  440. movzwl 22(%esp), %edx
  441. movl %edx, %eax
  442. andl $768, %eax
  443. cmpl $768, %eax
  444. je ..B1.106
  445. ..B1.83:
  446. orl $-64768, %edx
  447. movw %dx, 20(%esp)
  448. ..B1.84:
  449. fldcw 20(%esp)
  450. ..B1.85:
  451. movl $1, %eax                     # reload flag for this path
  452. ..B1.86:
  453. testl %edi, %edi
  454. jne ..B1.94                       # exponent of x is nonzero
  455. ..B1.87:                          # exponent of x is zero: zero or denormal
  456. cmpl $0, 12(%ebp)
  457. jne ..B1.89
  458. ..B1.88:
  459. cmpl $0, 8(%ebp)
  460. je ..B1.90                        # x is zero, ZF stays set for the jne at B1.90
  461. ..B1.112:
  462. cmpl $0, 12(%ebp)                 # set the flags again for the jne after B1.89
  463. ..B1.89:                          # x is denormal
  464. fldl _smallest_value_64@GOTOFF(%esi)   # smallest positive denormal double
  465. fstpl (%esp)                      # stored value is not read again - presumably done for its FP status side effect, TODO confirm
  466. jne ..B1.95                       # fldl and fstpl leave EFLAGS unchanged
  467. ..B1.113:
  468. cmpl $0, 8(%ebp)
  469. ..B1.90:
  470. jne ..B1.95
  471. ..B1.91:                          # x is zero, y is infinite
  472. movzbl 29(%ebp), %edx
  473. testl $128, %edx                  # sign bit of y
  474. je ..B1.93
  475. ..B1.92:                          # y = -infinity: return x (zero)
  476. fldt 8(%ebp)
  477. fstpt 8(%esp)
  478. jmp ..B1.99
  479. ..B1.93:                          # y = +infinity: result is inf * 0
  480. fldl _infs@GOTOFF(%esi)
  481. fmull _zeros@GOTOFF(%esi)
  482. fstpt 8(%esp)
  483. jmp ..B1.99
  484. ..B1.94:
  485. cmpl $32767, %edi
  486. je ..B1.103                       # x is infinite as well
  487. ..B1.95:                          # x is finite and nonzero, y is infinite
  488. movzbl 17(%ebp), %ecx
  489. andl $128, %ecx
  490. movzbl 29(%ebp), %edx
  491. shrl $7, %ecx                     # %ecx = sign bit of x
  492. testl $128, %edx                  # sign bit of y
  493. je ..B1.97
  494. ..B1.96:                          # y = -infinity: zero with the sign of x
  495. fldl _zeros@GOTOFF(%esi,%ecx,8)
  496. jmp ..B1.98
  497. ..B1.97:                          # y = +infinity: infinity with the sign of x
  498. fldl _infs@GOTOFF(%esi,%ecx,8)
  499. ..B1.98:
  500. fstpt 8(%esp)
  501. ..B1.99:                          # common exit for the infinite-y paths
  502. testl %eax, %eax
  503. je ..B1.101
  504. ..B1.100:
  505. fldcw 22(%esp)
  506. ..B1.101:
  507. fldt 8(%esp)
  508. addl $36, %esp
  509. popl %ebx
  510. popl %edi
  511. popl %esi
  512. movl %ebp, %esp
  513. popl %ebp
  514. ret
  515. ..B1.102:                         # NaN operand, control word left alone: x + y
  516. fldt 8(%ebp)
  517. fldt 20(%ebp)
  518. faddp %st, %st(1)
  519. fstpt 8(%esp)
  520. jmp ..B1.81
  521. ..B1.103:                         # x and y are both infinite
  522. movzbl 29(%ebp), %edx
  523. testl $128, %edx                  # sign bit of y
  524. jne ..B1.105
  525. ..B1.104:                         # y = +infinity: return x
  526. fldt 8(%ebp)
  527. fstpt 8(%esp)
  528. jmp ..B1.99
  529. ..B1.105:                         # y = -infinity: result is inf * 0
  530. fldl _infs@GOTOFF(%esi)
  531. fmull _zeros@GOTOFF(%esi)
  532. fstpt 8(%esp)
  533. jmp ..B1.99
  534. ..B1.106:                         # control word left alone: reload flag = 0
  535. xorl %eax, %eax
  536. jmp ..B1.86
  537. ..B1.107:                         # exponent of x is all ones
  538. cmpl $-2147483648, 12(%ebp)       # significand 0x80000000:00000000 encodes infinity
  539. jne ..B1.76                       # anything else: x is NaN
  540. ..B1.108:
  541. cmpl $0, 8(%ebp)
  542. jne ..B1.76
  543. ..B1.109:                         # x is infinite
  544. cmpl $32767, %ecx
  545. je ..B1.74                        # y is infinite or NaN
  546. jmp ..B1.33                       # y is finite
  547. .align 16,0x90
  548. .type scalbl,@function
  549. .size scalbl,.-scalbl
  550. .data
  551. # -- End scalbl
  552. .section .rodata, "a"
  553. .balign 4
  554. .balign 4
  /*
   * _TWO_63H: one IEEE-754 double, 1.5 * 2**63 (bits 0x43E8000000000000),
   * stored little-endian as low dword then high dword. scalbl adds it to y
   * so that the low dword of the sum significand can be read as an integer.
   */
  555. _TWO_63H:
  556. .long 0x00000000
  557. .long 0x43E80000
  558. .type _TWO_63H,@object
  559. .size _TWO_63H, .-_TWO_63H
  560. .balign 4
  /*
   * _infs: two IEEE-754 doubles, read by scalbl with an 8-byte stride
   * indexed by a sign bit: [0] = +infinity, [1] = -infinity.
   */
  561. _infs:
  562. .long 0x00000000
  563. .long 0x7FF00000
  564. .long 0x00000000
  565. .long 0xFFF00000
  566. .type _infs,@object
  567. .size _infs, .-_infs
  568. .balign 4
  /*
   * _zeros: two IEEE-754 doubles, read by scalbl with an 8-byte stride
   * indexed by a sign bit: [0] = +0.0, [1] = -0.0.
   */
  569. _zeros:
  570. .long 0x00000000
  571. .long 0x00000000
  572. .long 0x00000000
  573. .long 0x80000000
  574. .type _zeros,@object
  575. .size _zeros, .-_zeros
  576. .balign 4
  /*
   * large_integers: two 32-bit integers, read by scalbl with a 4-byte
   * stride indexed by the sign bit of y: [0] = +65536, [1] = -65536.
   */
  577. large_integers:
  578. .long 0x10000
  579. .long -0x10000
  580. .type large_integers,@object
  581. .size large_integers, .-large_integers
  582. .balign 4
  /*
   * _TWO_75: two IEEE-754 doubles. [0] = 2**75 (offset 0) scales a denormal
   * x up, [1] = 2**-75 (offset 8) scales a result back down to a denormal.
   */
  583. _TWO_75:
  584. .long 0x00000000
  585. .long 0x44A00000
  586. .long 0x00000000
  587. .long 0x3B400000
  588. .type _TWO_75,@object
  589. .size _TWO_75, .-_TWO_75
  590. .balign 4
  /*
   * _ones: two IEEE-754 doubles: [0] = +1.0, [1] = -1.0.
   * scalbl multiplies x by entry [0] when y is zero.
   */
  591. _ones:
  592. .long 0x00000000
  593. .long 0x3FF00000
  594. .long 0x00000000
  595. .long 0xBFF00000
  596. .type _ones,@object
  597. .size _ones, .-_ones
  598. .balign 4
  /*
   * _smallest_value_64: two IEEE-754 doubles holding the smallest-magnitude
   * denormal, 2**-1074: [0] is positive, [1] is negative.
   */
  599. _smallest_value_64:
  600. .long 0x00000001
  601. .long 0x00000000
  602. .long 0x00000001
  603. .long 0x80000000
  604. .type _smallest_value_64,@object
  605. .size _smallest_value_64, .-_smallest_value_64
  606. .balign 2
  /*
   * _large_value_80: two 80-bit extended values, each padded to 12 bytes
   * (four significand words, one sign/exponent word, one pad word).
   * [0] = +2**10000, [1] = -2**10000. scalbl multiplies entry [0] by the
   * entry selected by the sign of x to produce an overflowed result.
   */
  607. _large_value_80:
  608. .word 0x0000
  609. .word 0x0000
  610. .word 0x0000
  611. .word 0x8000
  612. .word 0x670F
  613. .word 0x0000
  614. .word 0x0000
  615. .word 0x0000
  616. .word 0x0000
  617. .word 0x8000
  618. .word 0xE70F
  619. .word 0x0000
  620. .type _large_value_80,@object
  621. .size _large_value_80, .-_large_value_80
  622. .balign 2
  /*
   * _small_value_80: two 80-bit extended values, each padded to 12 bytes
   * (four significand words, one sign/exponent word, one pad word).
   * [0] = +2**-10000, [1] = -2**-10000. scalbl multiplies entry [0] by the
   * entry selected by the sign of x to produce an underflowed result.
   */
  623. _small_value_80:
  624. .word 0x0000
  625. .word 0x0000
  626. .word 0x0000
  627. .word 0x8000
  628. .word 0x18EF
  629. .word 0x0000
  630. .word 0x0000
  631. .word 0x0000
  632. .word 0x0000
  633. .word 0x8000
  634. .word 0x98EF
  635. .word 0x0000
  636. .type _small_value_80,@object
  637. .size _small_value_80, .-_small_value_80
  638. .data
  639. .section .note.GNU-stack, ""
  640. # End