scalbl.S 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "scalbl.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin scalbl
  #
  # scalbl -- two-operand x87 scaling routine (AT&T syntax, x86-64).
  # NOTE(review): presumably `long double scalbl(long double x, long double y)`
  # returning x * 2**y; the prototype is not visible in this file -- confirm
  # against the header.
  #
  # Inputs (offsets are relative to %rsp after the 40-byte frame is set up):
  #   x : 80-bit value at 48(%rsp); its sign/exponent word is at 56(%rsp)
  #   y : 80-bit value at 64(%rsp); its sign/exponent word is at 72(%rsp)
  # Output: every return path leaves the result in st(0).
  #
  # Frame layout:
  #    0(%rsp)  8-byte scratch slot (written only in ..B1.84)
  #    8(%rsp)  double temporary for results produced with SSE
  #   16(%rsp)  80-bit temporary holding the result across the fldcw restore
  #   32(%rsp)  modified x87 control word
  #   34(%rsp)  caller's x87 control word, saved by fnstcw
  #
  # Register roles on the main paths:
  #   %eax  biased exponent of x (15 bits)
  #   %ecx  biased exponent of y (15 bits)
  #   %dl   1 when the control word was changed and must be restored
  #   %edi  y converted to an integer, or +/-65536 when |y| >= 2**31
  # Other registers written: %rsi, %r8, %xmm0.
  #
  # The x argument slot is modified in place on some paths (..B1.25, ..B1.28).
  # Several blocks issue `testb %dl, %dl` early and branch on it several
  # instructions later; the SSE/x87 instructions in between do not write
  # EFLAGS, so the instruction order must be kept.
  # After each `ret`, `.cfi_def_cfa_offset 48` re-establishes the unwind state
  # for the following block, which runs with the frame still allocated.
  #
  41. .text
  42. .align 16,0x90
  43. .globl scalbl
  44. scalbl:
  45. # parameter 1: 48 + %rsp
  46. # parameter 2: 64 + %rsp
  47. ..B1.1:
  48. .cfi_startproc
  49. ..___tag_value_scalbl.1:
  50. ..L2:
  51. subq $40, %rsp
  52. .cfi_def_cfa_offset 48
  # %dl = 0: the control word has not been modified yet.
  53. xorb %dl, %dl
  54. ..B1.2:
  # Save the caller's x87 control word.
  55. fnstcw 34(%rsp)
  56. ..B1.3:
  # Extract the 15-bit biased exponents of x (%eax) and y (%ecx).
  57. movzwl 56(%rsp), %eax
  58. andl $32767, %eax
  59. movzwl 72(%rsp), %ecx
  60. andl $32767, %ecx
  # Exponent field all ones: x is an infinity or a NaN.
  61. cmpl $32767, %eax
  62. je ..B1.100
  63. ..B1.4:
  # Exponent field all ones: y is an infinity or a NaN.
  64. cmpl $32767, %ecx
  65. je ..B1.69
  66. ..B1.5:
  # 16383 is the exponent bias, so a smaller field means |y| < 1.
  67. cmpl $16383, %ecx
  68. jl ..B1.32
  69. ..B1.6:
  # |y| >= 1. Make sure the precision-control field (mask 768 = 0x300) of the
  # control word is 11b; if it already is, the control word is left alone.
  70. movzwl 34(%rsp), %edi
  71. movl %edi, %esi
  72. andl $768, %esi
  73. cmpl $768, %esi
  74. je ..B1.10
  75. ..B1.7:
  # Set both precision-control bits; only the low 16 bits are stored.
  76. orl $-64768, %edi
  77. movw %di, 32(%rsp)
  78. ..B1.8:
  79. fldcw 32(%rsp)
  80. ..B1.9:
  # Remember that the saved control word must be reloaded before returning.
  81. movb $1, %dl
  82. ..B1.10:
  # Biased exponent >= 16414 means |y| >= 2**31, too large for the 32-bit
  # conversion used in ..B1.11.
  83. cmpl $16414, %ecx
  84. jge ..B1.15
  85. ..B1.11:
  # 1 <= |y| < 2**31. Adding _TWO_63H (1.5 * 2**63) leaves y, rounded to an
  # integer, in the low bits of the stored significand; %edi takes its low
  # 32 bits. fildl converts that integer back and fucomip compares it with y
  # (still in st(1)) to find out whether y was integral.
  86. fldt 64(%rsp)
  87. lea _TWO_63H(%rip), %rcx
  88. fldl (%rcx)
  89. fadd %st(1), %st
  90. fstpt 16(%rsp)
  91. fildl 16(%rsp)
  92. movl 16(%rsp), %edi
  93. fucomip %st(1), %st
  # Pop y; fstp does not disturb the flags set by fucomip.
  94. fstp %st(0)
  # Unordered or not equal: fall into the non-integer path.
  95. jp ..B1.12
  96. je ..B1.20
  97. ..B1.12:
  # y is not an integer: the result is +inf * +0.0 (a NaN produced by an
  # invalid operation), computed with SSE and widened to 80 bits through
  # 8(%rsp). The flags from `testb %dl, %dl` are consumed by the `je` below.
  98. lea _infs(%rip), %rax
  99. lea _zeros(%rip), %rcx
  100. testb %dl, %dl
  101. movsd (%rax), %xmm0
  102. mulsd (%rcx), %xmm0
  103. movsd %xmm0, 8(%rsp)
  104. fldl 8(%rsp)
  105. fstpt 16(%rsp)
  106. je ..B1.14
  107. ..B1.13:
  # Restore the caller's control word.
  108. fldcw 34(%rsp)
  109. ..B1.14:
  # Return the value parked in the 80-bit temporary.
  110. fldt 16(%rsp)
  111. addq $40, %rsp
  112. .cfi_def_cfa_offset 8
  113. ret
  114. .cfi_def_cfa_offset 48
  115. ..B1.15:
  # |y| >= 2**31: %edi = large_integers[sign bit of y], i.e. +65536 or -65536.
  # With a biased exponent >= 16446 (|y| >= 2**63) the integrality test in
  # ..B1.16 is skipped.
  116. movb 73(%rsp), %sil
  117. lea large_integers(%rip), %rdi
  118. andb $-128, %sil
  119. shrb $7, %sil
  120. movzbl %sil, %r8d
  121. cmpl $16446, %ecx
  122. movl (%rdi,%r8,4), %edi
  123. jge ..B1.20
  124. ..B1.16:
  # 2**31 <= |y| < 2**63: shift the low significand dword left by
  # (%ecx + 2) mod 32, which equals the unbiased exponent minus 31 and drops
  # the integer bits held in that dword. Any bit left is a fractional bit,
  # so a non-zero result means y is not an integer.
  125. addl $2, %ecx
  126. movl 64(%rsp), %esi
  127. shll %cl, %esi
  128. testl %esi, %esi
  129. je ..B1.20
  130. ..B1.17:
  # Non-integer y: same +inf * +0.0 construction as ..B1.12.
  131. lea _infs(%rip), %rax
  132. lea _zeros(%rip), %rcx
  133. testb %dl, %dl
  134. movsd (%rax), %xmm0
  135. mulsd (%rcx), %xmm0
  136. movsd %xmm0, 8(%rsp)
  137. fldl 8(%rsp)
  138. fstpt 16(%rsp)
  139. je ..B1.19
  140. ..B1.18:
  141. fldcw 34(%rsp)
  142. ..B1.19:
  143. fldt 16(%rsp)
  144. addq $40, %rsp
  145. .cfi_def_cfa_offset 8
  146. ret
  147. .cfi_def_cfa_offset 48
  148. ..B1.20:
  # y is an integer and %edi holds it (or +/-65536 when it came from
  # ..B1.15). A zero exponent field in x means x is zero or denormal.
  149. testl %eax, %eax
  150. jne ..B1.26
  151. ..B1.21:
  # Significand non-zero: x is denormal.
  152. cmpq $0, 48(%rsp)
  153. jne ..B1.25
  154. ..B1.22:
  # x is +/-0: restore the control word if needed and return x unchanged.
  155. testb %dl, %dl
  156. je ..B1.24
  157. ..B1.23:
  158. fldcw 34(%rsp)
  159. ..B1.24:
  160. fldt 48(%rsp)
  161. addq $40, %rsp
  162. .cfi_def_cfa_offset 8
  163. ret
  164. .cfi_def_cfa_offset 48
  165. ..B1.25:
  # Denormal x: multiply by 2**75 (first entry of _TWO_75), write the scaled
  # value back into x's slot, then take its exponent field minus 75 as the
  # effective biased exponent.
  166. fldt 48(%rsp)
  167. lea _TWO_75(%rip), %rax
  168. fmull (%rax)
  169. fstpt 48(%rsp)
  170. movzwl 56(%rsp), %eax
  171. andl $32767, %eax
  172. addl $-75, %eax
  173. ..B1.26:
  # Clamp the integer y to [-65536, 65536] (result in %esi) and form the
  # target biased exponent %ecx = %eax + %esi.
  174. movl $65536, %ecx
  175. cmpl $65536, %edi
  176. cmovg %ecx, %edi
  177. movl $-65536, %esi
  178. cmpl $-65536, %edi
  179. cmovge %edi, %esi
  180. lea (%rax,%rsi), %ecx
  # Target exponent <= 0: denormal or underflowing result.
  181. testl %ecx, %ecx
  182. jle ..B1.49
  183. ..B1.27:
  # Target exponent >= 32767: overflow.
  184. cmpl $32767, %ecx
  185. jge ..B1.48
  186. ..B1.28:
  # In range: patch the new exponent into x's sign/exponent word, keeping
  # the sign bit, and load the patched value as the result.
  187. movzwl 56(%rsp), %eax
  188. andl $32767, %ecx
  189. andl $-32768, %eax
  190. orl %ecx, %eax
  191. movw %ax, 56(%rsp)
  192. fldt 48(%rsp)
  193. ..B1.29:
  # Park the result, restore the control word if it was changed, return.
  194. fstpt 16(%rsp)
  195. testb %dl, %dl
  196. je ..B1.31
  197. ..B1.30:
  198. fldcw 34(%rsp)
  199. ..B1.31:
  200. fldt 16(%rsp)
  201. addq $40, %rsp
  202. .cfi_def_cfa_offset 8
  203. ret
  204. .cfi_def_cfa_offset 48
  205. ..B1.32:
  # Reached with |y| < 1, or from ..B1.101 with x infinite and y finite.
  # y is +/-0 exactly when both its exponent field and significand are zero.
  206. testl %ecx, %ecx
  207. jne ..B1.40
  208. ..B1.33:
  209. cmpq $0, 64(%rsp)
  210. jne ..B1.40
  211. ..B1.34:
  # y is +/-0: the result is x * 1.0, computed with the precision-control
  # field forced to 11b. %edx is reused for the control word here, so this
  # path does its own restore instead of relying on the %dl flag.
  212. movzwl 34(%rsp), %edx
  213. movl %edx, %eax
  214. andl $768, %eax
  215. cmpl $768, %eax
  216. je ..B1.55
  217. ..B1.35:
  218. orl $-64768, %edx
  219. movw %dx, 32(%rsp)
  220. ..B1.36:
  221. fldcw 32(%rsp)
  222. ..B1.37:
  223. fldt 48(%rsp)
  224. lea _ones(%rip), %rax
  225. fmull (%rax)
  226. fstpt 16(%rsp)
  227. ..B1.38:
  # Unconditional restore: the control word was changed in ..B1.36.
  228. fldcw 34(%rsp)
  229. ..B1.39:
  230. fldt 16(%rsp)
  231. addq $40, %rsp
  232. .cfi_def_cfa_offset 8
  233. ret
  234. .cfi_def_cfa_offset 48
  235. ..B1.40:
  # y is non-zero. x's exponent field can be 32767 here only when the
  # routine arrived through ..B1.101 (x is an infinity).
  236. cmpl $32767, %eax
  237. je ..B1.56
  238. ..B1.41:
  # Finite x with 0 < |y| < 1: y cannot be an integer. Force the
  # precision-control field, then build the NaN result as in ..B1.12.
  239. movzwl 34(%rsp), %ecx
  240. movl %ecx, %eax
  241. andl $768, %eax
  242. cmpl $768, %eax
  243. je ..B1.45
  244. ..B1.42:
  245. orl $-64768, %ecx
  246. movw %cx, 32(%rsp)
  247. ..B1.43:
  248. fldcw 32(%rsp)
  249. ..B1.44:
  250. movb $1, %dl
  251. ..B1.45:
  252. lea _infs(%rip), %rax
  253. lea _zeros(%rip), %rcx
  254. testb %dl, %dl
  255. movsd (%rax), %xmm0
  256. mulsd (%rcx), %xmm0
  257. movsd %xmm0, 8(%rsp)
  258. fldl 8(%rsp)
  259. fstpt 16(%rsp)
  260. je ..B1.47
  261. ..B1.46:
  262. fldcw 34(%rsp)
  263. ..B1.47:
  264. fldt 16(%rsp)
  265. addq $40, %rsp
  266. .cfi_def_cfa_offset 8
  267. ret
  268. .cfi_def_cfa_offset 48
  269. ..B1.48:
  # Overflow: multiply 2**10000 by +/-2**10000 and let the FPU produce the
  # overflowed result. x's sign bit (byte 57, bit 7) selects the 16-byte
  # entry of _large_value_80 used as the second factor.
  270. movb 57(%rsp), %al
  271. lea _large_value_80(%rip), %rsi
  272. andb $-128, %al
  273. shrb $7, %al
  274. fldt (%rsi)
  275. movzbl %al, %ecx
  276. shlq $4, %rcx
  277. fldt (%rsi,%rcx)
  278. fmulp %st, %st(1)
  279. jmp ..B1.29
  280. ..B1.49:
  # Target exponent <= 0. At -63 or above the result is built as a denormal
  # in ..B1.51; below that it underflows completely.
  281. cmpl $-63, %ecx
  282. jge ..B1.51
  283. ..B1.50:
  # Total underflow: multiply 2**-10000 by +/-2**-10000, with the sign of x
  # selecting the second factor from _small_value_80.
  284. movb 57(%rsp), %al
  285. lea _small_value_80(%rip), %rsi
  286. andb $-128, %al
  287. shrb $7, %al
  288. fldt (%rsi)
  289. movzbl %al, %ecx
  290. shlq $4, %rcx
  291. fldt (%rsi,%rcx)
  292. fmulp %st, %st(1)
  293. jmp ..B1.52
  294. ..B1.51:
  # Denormal result: copy x to the temporary at 16(%rsp), give the copy the
  # exponent (target + 75) with x's sign bit, then multiply by 2**-75 (the
  # second entry of _TWO_75, at offset 8) so the FPU performs the final
  # rounding.
  295. fldt 48(%rsp)
  296. lea 75(%rax,%rsi), %eax
  297. movzwl 56(%rsp), %ecx
  298. andl $32767, %eax
  299. lea 8+_TWO_75(%rip), %rsi
  300. andl $-32768, %ecx
  301. fstpt 16(%rsp)
  302. orl %eax, %ecx
  303. movw %cx, 24(%rsp)
  304. fldt 16(%rsp)
  305. fmull (%rsi)
  306. ..B1.52:
  # Park the result, restore the control word if it was changed, return.
  307. fstpt 16(%rsp)
  308. testb %dl, %dl
  309. je ..B1.54
  310. ..B1.53:
  311. fldcw 34(%rsp)
  312. ..B1.54:
  313. fldt 16(%rsp)
  314. addq $40, %rsp
  315. .cfi_def_cfa_offset 8
  316. ret
  317. .cfi_def_cfa_offset 48
  318. ..B1.55:
  # y is +/-0 and the precision-control field is already 11b: compute
  # x * 1.0 without touching the control word, then share the exit at ..B1.39.
  319. fldt 48(%rsp)
  320. lea _ones(%rip), %rax
  321. fmull (%rax)
  322. fstpt 16(%rsp)
  323. jmp ..B1.39
  324. ..B1.56:
  # x is an infinity and y is finite and non-zero. y values with a zero
  # exponent field, or with exponent field 1 and a significand not above
  # 0x8000000000000000, go to the NaN path at ..B1.61; every other y returns
  # an infinity from ..B1.68.
  # NOTE(review): this threshold is taken literally from the compares below;
  # confirm the intended semantics against the C source.
  325. testl %ecx, %ecx
  326. jle ..B1.61
  327. ..B1.57:
  328. cmpl $1, %ecx
  329. jne ..B1.68
  330. ..B1.58:
  # Unsigned compare of the high significand dword with 0x80000000.
  331. movl 68(%rsp), %eax
  332. cmpl $-2147483648, %eax
  333. jb ..B1.61
  334. ..B1.59:
  335. jne ..B1.68
  336. ..B1.60:
  # High dword equal: any non-zero low dword puts y above the threshold.
  337. cmpl $0, 64(%rsp)
  338. ja ..B1.68
  339. ..B1.61:
  # NaN path: force the precision-control field, then build +inf * +0.0 as
  # in ..B1.12.
  340. movzwl 34(%rsp), %ecx
  341. movl %ecx, %eax
  342. andl $768, %eax
  343. cmpl $768, %eax
  344. je ..B1.65
  345. ..B1.62:
  346. orl $-64768, %ecx
  347. movw %cx, 32(%rsp)
  348. ..B1.63:
  349. fldcw 32(%rsp)
  350. ..B1.64:
  351. movb $1, %dl
  352. ..B1.65:
  353. lea _infs(%rip), %rax
  354. lea _zeros(%rip), %rcx
  355. testb %dl, %dl
  356. movsd (%rax), %xmm0
  357. mulsd (%rcx), %xmm0
  358. movsd %xmm0, 8(%rsp)
  359. fldl 8(%rsp)
  360. fstpt 16(%rsp)
  361. je ..B1.67
  362. ..B1.66:
  363. fldcw 34(%rsp)
  364. ..B1.67:
  365. fldt 16(%rsp)
  366. addq $40, %rsp
  367. .cfi_def_cfa_offset 8
  368. ret
  369. .cfi_def_cfa_offset 48
  370. ..B1.68:
  # Return _infs[sign bit of x], loaded as a double. The control word has
  # not been changed on any path that reaches this block, so %rdx can be
  # overwritten and no restore is needed.
  371. movb 57(%rsp), %al
  372. lea _infs(%rip), %rdx
  373. andb $-128, %al
  374. shrb $7, %al
  375. movzbl %al, %ecx
  376. fldl (%rdx,%rcx,8)
  377. addq $40, %rsp
  378. .cfi_def_cfa_offset 8
  379. ret
  380. .cfi_def_cfa_offset 48
  381. ..B1.69:
  # y has an all-ones exponent field. A significand of exactly
  # 0x8000000000000000 is the infinity encoding, handled at ..B1.77.
  382. movq $0x8000000000000000, %rcx
  383. cmpq 64(%rsp), %rcx
  384. je ..B1.77
  385. ..B1.70:
  # Reached when y (fall-through) or x (from ..B1.100) has an all-ones
  # exponent field without the infinity significand. The result is x + y,
  # computed with the precision-control field forced to 11b. %edx is reused
  # for the control word, so this path does its own restore.
  386. movzwl 34(%rsp), %edx
  387. movl %edx, %eax
  388. andl $768, %eax
  389. cmpl $768, %eax
  390. je ..B1.76
  391. ..B1.71:
  392. orl $-64768, %edx
  393. movw %dx, 32(%rsp)
  394. ..B1.72:
  395. fldcw 32(%rsp)
  396. ..B1.73:
  397. fldt 48(%rsp)
  398. fldt 64(%rsp)
  399. faddp %st, %st(1)
  400. fstpt 16(%rsp)
  401. ..B1.74:
  402. fldcw 34(%rsp)
  403. ..B1.75:
  404. fldt 16(%rsp)
  405. addq $40, %rsp
  406. .cfi_def_cfa_offset 8
  407. ret
  408. .cfi_def_cfa_offset 48
  409. ..B1.76:
  # Same x + y computation when the control word needs no change.
  410. fldt 48(%rsp)
  411. fldt 64(%rsp)
  412. faddp %st, %st(1)
  413. fstpt 16(%rsp)
  414. jmp ..B1.75
  415. ..B1.77:
  # y is +/-infinity. Force the precision-control field and record the
  # change in %dl.
  416. movzwl 34(%rsp), %esi
  417. movl %esi, %ecx
  418. andl $768, %ecx
  419. cmpl $768, %ecx
  420. je ..B1.81
  421. ..B1.78:
  422. orl $-64768, %esi
  423. movw %si, 32(%rsp)
  424. ..B1.79:
  425. fldcw 32(%rsp)
  426. ..B1.80:
  427. movb $1, %dl
  428. ..B1.81:
  # Non-zero exponent field: x is normal or infinite.
  429. testl %eax, %eax
  430. jne ..B1.89
  431. ..B1.82:
  # x has a zero exponent field; check whether its significand is non-zero.
  432. cmpl $0, 52(%rsp)
  433. jne ..B1.84
  434. ..B1.83:
  435. cmpl $0, 48(%rsp)
  436. je ..B1.85
  437. ..B1.84:
  # Denormal x: copy the first qword of _smallest_value_64 to the scratch
  # slot at (%rsp). The stored value is not read again in this routine.
  438. lea _smallest_value_64(%rip), %rax
  439. movq (%rax), %rcx
  440. movq %rcx, (%rsp)
  441. ..B1.85:
  # Non-zero significand: treat x like any other finite non-zero value.
  442. cmpq $0, 48(%rsp)
  443. jne ..B1.90
  444. ..B1.86:
  # x is +/-0. Bit 7 of byte 73 is y's sign: y = -inf returns x itself,
  # y = +inf goes to the +inf * +0.0 NaN construction.
  445. testb $-128, 73(%rsp)
  446. je ..B1.88
  447. ..B1.87:
  448. fldt 48(%rsp)
  449. fstpt 16(%rsp)
  450. jmp ..B1.94
  451. ..B1.88:
  452. lea _infs(%rip), %rax
  453. lea _zeros(%rip), %rcx
  454. movsd (%rax), %xmm0
  455. mulsd (%rcx), %xmm0
  456. movsd %xmm0, 8(%rsp)
  457. fldl 8(%rsp)
  458. fstpt 16(%rsp)
  459. jmp ..B1.94
  460. ..B1.89:
  # All-ones exponent field: x is an infinity (NaNs were routed to ..B1.70).
  461. cmpl $32767, %eax
  462. je ..B1.97
  463. ..B1.90:
  # Finite non-zero x: %ecx = sign bit of x. y = -inf selects _zeros[sign],
  # y = +inf selects _infs[sign].
  464. movb 57(%rsp), %al
  465. andb $-128, %al
  466. shrb $7, %al
  467. movzbl %al, %ecx
  468. testb $-128, 73(%rsp)
  469. je ..B1.92
  470. ..B1.91:
  471. lea _zeros(%rip), %rax
  472. movsd (%rax,%rcx,8), %xmm0
  473. jmp ..B1.93
  474. ..B1.92:
  475. lea _infs(%rip), %rax
  476. movsd (%rax,%rcx,8), %xmm0
  477. ..B1.93:
  # Widen the selected double to 80 bits through 8(%rsp).
  478. movsd %xmm0, 8(%rsp)
  479. fldl 8(%rsp)
  480. fstpt 16(%rsp)
  481. ..B1.94:
  # Common exit for the y = +/-inf cases: restore the control word if it
  # was changed and return the parked result.
  482. testb %dl, %dl
  483. je ..B1.96
  484. ..B1.95:
  485. fldcw 34(%rsp)
  486. ..B1.96:
  487. fldt 16(%rsp)
  488. addq $40, %rsp
  489. .cfi_def_cfa_offset 8
  490. ret
  491. .cfi_def_cfa_offset 48
  492. ..B1.97:
  # x and y are both infinities: y = -inf goes to the NaN construction,
  # y = +inf returns x.
  493. testb $-128, 73(%rsp)
  494. jne ..B1.99
  495. ..B1.98:
  496. fldt 48(%rsp)
  497. fstpt 16(%rsp)
  498. jmp ..B1.94
  499. ..B1.99:
  500. lea _infs(%rip), %rax
  501. lea _zeros(%rip), %rcx
  502. movsd (%rax), %xmm0
  503. mulsd (%rcx), %xmm0
  504. movsd %xmm0, 8(%rsp)
  505. fldl 8(%rsp)
  506. fstpt 16(%rsp)
  507. jmp ..B1.94
  508. ..B1.100:
  # x has an all-ones exponent field. Anything other than the infinity
  # significand 0x8000000000000000 is handled by the x + y path at ..B1.70.
  509. movq $0x8000000000000000, %rsi
  510. cmpq 48(%rsp), %rsi
  511. jne ..B1.70
  512. ..B1.101:
  # x is +/-infinity: y with an all-ones exponent field goes to ..B1.69,
  # any finite y to ..B1.32.
  513. cmpl $32767, %ecx
  514. je ..B1.69
  515. jmp ..B1.32
  516. .align 16,0x90
  517. .cfi_endproc
  518. .type scalbl,@function
  519. .size scalbl,.-scalbl
  520. .data
  521. # -- End scalbl
  # Read-only constants used by scalbl.
  522. .section .rodata, "a"
  523. .align 4
  524. .align 4
  # _TWO_63H: one double, stored low dword first. The high dword
  # 1139277824 = 0x43E80000 encodes 1.5 * 2**63. scalbl adds it to y in
  # ..B1.11 to round y to an integer.
  525. _TWO_63H:
  526. .long 0
  527. .long 1139277824
  528. .type _TWO_63H,@object
  529. .size _TWO_63H,8
  # _infs: two doubles, each stored low dword first.
  #   [0] high dword 2146435072 = 0x7FF00000 -> +infinity
  #   [1] high dword 4293918720 = 0xFFF00000 -> -infinity
  # scalbl indexes this table with a sign bit and a scale of 8.
  530. .align 4
  531. _infs:
  532. .long 0
  533. .long 2146435072
  534. .long 0
  535. .long 4293918720
  536. .type _infs,@object
  537. .size _infs,16
  # _zeros: two doubles, each stored low dword first.
  #   [0] all bits clear                        -> +0.0
  #   [1] high dword 2147483648 = 0x80000000   -> -0.0
  # scalbl indexes this table with a sign bit and a scale of 8.
  538. .align 4
  539. _zeros:
  540. .long 0
  541. .long 0
  542. .long 0
  543. .long 2147483648
  544. .type _zeros,@object
  545. .size _zeros,16
  # large_integers: two 32-bit integers, +65536 and -65536. scalbl loads the
  # entry selected by y's sign bit (scale 4) in ..B1.15 as a stand-in for
  # the integer value of y when |y| >= 2**31.
  546. .align 4
  547. large_integers:
  548. .long 65536
  549. .long -65536
  550. .type large_integers,@object
  551. .size large_integers,8
  # _TWO_75: two doubles, each stored low dword first.
  #   [0] high dword 1151336448 = 0x44A00000 -> 2**75
  #   [1] high dword  994050048 = 0x3B400000 -> 2**-75
  # scalbl reads [0] through _TWO_75 and [1] through 8+_TWO_75, so the order
  # and the 8-byte stride must be kept.
  552. .align 4
  553. _TWO_75:
  554. .long 0
  555. .long 1151336448
  556. .long 0
  557. .long 994050048
  558. .type _TWO_75,@object
  559. .size _TWO_75,16
  # _ones: two doubles, each stored low dword first.
  #   [0] high dword 1072693248 = 0x3FF00000 -> +1.0
  #   [1] high dword 3220176896 = 0xBFF00000 -> -1.0
  # Only entry [0] is read by the scalbl code in this file.
  560. .align 4
  561. _ones:
  562. .long 0
  563. .long 1072693248
  564. .long 0
  565. .long 3220176896
  566. .type _ones,@object
  567. .size _ones,16
  # _smallest_value_64: two doubles, each stored low dword first.
  #   [0] low dword 1, high dword 0           -> smallest positive subnormal
  #                                              double (2**-1074)
  #   [1] low dword 1, high dword 0x80000000  -> the same magnitude, negative
  # scalbl copies the first qword to a stack scratch slot in ..B1.84.
  568. .align 4
  569. _smallest_value_64:
  570. .long 1
  571. .long 0
  572. .long 1
  573. .long 2147483648
  574. .type _smallest_value_64,@object
  575. .size _smallest_value_64,16
  # _large_value_80: two 80-bit extended values, each padded to 16 bytes
  # (five data words followed by three zero words).
  #   [0] significand 0x8000000000000000, sign/exponent word
  #       26383 = 16383 + 10000                 -> +2**10000
  #   [1] same significand, sign/exponent word
  #       59151 = 26383 + 32768 (sign bit set)  -> -2**10000
  # scalbl multiplies [0] by the entry selected with x's sign (stride 16) on
  # its overflow path, ..B1.48.
  576. .align 2
  577. _large_value_80:
  578. .word 0
  579. .word 0
  580. .word 0
  581. .word 32768
  582. .word 26383
  583. .word 0
  584. .word 0
  585. .word 0
  586. .word 0
  587. .word 0
  588. .word 0
  589. .word 32768
  590. .word 59151
  591. .word 0
  592. .word 0
  593. .word 0
  594. .type _large_value_80,@object
  595. .size _large_value_80,32
  # _small_value_80: two 80-bit extended values, each padded to 16 bytes
  # (five data words followed by three zero words).
  #   [0] significand 0x8000000000000000, sign/exponent word
  #       6383 = 16383 - 10000                  -> +2**-10000
  #   [1] same significand, sign/exponent word
  #       39151 = 6383 + 32768 (sign bit set)   -> -2**-10000
  # scalbl multiplies [0] by the entry selected with x's sign (stride 16) on
  # its total-underflow path, ..B1.50.
  596. .align 2
  597. _small_value_80:
  598. .word 0
  599. .word 0
  600. .word 0
  601. .word 32768
  602. .word 6383
  603. .word 0
  604. .word 0
  605. .word 0
  606. .word 0
  607. .word 0
  608. .word 0
  609. .word 32768
  610. .word 39151
  611. .word 0
  612. .word 0
  613. .word 0
  614. .type _small_value_80,@object
  615. .size _small_value_80,32
  # File trailer: switch back to .data, emit an empty .note.GNU-stack
  # section (no flags), and open the .eh_frame section with its start label.
  # No unwind data is written explicitly here.
  616. .data
  617. .section .note.GNU-stack, ""
  618. // -- Begin DWARF2 SEGMENT .eh_frame
  619. .section .eh_frame,"a",@progbits
  620. .eh_frame_seg:
  621. .align 1
  622. # End