scalbf.S 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "scalbf.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin scalbf
  41. .text
  42. .align 16,0x90                  # 16-byte align the entry point, padding with 0x90 bytes
  43. .globl scalbf
  # ----------------------------------------------------------------------
  # scalbf -- scale a single-precision x by two to the power y (x * 2**y)
  # by editing the biased exponent field of x directly.
  #
  # Syntax: AT&T / GAS, x86-64.
  # In:     %xmm0 = x, %xmm1 = y (both single precision)
  # Out:    %xmm0 = result
  # Writes: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %xmm0, flags
  # Stack:  x is spilled to -16(%rsp) and y to -24(%rsp) without moving
  #         %rsp, so this leaf routine depends on an ABI that provides a
  #         red zone below %rsp (e.g. System V AMD64).
  #
  # Result summary, as implemented below:
  #   x or y is NaN ...................... x + y
  #   y is +-0 ........................... x unchanged
  #   x finite, y not an integer ......... 0 * inf (NaN)
  #   x is +-0, y a nonzero integer ...... x unchanged
  #   x finite, y integer ................ exponent(x) += y, with |y|
  #                                        clamped to 65536; overflow gives
  #                                        +-2**100 * 2**100, underflow gives
  #                                        +-2**-100 * 2**-100, and denormal
  #                                        results are produced by a final
  #                                        multiply by 2**-25
  #   y = +inf: x = 0 -> NaN; x finite nonzero -> +-inf; x = inf -> x
  #   y = -inf: x = 0 -> x;   x finite nonzero -> +-0;   x = inf -> NaN
  #   x = +-inf, y finite nonzero ........ +-inf, except NaN when y is
  #                                        denormal or exactly +-2**-126
  # NOTE(review): once x is infinite the code never tests whether y is an
  # integer, so e.g. x = inf, y = 0.5 returns inf -- confirm this is intended.
  # ----------------------------------------------------------------------
  44. scalbf:
  45. # parameter 1: %xmm0 (x, the value being scaled)
  46. # parameter 2: %xmm1 (y, the power of two)
  47. ..B1.1:
  48. .cfi_startproc
  49. ..___tag_value_scalbf.1:
  50. ..L2:
  51. movss %xmm0, -16(%rsp)          # spill x so its bit pattern can be read with integer loads
  52. movzwl -14(%rsp), %esi          # esi = upper 16 bits of x: sign, exponent, top 7 mantissa bits
  53. movl %esi, %edx
  54. movss %xmm1, -24(%rsp)          # spill y
  55. andl $32640, %edx               # 0x7F80 keeps only the 8 exponent bits
  56. movzwl -22(%rsp), %r8d          # r8d = upper 16 bits of y
  57. andl $32640, %r8d
  58. shrl $7, %edx                   # edx = biased exponent of x
  59. shrl $7, %r8d                   # r8d = biased exponent of y
  60. cmpl $255, %edx                 # exponent 255: x is inf or NaN
  61. je ..B1.48
  62. ..B1.2:
  63. cmpl $255, %r8d                 # exponent 255: y is inf or NaN
  64. je ..B1.31
  65. ..B1.3:
  66. cmpl $127, %r8d                 # biased exponent below 127 means |y| < 1
  67. jl ..B1.16
  68. ..B1.4:
  69. cmpl $150, %r8d                 # exponent >= 150 means |y| >= 2**23: no fraction bits left
  70. jge ..B1.7
  # Block B1.5: 1 <= |y| < 2**23. Split y into its integer part (eax) and
  # its fraction bits (edi); any nonzero fraction bit makes y non-integral.
  71. ..B1.5:
  72. movd %xmm1, %edi
  73. movl %r8d, %ecx
  74. andl $8388607, %edi             # 0x7FFFFF: edi = 23-bit mantissa field of y
  75. negl %ecx
  76. movl %edi, %eax
  77. addl $22, %ecx                  # cl = 22 - exp; shifts use cl mod 32, which equals 150 - exp
  78. orl $8388608, %eax              # 0x800000: put back the implicit leading 1
  79. addl $10, %r8d                  # exp + 10; mod 32 this is (exp - 127) + 9
  80. shrl %cl, %eax                  # eax = integer part of |y|
  81. movl %r8d, %ecx
  82. shll %cl, %edi                  # push the integer bits out of the top; the fraction bits remain
  83. testl %edi, %edi
  84. je ..B1.8                       # no fraction bits: y is an integer
  # Block B1.6: invalid-argument result, computed as 0.0 * +inf.
  85. ..B1.6:
  86. pxor %xmm0, %xmm0
  87. mulss .L_2il0floatpacket.0(%rip), %xmm0   # 0.0 * 0x7f800000 (+inf)
  88. ret
  89. ..B1.7:
  90. movl $65536, %eax               # |y| >= 2**23: go straight to the clamp value used in B1.11
  91. ..B1.8:
  92. testl %edx, %edx                # exponent field of x zero: x is zero or denormal
  93. jne ..B1.11
  94. ..B1.9:
  95. testl $8388607, -16(%rsp)       # mantissa also zero: x is +-0, returned unchanged
  96. je ..B1.15
  # Block B1.10: x is denormal. Multiply by 2**25 (which brings a denormal
  # into the normal range), re-read the exponent, and subtract 25 again so
  # edx holds the exponent x would have had.
  97. ..B1.10:
  98. movss -16(%rsp), %xmm0
  99. mulss .L_2il0floatpacket.1(%rip), %xmm0   # * 0x4c000000 (2**25)
  100. movss %xmm0, -16(%rsp)
  101. movzwl -14(%rsp), %esi         # refresh esi from the rescaled x
  102. movl %esi, %edx
  103. andl $32640, %edx
  104. shrl $7, %edx
  105. addl $-25, %edx                # compensate for the 2**25 pre-scale
  # Block B1.11: eax = |y| as an integer, edx = exponent of x.
  # Clamp, apply the sign of y, and form the new biased exponent in ecx.
  106. ..B1.11:
  107. movl $65536, %ecx
  108. cmpl $65536, %eax
  109. movzbl -21(%rsp), %edi         # top byte of y; bit 7 is the sign of y
  110. cmovg %ecx, %eax               # eax = min(eax, 65536)
  111. movl %eax, %r8d
  112. negl %r8d
  113. testl $128, %edi
  114. cmovne %r8d, %eax              # y negative: eax = -eax
  115. lea (%rdx,%rax), %ecx          # ecx = new biased exponent = exp(x) + y
  116. testl %ecx, %ecx
  117. jle ..B1.27                    # <= 0: denormal result or underflow
  118. ..B1.12:
  119. cmpl $255, %ecx
  120. jge ..B1.26                    # >= 255: overflow
  # Block B1.13: normal result. Splice the new exponent into the upper half
  # of x in memory and reload it as a float.
  121. ..B1.13:
  122. movzbl %cl, %ecx
  123. andl $-32641, %esi             # ~0x7F80: clear the old exponent bits
  124. shll $7, %ecx
  125. orl %ecx, %esi
  126. movw %si, -14(%rsp)
  127. movss -16(%rsp), %xmm0
  128. ..B1.14:
  129. ret
  130. ..B1.15:
  131. movss -16(%rsp), %xmm0         # return x unchanged
  132. ret
  # Block B1.16: reached with |y| < 1, and also from B1.49 with x = +-inf
  # and any finite y.
  133. ..B1.16:
  134. testl %r8d, %r8d
  135. jne ..B1.19
  136. ..B1.17:
  137. testl $8388607, -24(%rsp)      # exponent and mantissa of y both zero: y is +-0
  138. je ..B1.15                     # return x
  139. ..B1.19:
  140. cmpl $255, %edx                # only x = +-inf continues (a NaN x went to B1.43 earlier)
  141. jne ..B1.6                     # finite x with nonzero |y| < 1: NaN
  142. ..B1.20:
  143. testl %r8d, %r8d
  144. jle ..B1.6                     # x = inf, y denormal: NaN
  145. ..B1.21:
  146. cmpl $1, %r8d
  147. jne ..B1.23
  148. ..B1.22:
  149. testl $8388607, -24(%rsp)      # test clears OF and the masked result has bit 31 clear,
  150. jle ..B1.6                     # so jle fires only when the mantissa is zero (y = +-2**-126)
  # Block B1.23: return infinity carrying the sign of x.
  151. ..B1.23:
  152. movb -13(%rsp), %al            # top byte of x
  153. lea _infs(%rip), %rdx
  154. andb $-128, %al                # keep the sign bit
  155. shrb $7, %al                   # al = 0 for positive x, 1 for negative x
  156. movzbl %al, %ecx
  157. movss (%rdx,%rcx,4), %xmm0     # _infs[sign]
  158. ret
  # Block B1.26: overflow. Returns (+-2**100) * 2**100 with the sign of x;
  # the product is outside the single-precision range.
  159. ..B1.26:
  160. movb -13(%rsp), %al
  161. lea _large_value_32(%rip), %rdx
  162. andb $-128, %al
  163. shrb $7, %al
  164. movzbl %al, %ecx               # ecx = sign bit of x
  165. movss (%rdx,%rcx,4), %xmm0     # _large_value_32[sign]
  166. mulss .L_2il0floatpacket.2(%rip), %xmm0   # * 0x71800000 (2**100)
  167. movss %xmm0, -24(%rsp)         # stored to the y slot; nothing reads it before ret
  168. ret
  169. ..B1.27:
  170. cmpl $-23, %ecx
  171. jge ..B1.29                    # exponent in [-23, 0]: build a denormal result
  # Block B1.28: underflow. Returns (+-2**-100) * 2**-100 with the sign of x.
  172. ..B1.28:
  173. movb -13(%rsp), %al
  174. lea _small_value_32(%rip), %rdx
  175. andb $-128, %al
  176. shrb $7, %al
  177. movzbl %al, %ecx               # ecx = sign bit of x
  178. movss (%rdx,%rcx,4), %xmm0     # _small_value_32[sign]
  179. mulss .L_2il0floatpacket.3(%rip), %xmm0   # * 0x0d800000 (2**-100)
  180. jmp ..B1.30
  # Block B1.29: denormal result. Copy x into the y slot with exponent
  # (new exponent + 25), which is a normal number, then multiply by 2**-25
  # so the floating-point multiply produces the final value.
  181. ..B1.29:
  182. movl -16(%rsp), %ecx           # ecx = bit pattern of x
  183. lea 25(%rdx,%rax), %eax        # eax = exp(x) + y + 25
  184. movl %ecx, -24(%rsp)           # y slot now holds a copy of x
  185. shrl $16, %ecx                 # upper half of x
  186. movzbl %al, %eax
  187. andl $-32641, %ecx             # ~0x7F80: clear the old exponent bits
  188. shll $7, %eax
  189. orl %eax, %ecx
  190. movw %cx, -22(%rsp)            # write the patched upper half into the copy
  191. movss -24(%rsp), %xmm0
  192. mulss .L_2il0floatpacket.4(%rip), %xmm0   # * 0x33000000 (2**-25)
  193. ..B1.30:
  194. movss %xmm0, -24(%rsp)         # stored to the y slot; nothing reads it before ret
  195. ret
  # Block B1.31: the exponent field of y is 255 (y is inf or NaN).
  196. ..B1.31:
  197. testl $8388607, -24(%rsp)
  198. jne ..B1.43                    # nonzero mantissa: y is NaN
  199. ..B1.32:
  200. testl %edx, %edx               # y is +-inf from here on
  201. jne ..B1.38
  202. ..B1.33:
  203. testl $8388607, -16(%rsp)
  204. jne ..B1.39                    # denormal x is handled like any finite nonzero x
  205. ..B1.34:
  206. testb $-128, -21(%rsp)         # x is +-0: check the sign bit of y
  207. je ..B1.36
  208. ..B1.35:
  209. movss -16(%rsp), %xmm0         # x = +-0, y = -inf: return x
  210. ret
  211. ..B1.36:
  212. pxor %xmm0, %xmm0              # x = +-0, y = +inf: 0.0 * inf (NaN)
  213. mulss .L_2il0floatpacket.0(%rip), %xmm0
  214. ..B1.37:
  215. ret
  216. ..B1.38:
  217. cmpl $255, %edx
  218. je ..B1.44                     # x is +-inf (a NaN x never reaches this point)
  # Block B1.39: x finite and nonzero, y = +-inf.
  219. ..B1.39:
  220. movb -13(%rsp), %al
  221. andb $-128, %al
  222. shrb $7, %al
  223. movzbl %al, %edx               # edx = sign bit of x
  224. testb $-128, -21(%rsp)         # sign bit of y
  225. je ..B1.41
  226. ..B1.40:
  227. lea _zeros(%rip), %rax         # y = -inf: zero with the sign of x
  228. movss (%rax,%rdx,4), %xmm0
  229. ret
  230. ..B1.41:
  231. lea _infs(%rip), %rax          # y = +inf: infinity with the sign of x
  232. movss (%rax,%rdx,4), %xmm0
  233. ..B1.42:
  234. ret
  # Block B1.43: at least one operand is NaN; return x + y.
  235. ..B1.43:
  236. movss -16(%rsp), %xmm0
  237. addss -24(%rsp), %xmm0
  238. ret
  # Block B1.44: x = +-inf and y = +-inf.
  239. ..B1.44:
  240. testb $-128, -21(%rsp)         # sign bit of y
  241. jne ..B1.46
  242. ..B1.45:
  243. movss -16(%rsp), %xmm0         # y = +inf: return x
  244. ret
  245. ..B1.46:
  246. pxor %xmm0, %xmm0              # y = -inf: 0.0 * inf (NaN)
  247. mulss .L_2il0floatpacket.0(%rip), %xmm0
  248. ..B1.47:
  249. ret
  # Block B1.48: the exponent field of x is 255 (x is inf or NaN).
  250. ..B1.48:
  251. testl $8388607, -16(%rsp)
  252. jne ..B1.43                    # nonzero mantissa: x is NaN
  253. ..B1.49:
  254. cmpl $255, %r8d                # x is +-inf; dispatch on y
  255. je ..B1.31                     # y is inf or NaN
  256. jmp ..B1.16                    # y is finite
  257. .align 16,0x90
  258. .cfi_endproc
  259. .type scalbf,@function
  260. .size scalbf,.-scalbf
  261. .data
  262. # -- End scalbf
  263. .section .rodata, "a"          # read-only single-precision constants referenced by scalbf
  264. .align 4
  265. .align 4
  266. .L_2il0floatpacket.0:          # +infinity; multiplied by 0.0 to produce NaN
  267. .long 0x7f800000
  268. .type .L_2il0floatpacket.0,@object
  269. .size .L_2il0floatpacket.0,4
  270. .align 4
  271. .L_2il0floatpacket.1:          # 2**25; pre-scale factor for denormal x
  272. .long 0x4c000000
  273. .type .L_2il0floatpacket.1,@object
  274. .size .L_2il0floatpacket.1,4
  275. .align 4
  276. .L_2il0floatpacket.2:          # 2**100; second factor of the overflow product
  277. .long 0x71800000
  278. .type .L_2il0floatpacket.2,@object
  279. .size .L_2il0floatpacket.2,4
  280. .align 4
  281. .L_2il0floatpacket.3:          # 2**-100; second factor of the underflow product
  282. .long 0x0d800000
  283. .type .L_2il0floatpacket.3,@object
  284. .size .L_2il0floatpacket.3,4
  285. .align 4
  286. .L_2il0floatpacket.4:          # 2**-25; final scale for denormal results
  287. .long 0x33000000
  288. .type .L_2il0floatpacket.4,@object
  289. .size .L_2il0floatpacket.4,4
  290. .align 4
  291. _infs:                         # indexed by sign bit: { +inf, -inf }
  292. .long 0x7f800000
  293. .long 0xff800000
  294. .type _infs,@object
  295. .size _infs,8
  296. .align 4
  297. _large_value_32:               # indexed by sign bit: { +2**100, -2**100 }
  298. .long 0x71800000
  299. .long 0xf1800000
  300. .type _large_value_32,@object
  301. .size _large_value_32,8
  302. .align 4
  303. _small_value_32:               # indexed by sign bit: { +2**-100, -2**-100 }
  304. .long 0x0d800000
  305. .long 0x8d800000
  306. .type _small_value_32,@object
  307. .size _small_value_32,8
  308. .align 4
  309. _zeros:                        # indexed by sign bit: { +0.0, -0.0 }
  310. .long 0x00000000
  311. .long 0x80000000
  312. .type _zeros,@object
  313. .size _zeros,8
  314. .data
  315. .section .note.GNU-stack, ""   # empty marker section; by GNU toolchain convention it signals that this object does not need an executable stack
  316. // -- Begin DWARF2 SEGMENT .eh_frame
  317. .section .eh_frame,"a",@progbits
  318. .eh_frame_seg:                 # only a label and an alignment are emitted here; no unwind records are written by hand
  319. .align 1
  320. # End