/* exp10f_gen.S */
/*
 * Math library
 *
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of Intel Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * Author Name <jingwei.zhang@intel.com>
 * History:
 * 03-14-2016 Initial version. numerics svn rev. 12864
 */
  .file "exp10f_gen.c"
  .text
..TXTST0:
/* -- Begin exp10f */
/*
 * exp10f -- single-precision 10**x.
 *
 * Syntax: AT&T / GAS, x86-64, position independent (RIP-relative data,
 *         GOT access for the external table).
 * ABI:    argument and result are passed in %xmm0; the routine is a
 *         leaf and uses scratch slots below %rsp without moving %rsp
 *         (presumably System V AMD64 red zone -- confirm for the
 *         target platform).
 *
 * In:     %xmm0 = x (float)
 * Out:    %xmm0 = result (float)
 * Clobb:  %rax, %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm5, flags
 * Stack:  -8(%rsp)  4 bytes, saved copy of x
 *         -16(%rsp) 4 bytes, float whose mantissa holds a rounded integer
 *         -24(%rsp) 8 bytes, double bit pattern of 2**k
 *
 * Paths:
 *   ..B1.17  |x| below the smallest normal float : returns 1.0 + x
 *   ..B1.4-6 x is an integer in 1..10            : table lookup
 *   ..B1.7   every other in-range x              : table + polynomial
 *   ..B1.8+  |x| above range[sign]               : overflow, underflow,
 *            infinity and NaN handling
 */
  .text
  .align 16,0x90
  .globl exp10f
exp10f:
/* parameter 1: %xmm0 */
..B1.1:
  .cfi_startproc
..___tag_value_exp10f.1:
..L2:
  movd      %xmm0, %edx                       /* edx = raw bit pattern of x */
  movss     %xmm0, -8(%rsp)                   /* keep x for later reloads */
  movl      %edx, %esi
  movl      %edx, %ecx
  andl      $2147483647, %esi                 /* esi = bits of |x| (mask 0x7fffffff) */
  shrl      $31, %ecx                         /* ecx = sign bit, 0 or 1 */
  cmpl      $8388608, %esi                    /* 0x00800000 = smallest normal float */
  jb        ..B1.17                           /* zero or subnormal input */
..B1.2:
  movl      %ecx, %edi                        /* rdi = sign as table index (32-bit move zero-extends) */
  lea       range(%rip), %rax
  cmpl      (%rax,%rdi,4), %esi               /* |x| against range[sign] */
  ja        ..B1.8                            /* outside the computable range */
..B1.3:
  movss     .L_2il0floatpacket.0(%rip), %xmm1 /* xmm1 = 1.5 * 2**23, also needed in ..B1.7 */
  cmpl      $1065353216, %edx                 /* 0x3f800000 = 1.0f; signed compare, so negative x branches too */
  jl        ..B1.7
..B1.4:
  /* Here 1.0 <= x <= range[0]. Test whether x has a fractional part. */
  movl      %esi, %ecx
  shrl      $23, %ecx                         /* ecx = biased exponent */
  addl      $10, %ecx
  shll      %cl, %esi                         /* 32-bit shift uses the count mod 32: drops sign, exponent and integer bits */
  testl     %esi, %esi                        /* remaining bits are the fraction bits */
  jne       ..B1.7                            /* not an integer */
..B1.5:
  addss     %xmm1, %xmm0                      /* x + 1.5 * 2**23 puts the integer value in the low mantissa bits */
  movss     %xmm0, -16(%rsp)
  movl      -16(%rsp), %eax
  shll      $10, %eax
  sarl      $10, %eax                         /* eax = sign-extended low 22 bits = integer value of x */
  cmpl      $10, %eax
  jg        ..B1.7                            /* exact_values has entries 0..10 only */
..B1.6:
  movslq    %eax, %rax
  lea       exact_values(%rip), %rdx
  movss     (%rdx,%rax,4), %xmm0              /* result = exact_values[n] */
  ret
..B1.7:
  /*
   * General path, evaluated in double precision:
   *   n = round(x * packet.1)               packet.1 is about 64*log2(10)
   *   r = x + n * packet.3                  packet.3 is about -log10(2)/64
   *   p = c1*r + c2*r**2 + c3*r**3 + c4*r**4
   *       (c1 = packet.7, c2 = packet.5, c3 = packet.6, c4 = packet.4)
   *   j = sign-extended low 6 bits of n, k = (n - j) / 64
   *   result = (T + T*p) * 2**k, T = 8-byte entry at table + 256 + 8*j
   * NOTE(review): T presumably holds 2**(j/64); the table is defined
   * outside this file, confirm against __libm_expf_table_64.
   */
  pxor      %xmm3, %xmm3
  movq      $0x3ff0000000000000, %rcx         /* bit pattern of 1.0 (double), exponent bias for 2**k */
  cvtss2sd  -8(%rsp), %xmm3                   /* xmm3 = (double) x */
  movaps    %xmm3, %xmm0
  mulsd     .L_2il0floatpacket.1(%rip), %xmm0
  movsd     .L_2il0floatpacket.6(%rip), %xmm4 /* xmm4 = c3 */
  movq      __libm_expf_table_64@GOTPCREL(%rip), %rsi
  addsd     .L_2il0floatpacket.2(%rip), %xmm0 /* + 1.5 * 2**23 */
  cvtsd2ss  %xmm0, %xmm0                      /* rounding to float leaves n in the low mantissa bits */
  movss     %xmm0, -16(%rsp)
  movss     -16(%rsp), %xmm2
  movsd     .L_2il0floatpacket.4(%rip), %xmm0 /* xmm0 = c4 */
  subss     %xmm1, %xmm2                      /* xmm2 = n as a float (xmm1 still 1.5 * 2**23) */
  cvtss2sd  %xmm2, %xmm2
  mulsd     .L_2il0floatpacket.3(%rip), %xmm2
  movl      -16(%rsp), %edx                   /* integer view of the same float */
  addsd     %xmm2, %xmm3                      /* xmm3 = r */
  movaps    %xmm3, %xmm1
  andl      $4194303, %edx                    /* edx = low 22 bits of n (mask 0x3fffff) */
  mulsd     %xmm3, %xmm1                      /* xmm1 = r**2 */
  movl      %edx, %eax
  mulsd     %xmm1, %xmm0
  mulsd     %xmm1, %xmm4
  addsd     .L_2il0floatpacket.5(%rip), %xmm0 /* c4*r**2 + c2 */
  addsd     .L_2il0floatpacket.7(%rip), %xmm4 /* c3*r**2 + c1 */
  mulsd     %xmm1, %xmm0                      /* even-power terms */
  mulsd     %xmm3, %xmm4                      /* odd-power terms */
  shll      $26, %eax
  addsd     %xmm4, %xmm0                      /* xmm0 = p */
  sarl      $26, %eax                         /* eax = j in -32..31 */
  movslq    %eax, %rax
  subl      %eax, %edx                        /* n - j, a multiple of 64 */
  shrl      $6, %edx                          /* k, still carrying the 22-bit wrap for negative n */
  movsd     256(%rsi,%rax,8), %xmm5           /* xmm5 = T */
  mulsd     %xmm5, %xmm0
  shlq      $52, %rdx                         /* move k to the exponent field; wrap bits fall off the top */
  addsd     %xmm5, %xmm0                      /* T + T*p */
  addq      %rcx, %rdx                        /* rdx = bit pattern of 2**k */
  movq      %rdx, -24(%rsp)
  mulsd     -24(%rsp), %xmm0
  cvtsd2ss  %xmm0, %xmm0                      /* round the double result to float */
  ret
..B1.8:
  cmpl      $2139095040, %esi                 /* 0x7f800000 = exponent field all ones */
  jae       ..B1.13                           /* infinity or NaN */
..B1.9:
  testl     %ecx, %ecx                        /* ecx still holds the sign bit */
  je        ..B1.11
..B1.10:
  /* Large negative finite x: square 2**-100 so the result underflows. */
  movss     .L_2il0floatpacket.8(%rip), %xmm0
  mulss     %xmm0, %xmm0
  ret
..B1.11:
  /* Large positive finite x: square 2**100 so the result overflows. */
  movss     .L_2il0floatpacket.9(%rip), %xmm0
  mulss     %xmm0, %xmm0
..B1.12:
  ret
..B1.13:
  jne       ..B1.15                           /* flags are still those of the cmpl in ..B1.8 */
..B1.14:
  lea       _inf_zero(%rip), %rax
  movss     (%rax,%rdi,4), %xmm0              /* +inf gives +inf, -inf gives 0 */
  ret
..B1.15:
  /*
   * NaN input is handed back unchanged.
   * NOTE(review): a signalling NaN is therefore not quieted and no
   * invalid exception is raised -- confirm this is acceptable.
   */
  movss     -8(%rsp), %xmm0
..B1.16:
  ret
..B1.17:
  /* Zero or subnormal x: 10**x is 1.0 to float precision; the add keeps the inexact flag for nonzero x. */
  movss     .L_2il0floatpacket.10(%rip), %xmm0
  addss     -8(%rsp), %xmm0
  ret
  .align 16,0x90
  .cfi_endproc
  .type exp10f,@function
  .size exp10f,.-exp10f
  .data
/* -- End exp10f */
  .section .rodata, "a"
/*
 * Read-only constants used by exp10f.
 * Each 8-byte packet is a little-endian double written as two .long
 * words (low word first); each 4-byte packet is a float bit pattern.
 * Decimal values in the comments are rounded.
 */
  .align 8
  .align 8
.L_2il0floatpacket.1:
  .long 0x0979a371,0x406a934f                 /* 212.6034, about 64*log2(10) */
  .type .L_2il0floatpacket.1,@object
  .size .L_2il0floatpacket.1,8
  .align 8
.L_2il0floatpacket.2:
  .long 0x00000000,0x41680000                 /* 12582912.0 = 1.5 * 2**23 (double) */
  .type .L_2il0floatpacket.2,@object
  .size .L_2il0floatpacket.2,8
  .align 8
.L_2il0floatpacket.3:
  .long 0x509f79ff,0xbf734413                 /* -0.0047036, about -log10(2)/64 */
  .type .L_2il0floatpacket.3,@object
  .size .L_2il0floatpacket.3,8
  .align 8
.L_2il0floatpacket.4:
  .long 0xe2724acf,0x3ff2d784                 /* 1.17762, polynomial coefficient of r**4 */
  .type .L_2il0floatpacket.4,@object
  .size .L_2il0floatpacket.4,8
  .align 8
.L_2il0floatpacket.5:
  .long 0xcd26a2f3,0x40053524                 /* 2.65095, polynomial coefficient of r**2 */
  .type .L_2il0floatpacket.5,@object
  .size .L_2il0floatpacket.5,8
  .align 8
.L_2il0floatpacket.6:
  .long 0xd3f4b8ec,0x400046f7                 /* 2.03465, polynomial coefficient of r**3 */
  .type .L_2il0floatpacket.6,@object
  .size .L_2il0floatpacket.6,8
  .align 8
.L_2il0floatpacket.7:
  .long 0xbbb47127,0x40026bb1                 /* 2.302585, polynomial coefficient of r (about ln 10) */
  .type .L_2il0floatpacket.7,@object
  .size .L_2il0floatpacket.7,8
  .align 4
.L_2il0floatpacket.0:
  .long 0x4b400000                            /* 12582912.0f = 1.5 * 2**23 (float) */
  .type .L_2il0floatpacket.0,@object
  .size .L_2il0floatpacket.0,4
  .align 4
.L_2il0floatpacket.8:
  .long 0x0d800000                            /* 2**-100, squared to force underflow */
  .type .L_2il0floatpacket.8,@object
  .size .L_2il0floatpacket.8,4
  .align 4
.L_2il0floatpacket.9:
  .long 0x71800000                            /* 2**100, squared to force overflow */
  .type .L_2il0floatpacket.9,@object
  .size .L_2il0floatpacket.9,4
  .align 4
.L_2il0floatpacket.10:
  .long 0x3f800000                            /* 1.0f */
  .type .L_2il0floatpacket.10,@object
  .size .L_2il0floatpacket.10,4
  .align 4
/* Largest |x| bit pattern handled by the main paths, indexed by the sign bit of x. */
range:
  .long 1109008539                            /* 0x421a209b, about 38.5318, limit for x >= 0 */
  .long 1110823542                            /* 0x4235d276, about 45.4555, limit for x < 0 */
  .type range,@object
  .size range,8
  .align 4
/*
 * Float powers of ten indexed by integer x. exp10f only reads entries
 * 1..10; entry 0 is a zero placeholder.
 */
exact_values:
  .long 0x00000000                            /* [0]  placeholder */
  .long 0x41200000                            /* [1]  10.0 */
  .long 0x42c80000                            /* [2]  100.0 */
  .long 0x447a0000                            /* [3]  1000.0 */
  .long 0x461c4000                            /* [4]  1e4 */
  .long 0x47c35000                            /* [5]  1e5 */
  .long 0x49742400                            /* [6]  1e6 */
  .long 0x4b189680                            /* [7]  1e7 */
  .long 0x4cbebc20                            /* [8]  1e8 */
  .long 0x4e6e6b28                            /* [9]  1e9 */
  .long 0x501502f9                            /* [10] 1e10 */
  .type exact_values,@object
  .size exact_values,44
  .align 4
/* Results for infinite x, indexed by the sign bit of x. */
_inf_zero:
  .long 2139095040                            /* 0x7f800000 = +infinity, for x = +inf */
  .long 0                                     /* +0.0, for x = -inf */
  .type _inf_zero,@object
  .size _inf_zero,8
  .data
/* Empty .note.GNU-stack section; by GNU toolchain convention this marks the object as not needing an executable stack. */
  .section .note.GNU-stack, ""
/* -- Begin DWARF2 SEGMENT .eh_frame */
  .section .eh_frame,"a",@progbits
.eh_frame_seg:
  .align 1
/* End */
/* pow10f is exported as a second global name with the same value as exp10f. */
  .globl pow10f
  .equ pow10f, exp10f