exp2f_gen.S 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "exp2f_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin exp2f
  41. .text
  /*
   * exp2f -- exported entry point (see .globl below).
   * Presumed C signature: float exp2f(float x), returning 2**x; the
   * signature is inferred from the symbol name and the register usage,
   * confirm against the C source this file was generated from.
   *
   * Syntax: GNU as, AT&T operand order (source, destination).
   *
   * In:      %xmm0 = x, single precision
   * Out:     %xmm0 = result on every return path
   * Writes:  %rax, %rcx, %rdx, %rsi, %xmm0-%xmm6, flags
   * Stack:   scratch slots at -8, -12, -16 and -24(%rsp); %rsp is never
   *          adjusted and the routine contains no call instruction.
   *
   * Dispatch, based on the bit pattern of |x| kept in %esi:
   *   x integral, 1 <= |x| <= 127      result bits are built directly
   *   |x| < 2**-28                     1.0f + x
   *   2**-28 <= |x| < 125              table + polynomial, float scale
   *   125 <= |x| <= range[sign]        table + polynomial, double scale
   *   |x| > range[sign], x finite      2**-100 squared (x < 0) or
   *                                    2**100 squared (x > 0)
   *   x infinite                       _inf_zero[sign]
   *   x NaN                            x + x
   */
  42. .align 16,0x90
  43. .globl exp2f
  44. exp2f:
  45. # parameter 1: %xmm0
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_exp2f.1:
  49. ..L2:
  /* Keep x in xmm1 and at -8(%rsp); esi = bits of x with the sign cleared. */
  50. movaps %xmm0, %xmm1
  51. movd %xmm1, %esi
  52. movss %xmm1, -8(%rsp)
  53. andl $2147483647, %esi
  /*
   * eax = esi - 0x3f800000 (bits of 1.0f). The unsigned compare with
   * 0x037e0000 falls through only when 1.0 <= |x| <= 127.0; a smaller
   * |x| wraps around to a large unsigned value and takes the branch.
   */
  54. lea -1065353216(%rsi), %eax
  55. cmpl $58589184, %eax
  56. ja ..B1.7
  57. ..B1.2:
  /*
   * Integer test. |x| is shifted left by (biased exponent + 10); the
   * CPU uses the count modulo 32, which is 9..15 in this range, so the
   * sign, the exponent and the integer part of the mantissa are shifted
   * out. Any bit that remains is a fractional bit of x.
   */
  58. movl %esi, %ecx
  59. movl %esi, %eax
  60. shrl $23, %ecx
  61. addl $10, %ecx
  62. shll %cl, %eax
  63. testl %eax, %eax
  64. jne ..B1.7
  65. ..B1.3:
  /*
   * x is integral. Adding 12582912.0f (1.5 * 2**23) leaves x as a
   * two's-complement integer in the low mantissa bits; shl/sar by 10
   * sign-extends the low 22 bits, giving ecx = n = (int) x.
   * n below -149 is sent to the general path.
   */
  66. movss .L_2il0floatpacket.0(%rip), %xmm0
  67. addss %xmm0, %xmm1
  68. movss %xmm1, -12(%rsp)
  69. movl -12(%rsp), %ecx
  70. shll $10, %ecx
  71. sarl $10, %ecx
  72. cmpl $-149, %ecx
  73. jl ..B1.7
  74. ..B1.4:
  /* eax = n + 127 (biased exponent); jle tests the flags set by addl. */
  75. movl %ecx, %eax
  76. addl $127, %eax
  77. jle ..B1.23
  78. ..B1.5:
  /* Positive biased exponent: place it in bits 23..30, mantissa zero. */
  79. shll $23, %eax
  80. movl %eax, -16(%rsp)
  81. ..B1.6:
  /* Shared exit of the integer path: reload the assembled bits as a float. */
  82. movss -16(%rsp), %xmm0
  83. ret
  84. ..B1.7:
  /* 1123680256 = 0x42fa0000 = bits of 125.0f. */
  85. cmpl $1123680256, %esi
  86. jae ..B1.12
  87. ..B1.8:
  /* 830472192 = 0x31800000 = bits of 2**-28. */
  88. cmpl $830472192, %esi
  89. jae ..B1.10
  90. ..B1.9:
  /*
   * |x| < 2**-28: return 1.0f + x. The store to -12(%rsp) is not read
   * back before the return.
   */
  91. movss .L_2il0floatpacket.10(%rip), %xmm0
  92. addss -8(%rsp), %xmm0
  93. movss %xmm0, -12(%rsp)
  94. ret
  95. ..B1.10:
  /*
   * Main path, evaluated in double precision.
   *   N = 64*x rounded to an integer: 64*x + 1.5*2**23 is converted to
   *       float, which leaves N in the low mantissa bits (edx).
   *   j = low 6 bits of N, sign-extended (shl/sar by 26), so -32..31.
   *   r = x - N/64                      (xmm3)
   *   p = c1*r + c2*r^2 + c3*r^3 + c4*r^4, with c1, c2, c3, c4 taken
   *       from packets .7, .5, .6, .4; computed as
   *       (c4*r^2 + c2)*r^2 + (c3*r^2 + c1)*r.
   *   T = double at byte offset 256 + 8*j of __libm_expf_table_64.
   *       The table is defined outside this file; presumably it holds
   *       2**(j/64) -- confirm against its definition.
   *   k: (edx - j) >> 6 has k = (N - j)/64 in its low bits; shll by 23
   *       keeps only the low 9 bits of that value, and adding
   *       0x3f800000 turns it into the float 2**k, stored at -24(%rsp).
   *   result = (float)((p*T + T) * 2**k)
   * The final store to -16(%rsp) is not read back; xmm0 already holds
   * the result.
   */
  96. pxor %xmm3, %xmm3
  97. pxor %xmm6, %xmm6
  98. cvtss2sd -8(%rsp), %xmm3
  99. movaps %xmm3, %xmm0
  100. mulsd .L_2il0floatpacket.1(%rip), %xmm0
  101. movsd .L_2il0floatpacket.6(%rip), %xmm4
  102. movq __libm_expf_table_64@GOTPCREL(%rip), %rcx
  103. addsd .L_2il0floatpacket.2(%rip), %xmm0
  104. cvtsd2ss %xmm0, %xmm0
  105. movss %xmm0, -12(%rsp)
  106. movss -12(%rsp), %xmm1
  107. movsd .L_2il0floatpacket.4(%rip), %xmm0
  108. movl -12(%rsp), %edx
  109. movl %edx, %eax
  110. shll $26, %eax
  111. subss .L_2il0floatpacket.0(%rip), %xmm1
  112. cvtss2sd %xmm1, %xmm1
  113. mulsd .L_2il0floatpacket.3(%rip), %xmm1
  114. sarl $26, %eax
  115. addsd %xmm1, %xmm3
  116. movaps %xmm3, %xmm2
  117. mulsd %xmm3, %xmm2
  118. mulsd %xmm2, %xmm0
  119. mulsd %xmm2, %xmm4
  120. addsd .L_2il0floatpacket.5(%rip), %xmm0
  121. addsd .L_2il0floatpacket.7(%rip), %xmm4
  122. mulsd %xmm2, %xmm0
  123. mulsd %xmm3, %xmm4
  124. movslq %eax, %rax
  125. addsd %xmm4, %xmm0
  126. movsd 256(%rcx,%rax,8), %xmm5
  127. subl %eax, %edx
  128. mulsd %xmm5, %xmm0
  129. shrl $6, %edx
  130. addsd %xmm5, %xmm0
  131. shll $23, %edx
  132. addl $1065353216, %edx
  133. movl %edx, -24(%rsp)
  134. cvtss2sd -24(%rsp), %xmm6
  135. mulsd %xmm6, %xmm0
  136. cvtsd2ss %xmm0, %xmm0
  137. movss %xmm0, -16(%rsp)
  138. ..B1.11:
  139. ret
  140. ..B1.12:
  /*
   * |x| >= 125. rax = sign bit of x (0 or 1), used as an index into
   * the two-entry range table; above range[sign] the special cases at
   * ..B1.14 take over.
   */
  141. lea range(%rip), %rdx
  142. movl -8(%rsp), %eax
  143. shrl $31, %eax
  144. cmpl (%rdx,%rax,4), %esi
  145. ja ..B1.14
  146. ..B1.13:
  /*
   * Same reduction, polynomial and table lookup as ..B1.10, but the
   * scale 2**k is assembled as a double: edx keeps the low 22 bits of
   * the rounded float, (edx - j) >> 6 is shifted left by 52 (only its
   * low 12 bits survive) and 0x3ff0000000000000, the bits of 1.0, is
   * added. The product is then converted to float. rsi is reused here
   * as the table pointer; |x| in esi is no longer needed on this path.
   */
  147. pxor %xmm3, %xmm3
  148. movq $0x3ff0000000000000, %rcx
  149. cvtss2sd -8(%rsp), %xmm3
  150. movaps %xmm3, %xmm0
  151. mulsd .L_2il0floatpacket.1(%rip), %xmm0
  152. movsd .L_2il0floatpacket.6(%rip), %xmm4
  153. movq __libm_expf_table_64@GOTPCREL(%rip), %rsi
  154. addsd .L_2il0floatpacket.2(%rip), %xmm0
  155. cvtsd2ss %xmm0, %xmm0
  156. movss %xmm0, -12(%rsp)
  157. movss -12(%rsp), %xmm1
  158. movsd .L_2il0floatpacket.4(%rip), %xmm0
  159. movl -12(%rsp), %edx
  160. andl $4194303, %edx
  161. subss .L_2il0floatpacket.0(%rip), %xmm1
  162. cvtss2sd %xmm1, %xmm1
  163. mulsd .L_2il0floatpacket.3(%rip), %xmm1
  164. movl %edx, %eax
  165. shll $26, %eax
  166. addsd %xmm1, %xmm3
  167. movaps %xmm3, %xmm2
  168. mulsd %xmm3, %xmm2
  169. mulsd %xmm2, %xmm0
  170. mulsd %xmm2, %xmm4
  171. addsd .L_2il0floatpacket.5(%rip), %xmm0
  172. addsd .L_2il0floatpacket.7(%rip), %xmm4
  173. mulsd %xmm2, %xmm0
  174. mulsd %xmm3, %xmm4
  175. sarl $26, %eax
  176. addsd %xmm4, %xmm0
  177. movslq %eax, %rax
  178. subl %eax, %edx
  179. shrl $6, %edx
  180. movsd 256(%rsi,%rax,8), %xmm5
  181. mulsd %xmm5, %xmm0
  182. shlq $52, %rdx
  183. addsd %xmm5, %xmm0
  184. addq %rcx, %rdx
  185. movq %rdx, -24(%rsp)
  186. mulsd -24(%rsp), %xmm0
  187. cvtsd2ss %xmm0, %xmm0
  188. movss %xmm0, -16(%rsp)
  189. ret
  190. ..B1.14:
  /*
   * 2139095040 = 0x7f800000 = bits of infinity. The flags of this
   * compare are used twice: by jae here and by jne at ..B1.19.
   */
  191. cmpl $2139095040, %esi
  192. jae ..B1.19
  193. ..B1.15:
  /* Finite x beyond range[sign]; rax still holds the sign index. */
  194. testq %rax, %rax
  195. je ..B1.17
  196. ..B1.16:
  /* x negative: return packet .8 (2**-100) multiplied by itself. */
  197. movss .L_2il0floatpacket.8(%rip), %xmm0
  198. mulss %xmm0, %xmm0
  199. movss %xmm0, -16(%rsp)
  200. ret
  201. ..B1.17:
  /* x positive: return packet .9 (2**100) multiplied by itself. */
  202. movss .L_2il0floatpacket.9(%rip), %xmm0
  203. mulss %xmm0, %xmm0
  204. movss %xmm0, -16(%rsp)
  205. ..B1.18:
  206. ret
  207. ..B1.19:
  /* |x| bits equal to 0x7f800000 means infinity; anything larger is a NaN. */
  208. jne ..B1.21
  209. ..B1.20:
  /* Infinite x: load _inf_zero[sign], +infinity for +inf and 0 for -inf. */
  210. lea _inf_zero(%rip), %rdx
  211. movss (%rdx,%rax,4), %xmm0
  212. ret
  213. ..B1.21:
  /* NaN: return x + x. */
  214. movss -8(%rsp), %xmm0
  215. addss %xmm0, %xmm0
  216. ..B1.22:
  217. ret
  218. ..B1.23:
  /*
   * Integer n with n + 127 <= 0: the result has a zero exponent field,
   * so its bits are the single mantissa bit 1 << (n + 149).
   */
  219. addl $149, %ecx
  220. movl $1, %eax
  221. shll %cl, %eax
  222. movl %eax, -16(%rsp)
  223. jmp ..B1.6
  224. .align 16,0x90
  225. .cfi_endproc
  226. .type exp2f,@function
  227. .size exp2f,.-exp2f
  228. .data
  229. # -- End exp2f
  230. .section .rodata, "a"
  /*
   * Read-only constants referenced by exp2f. Each double is emitted as
   * two .long words, low word first; the values quoted in the comments
   * are decoded from the bit patterns.
   */
  231. .align 8
  232. .align 8
  /* 0x4050000000000000 = 64.0; x is multiplied by this before rounding. */
  233. .L_2il0floatpacket.1:
  234. .long 0x00000000,0x40500000
  235. .type .L_2il0floatpacket.1,@object
  236. .size .L_2il0floatpacket.1,8
  237. .align 8
  /* 0x4168000000000000 = 12582912.0 (1.5 * 2**23), the rounding shifter as a double. */
  238. .L_2il0floatpacket.2:
  239. .long 0x00000000,0x41680000
  240. .type .L_2il0floatpacket.2,@object
  241. .size .L_2il0floatpacket.2,8
  242. .align 8
  /* 0xbf90000000000000 = -1/64; multiplies the rounded integer to form the reduced argument. */
  243. .L_2il0floatpacket.3:
  244. .long 0x00000000,0xbf900000
  245. .type .L_2il0floatpacket.3,@object
  246. .size .L_2il0floatpacket.3,8
  247. .align 8
  /* 0x3f83ce0f3e46f43a, about 0.00967; coefficient of r^4 in the polynomial. */
  248. .L_2il0floatpacket.4:
  249. .long 0x3e46f43a,0x3f83ce0f
  250. .type .L_2il0floatpacket.4,@object
  251. .size .L_2il0floatpacket.4,8
  252. .align 8
  /* 0x3fcebfbe081585e7, about 0.240227; coefficient of r^2. */
  253. .L_2il0floatpacket.5:
  254. .long 0x081585e7,0x3fcebfbe
  255. .type .L_2il0floatpacket.5,@object
  256. .size .L_2il0floatpacket.5,8
  257. .align 8
  /* 0x3fac6af0d93cf576, about 0.055503; coefficient of r^3. */
  258. .L_2il0floatpacket.6:
  259. .long 0xd93cf576,0x3fac6af0
  260. .type .L_2il0floatpacket.6,@object
  261. .size .L_2il0floatpacket.6,8
  262. .align 8
  /* 0x3fe62e42fef9277b, about 0.693147 (close to ln 2); coefficient of r. */
  263. .L_2il0floatpacket.7:
  264. .long 0xfef9277b,0x3fe62e42
  265. .type .L_2il0floatpacket.7,@object
  266. .size .L_2il0floatpacket.7,8
  267. .align 4
  /* 0x4b400000 = 12582912.0f (1.5 * 2**23), the rounding shifter as a float. */
  268. .L_2il0floatpacket.0:
  269. .long 0x4b400000
  270. .type .L_2il0floatpacket.0,@object
  271. .size .L_2il0floatpacket.0,4
  272. .align 4
  /* 0x0d800000 = 2**-100; multiplied by itself on the negative out-of-range path. */
  273. .L_2il0floatpacket.8:
  274. .long 0x0d800000
  275. .type .L_2il0floatpacket.8,@object
  276. .size .L_2il0floatpacket.8,4
  277. .align 4
  /* 0x71800000 = 2**100; multiplied by itself on the positive out-of-range path. */
  278. .L_2il0floatpacket.9:
  279. .long 0x71800000
  280. .type .L_2il0floatpacket.9,@object
  281. .size .L_2il0floatpacket.9,4
  282. .align 4
  /* 0x3f800000 = 1.0f; added to x when |x| < 2**-28. */
  283. .L_2il0floatpacket.10:
  284. .long 0x3f800000
  285. .type .L_2il0floatpacket.10,@object
  286. .size .L_2il0floatpacket.10,4
  287. .align 4
  /*
   * Upper limits on the bit pattern of |x| for the table + polynomial
   * path, indexed by the sign bit of x:
   *   range[0] = 0x42ffffff, the largest float below 128.0 (x >= 0)
   *   range[1] = 0x431e0000 = 158.0                       (x <  0)
   */
  288. range:
  289. .long 1124073471
  290. .long 1126039552
  291. .type range,@object
  292. .size range,8
  293. .align 4
  /*
   * Results for infinite x, indexed by the sign bit of x:
   *   _inf_zero[0] = 0x7f800000 = +infinity, _inf_zero[1] = 0 = +0.0
   */
  294. _inf_zero:
  295. .long 2139095040
  296. .long 0
  297. .type _inf_zero,@object
  298. .size _inf_zero,8
  299. .data
  /*
   * Empty .note.GNU-stack section: by GNU toolchain convention this
   * declares that the object does not need an executable stack.
   */
  300. .section .note.GNU-stack, ""
  301. // -- Begin DWARF2 SEGMENT .eh_frame
  /*
   * Opens .eh_frame and defines the .eh_frame_seg label; no unwind
   * records are written by hand in the lines visible here.
   */
  302. .section .eh_frame,"a",@progbits
  303. .eh_frame_seg:
  304. .align 1
  305. # End