expm1f_gen.S 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "expm1f_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin expm1f
  /*
   * expm1f -- single-precision routine. The argument x arrives in %xmm0
   * and the result is returned in %xmm0.
   *
   * Syntax: GAS AT&T. The function makes no calls and sets up no frame;
   * it uses scratch slots at -8, -16 and -24(%rsp), i.e. it relies on
   * the area below the stack pointer being usable by a leaf function.
   *
   * Registers written: rax, rcx, rdx, rsi, xmm0-xmm6, flags.
   *
   * Dispatch on a = bit pattern of |x| (all compares are unsigned):
   *   a >= 0x42AD496B (about 86.64) -> ..B1.12  large, infinity, NaN
   *   a >= 0x3E000000 (0.125)       -> ..B1.11  table-driven path
   *   a >= 0x32000000 (2^-27)       -> ..B1.10  degree-7 polynomial in x
   *   a >= 0x00800000 (2^-126)      -> ..B1.5   returns x + tiny product
   *   a == 0                        -> ..B1.8   returns x unchanged
   *   otherwise (subnormal x)       -> ..B1.7
   *
   * In the comments below c0..c17 stand for .L_2il0floatpacket.0..17.
   */
  41. .text
  42. .align 16,0x90
  43. .globl expm1f
  44. expm1f:
  45. # parameter 1: %xmm0
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_expm1f.1:
  49. ..L2:
  50. movd %xmm0, %eax                      # eax = raw bit pattern of x
  51. movss %xmm0, -8(%rsp)                 # keep x in a scratch slot below %rsp
  52. movl %eax, %ecx
  53. andl $2147483647, %ecx                # ecx = bits of |x| (mask 0x7fffffff clears the sign)
  54. cmpl $1118652779, %ecx                # 0x42AD496B, about 86.64 as a float
  55. jae ..B1.12                           # large |x|, infinity or NaN
  56. ..B1.2:
  57. cmpl $1040187392, %ecx                # 0x3E000000 = 0.125
  58. jae ..B1.11                           # 0.125 <= |x|: table-driven path
  59. ..B1.3:
  60. cmpl $838860800, %ecx                 # 0x32000000 = 2^-27
  61. jae ..B1.10                           # 2^-27 <= |x| < 0.125: polynomial path
  62. ..B1.4:
  63. cmpl $8388608, %ecx                   # 0x00800000 = smallest normal float
  64. jb ..B1.6                             # zero or subnormal
  65. ..B1.5:                               # 2^-126 <= |x| < 2^-27
  66. movss .L_2il0floatpacket.15(%rip), %xmm1   # xmm1 = 2^-100
  67. mulss %xmm1, %xmm1                    # 2^-100 squared, far below the float range; presumably only here to raise underflow/inexact -- TODO confirm
  68. addss %xmm1, %xmm0                    # result = x + that product
  69. ret
  70. ..B1.6:
  71. testl %ecx, %ecx                      # |x| bits all zero?
  72. je ..B1.8                             # x is +0 or -0
  73. ..B1.7:                               # subnormal x
  74. movss .L_2il0floatpacket.15(%rip), %xmm0   # xmm0 = 2^-100
  75. pxor %xmm2, %xmm2                     # clear destination before the scalar convert
  76. mulss %xmm0, %xmm0                    # 2^-100 squared in single precision
  77. pxor %xmm1, %xmm1
  78. cvtss2sd -8(%rsp), %xmm2              # xmm2 = (double)x
  79. cvtss2sd %xmm0, %xmm0                 # widen the product
  80. cvtsd2ss %xmm0, %xmm1                 # and narrow it again into xmm1
  81. addsd %xmm0, %xmm2                    # xmm2 = (double)x + product
  82. pxor %xmm0, %xmm0
  83. movss %xmm1, -16(%rsp)                # stored to scratch; not read again in this function
  84. cvtsd2ss %xmm2, %xmm0                 # result = (float)xmm2
  85. ret
  86. ..B1.8:
  87. movss -8(%rsp), %xmm0                 # zero input: return x itself, sign included
  88. ..B1.9:
  89. ret
  90. ..B1.10:                              # 2^-27 <= |x| < 0.125, with y = x*x
  91. movss -8(%rsp), %xmm1                 # xmm1 = x
  92. movaps %xmm1, %xmm0
  93. mulss %xmm1, %xmm0                    # y = x*x, computed in single precision
  94. cvtss2sd %xmm1, %xmm1                 # xmm1 = (double)x
  95. cvtss2sd %xmm0, %xmm0                 # xmm0 = (double)y
  96. movsd .L_2il0floatpacket.9(%rip), %xmm3    # odd chain starts at c9 (about 1/5040)
  97. mulsd %xmm0, %xmm3                    # c9*y
  98. movsd .L_2il0floatpacket.12(%rip), %xmm2   # even chain starts at c12 (about 1/720)
  99. mulsd %xmm0, %xmm2                    # c12*y
  100. addsd .L_2il0floatpacket.10(%rip), %xmm3  # c9*y + c10 (c10 about 1/120)
  101. mulsd %xmm0, %xmm3
  102. addsd .L_2il0floatpacket.13(%rip), %xmm2  # c12*y + c13 (c13 about 1/24)
  103. mulsd %xmm0, %xmm2
  104. addsd .L_2il0floatpacket.11(%rip), %xmm3  # (c9*y + c10)*y + c11 (c11 about 1/6)
  105. mulsd %xmm0, %xmm3
  106. addsd .L_2il0floatpacket.14(%rip), %xmm2  # (c12*y + c13)*y + c14 (c14 about 1/2)
  107. mulsd %xmm1, %xmm3                   # odd part: x*y*(c11 + c10*y + c9*y^2)
  108. mulsd %xmm0, %xmm2                   # even part: y*(c14 + c13*y + c12*y^2)
  109. addsd %xmm2, %xmm3                   # odd + even
  110. addsd %xmm1, %xmm3                   # + x
  111. cvtsd2ss %xmm3, %xmm3                # round the double sum to float
  112. movaps %xmm3, %xmm0
  113. ret
  114. ..B1.11:                             # 0.125 <= |x| < about 86.64
  115. pxor %xmm3, %xmm3                    # clear destinations before the scalar converts
  116. pxor %xmm6, %xmm6
  117. cvtss2sd -8(%rsp), %xmm3             # xmm3 = (double)x
  118. movaps %xmm3, %xmm0
  119. mulsd .L_2il0floatpacket.0(%rip), %xmm0   # x * 64/ln(2)
  120. movsd .L_2il0floatpacket.6(%rip), %xmm4   # xmm4 = c6 (about 1/6)
  121. movq __libm_expf_table_64@GOTPCREL(%rip), %rcx   # rcx = address of a table defined outside this file
  122. addsd .L_2il0floatpacket.1(%rip), %xmm0   # + 1.5*2^23
  123. cvtsd2ss %xmm0, %xmm0                # as a float this has unit spacing, so the low mantissa bits hold n = x*64/ln(2) rounded to an integer
  124. movss %xmm0, -16(%rsp)               # spill so the bits can be read as an integer
  125. movss -16(%rsp), %xmm1
  126. movsd .L_2il0floatpacket.4(%rip), %xmm0   # xmm0 = c4 (about 1/24)
  127. movl -16(%rsp), %edx                 # edx = 0x4B400000 + n
  128. movl %edx, %eax
  129. shll $26, %eax                       # keep only the low 6 bits of n ...
  130. subss .L_2il0floatpacket.2(%rip), %xmm1   # xmm1 = n as a float (removes 1.5*2^23)
  131. cvtss2sd %xmm1, %xmm1
  132. mulsd .L_2il0floatpacket.3(%rip), %xmm1   # -n*ln(2)/64
  133. sarl $26, %eax                       # ... sign-extended: j in -32..31
  134. addsd %xmm1, %xmm3                   # r = x - n*ln(2)/64
  135. movaps %xmm3, %xmm2
  136. mulsd %xmm3, %xmm2                   # r2 = r*r
  137. mulsd %xmm2, %xmm0                   # c4*r2
  138. mulsd %xmm2, %xmm4                   # c6*r2
  139. addsd .L_2il0floatpacket.5(%rip), %xmm0   # c4*r2 + c5 (c5 about 1/2)
  140. addsd .L_2il0floatpacket.7(%rip), %xmm4   # c6*r2 + c7 (c7 about 1)
  141. mulsd %xmm2, %xmm0                   # (c4*r2 + c5)*r2
  142. mulsd %xmm3, %xmm4                   # (c6*r2 + c7)*r
  143. movslq %eax, %rax                    # j as a 64-bit index
  144. addsd %xmm4, %xmm0                   # p = c7*r + c5*r^2 + c6*r^3 + c4*r^4
  145. movsd 256(%rcx,%rax,8), %xmm5        # T = 8-byte table entry at index j+32; presumably 2^(j/64), table not visible here
  146. subl %eax, %edx                      # drop j from the bit pattern
  147. mulsd %xmm5, %xmm0                   # T*p
  148. shrl $6, %edx                        # low bits now hold k = (n - j)/64
  149. addsd %xmm5, %xmm0                   # T*p + T
  150. shll $23, %edx                       # k into the float exponent field; the higher bits shift out
  151. addl $1065353216, %edx               # + 0x3F800000 (1.0f): bit pattern of 2^k as a float
  152. movl %edx, -24(%rsp)
  153. cvtss2sd -24(%rsp), %xmm6            # xmm6 = (double)2^k
  154. mulsd %xmm6, %xmm0                   # 2^k * (T*p + T)
  155. addsd .L_2il0floatpacket.8(%rip), %xmm0   # - 1.0
  156. cvtsd2ss %xmm0, %xmm0                # round to float
  157. ret
  158. ..B1.12:                             # |x| bits >= 0x42AD496B
  159. lea range(%rip), %rdx                # rdx = address of range[]
  160. shrl $31, %eax                       # eax = sign bit of x (0 or 1); the upper half of rax is zeroed
  161. cmpl (%rdx,%rax,4), %ecx             # compare |x| bits with range[sign]
  162. jb ..B1.21                           # below the limit: compute the result
  163. ..B1.13:
  164. cmpl $2139095040, %ecx               # 0x7F800000 = bit pattern of infinity
  165. jae ..B1.17                          # infinity or NaN
  166. ..B1.14:
  167. testq %rax, %rax                     # sign of a finite out-of-range x
  168. je ..B1.22                           # positive: overflow path
  169. ..B1.15:
  170. movss .L_2il0floatpacket.16(%rip), %xmm0  # negative: result is -1.0
  171. ..B1.16:
  172. ret
  173. ..B1.17:
  174. jne ..B1.19                          # flags still come from the compare with 0x7F800000: not equal means NaN
  175. ..B1.18:
  176. lea _inf_none(%rip), %rdx
  177. movss (%rdx,%rax,4), %xmm0           # +infinity for x = +inf, -1.0 for x = -inf
  178. ret
  179. ..B1.19:
  180. movss -8(%rsp), %xmm0                # NaN: the input is returned unchanged
  181. ..B1.20:
  182. ret
  183. ..B1.21:                             # same reduction as ..B1.11, with the scale 2^k built as a double
  184. pxor %xmm3, %xmm3
  185. movq $0x3ff0000000000000, %rcx       # bit pattern of 1.0 as a double
  186. cvtss2sd -8(%rsp), %xmm3             # xmm3 = (double)x
  187. movaps %xmm3, %xmm0
  188. mulsd .L_2il0floatpacket.0(%rip), %xmm0   # x * 64/ln(2)
  189. movsd .L_2il0floatpacket.6(%rip), %xmm4   # xmm4 = c6 (about 1/6)
  190. movq __libm_expf_table_64@GOTPCREL(%rip), %rsi   # rsi = address of the external table
  191. addsd .L_2il0floatpacket.1(%rip), %xmm0   # + 1.5*2^23
  192. cvtsd2ss %xmm0, %xmm0                # low mantissa bits now hold the integer n
  193. movss %xmm0, -16(%rsp)
  194. movss -16(%rsp), %xmm1
  195. movsd .L_2il0floatpacket.4(%rip), %xmm0   # xmm0 = c4 (about 1/24)
  196. movl -16(%rsp), %edx
  197. andl $4194303, %edx                  # mask 0x3FFFFF: keep n, drop the float exponent bits
  198. subss .L_2il0floatpacket.2(%rip), %xmm1   # xmm1 = n as a float
  199. cvtss2sd %xmm1, %xmm1
  200. mulsd .L_2il0floatpacket.3(%rip), %xmm1   # -n*ln(2)/64
  201. movl %edx, %eax
  202. shll $26, %eax                       # isolate the low 6 bits of n ...
  203. addsd %xmm1, %xmm3                   # r = x - n*ln(2)/64
  204. movaps %xmm3, %xmm2
  205. mulsd %xmm3, %xmm2                   # r2 = r*r
  206. mulsd %xmm2, %xmm0                   # c4*r2
  207. mulsd %xmm2, %xmm4                   # c6*r2
  208. addsd .L_2il0floatpacket.5(%rip), %xmm0   # c4*r2 + c5
  209. addsd .L_2il0floatpacket.7(%rip), %xmm4   # c6*r2 + c7
  210. mulsd %xmm2, %xmm0                   # (c4*r2 + c5)*r2
  211. mulsd %xmm3, %xmm4                   # (c6*r2 + c7)*r
  212. sarl $26, %eax                       # ... sign-extended: j in -32..31
  213. addsd %xmm4, %xmm0                   # p = c7*r + c5*r^2 + c6*r^3 + c4*r^4
  214. movslq %eax, %rax                    # j as a 64-bit index
  215. subl %eax, %edx                      # n - j (32-bit write zero-extends into rdx)
  216. shrl $6, %edx                        # k = (n - j)/64
  217. movsd 256(%rsi,%rax,8), %xmm5        # T = table entry at index j+32
  218. mulsd %xmm5, %xmm0                   # T*p
  219. shlq $52, %rdx                       # k into the double exponent field
  220. addsd %xmm5, %xmm0                   # T*p + T
  221. addq %rcx, %rdx                      # bit pattern of 2^k as a double; k may reach 128 on this path, which a finite float exponent cannot hold
  222. movq %rdx, -24(%rsp)
  223. mulsd -24(%rsp), %xmm0               # 2^k * (T*p + T)
  224. addsd .L_2il0floatpacket.8(%rip), %xmm0   # - 1.0
  225. cvtsd2ss %xmm0, %xmm0                # round to float
  226. ret
  227. ..B1.22:                             # finite positive x at or above range[0]
  228. movss .L_2il0floatpacket.17(%rip), %xmm0  # xmm0 = 2^100
  229. mulss %xmm0, %xmm0                   # 2^100 squared exceeds the float range; presumably yields +inf and raises overflow -- TODO confirm for non-default rounding modes
  230. ret
  231. .align 16,0x90
  232. .cfi_endproc
  233. .type expm1f,@function
  234. .size expm1f,.-expm1f
  235. .data
  236. # -- End expm1f
  /*
   * Read-only constants for expm1f. Each 8-byte packet is a little-endian
   * IEEE-754 double written as two .long words (low word first); each
   * 4-byte packet is an IEEE-754 float. Values marked "about" are
   * polynomial coefficients that sit close to, but are not exactly, the
   * quoted fraction.
   */
  237. .section .rodata, "a"
  238. .align 8
  239. .align 8
  240. .L_2il0floatpacket.0:
  241. .long 0x652b82fe,0x40571547          # double 0x40571547652b82fe = 64/ln(2), about 92.33
  242. .type .L_2il0floatpacket.0,@object
  243. .size .L_2il0floatpacket.0,8
  244. .align 8
  245. .L_2il0floatpacket.1:
  246. .long 0x00000000,0x41680000          # double 1.5*2^23 = 12582912.0 (added so the narrowing convert rounds to an integer)
  247. .type .L_2il0floatpacket.1,@object
  248. .size .L_2il0floatpacket.1,8
  249. .align 8
  250. .L_2il0floatpacket.3:
  251. .long 0xfefa39ef,0xbf862e42          # double 0xbf862e42fefa39ef = -ln(2)/64
  252. .type .L_2il0floatpacket.3,@object
  253. .size .L_2il0floatpacket.3,8
  254. .align 8
  255. .L_2il0floatpacket.4:
  256. .long 0xeb78fa85,0x3fa56420          # double, about 0.0418 (near 1/24); r^4 coefficient on the table-driven paths
  257. .type .L_2il0floatpacket.4,@object
  258. .size .L_2il0floatpacket.4,8
  259. .align 8
  260. .L_2il0floatpacket.5:
  261. .long 0x008d6118,0x3fe00000          # double, about 1/2; r^2 coefficient on the table-driven paths
  262. .type .L_2il0floatpacket.5,@object
  263. .size .L_2il0floatpacket.5,8
  264. .align 8
  265. .L_2il0floatpacket.6:
  266. .long 0xda752d4f,0x3fc55550          # double, about 1/6; r^3 coefficient on the table-driven paths
  267. .type .L_2il0floatpacket.6,@object
  268. .size .L_2il0floatpacket.6,8
  269. .align 8
  270. .L_2il0floatpacket.7:
  271. .long 0xffffe7c6,0x3fefffff          # double, just below 1.0; r coefficient on the table-driven paths
  272. .type .L_2il0floatpacket.7,@object
  273. .size .L_2il0floatpacket.7,8
  274. .align 8
  275. .L_2il0floatpacket.8:
  276. .long 0x00000000,0xbff00000          # double -1.0
  277. .type .L_2il0floatpacket.8,@object
  278. .size .L_2il0floatpacket.8,8
  279. .align 8
  280. .L_2il0floatpacket.9:
  281. .long 0xd2809cf8,0x3f2a04e1          # double, about 1/5040; x^7 coefficient of the small-argument polynomial
  282. .type .L_2il0floatpacket.9,@object
  283. .size .L_2il0floatpacket.9,8
  284. .align 8
  285. .L_2il0floatpacket.10:
  286. .long 0xeb5ecfe9,0x3f811110          # double, about 1/120; x^5 coefficient
  287. .type .L_2il0floatpacket.10,@object
  288. .size .L_2il0floatpacket.10,8
  289. .align 8
  290. .L_2il0floatpacket.11:
  291. .long 0x55579a3a,0x3fc55555          # double, about 1/6; x^3 coefficient
  292. .type .L_2il0floatpacket.11,@object
  293. .size .L_2il0floatpacket.11,8
  294. .align 8
  295. .L_2il0floatpacket.12:
  296. .long 0x9843cb2c,0x3f56c445          # double, about 1/720; x^6 coefficient
  297. .type .L_2il0floatpacket.12,@object
  298. .size .L_2il0floatpacket.12,8
  299. .align 8
  300. .L_2il0floatpacket.13:
  301. .long 0x27a9b288,0x3fa55555          # double, about 1/24; x^4 coefficient
  302. .type .L_2il0floatpacket.13,@object
  303. .size .L_2il0floatpacket.13,8
  304. .align 8
  305. .L_2il0floatpacket.14:
  306. .long 0x00016df6,0x3fe00000          # double, about 1/2; x^2 coefficient
  307. .type .L_2il0floatpacket.14,@object
  308. .size .L_2il0floatpacket.14,8
  309. .align 4
  310. .L_2il0floatpacket.2:
  311. .long 0x4b400000                     # float 1.5*2^23 = 12582912.0 (same value as packet 1)
  312. .type .L_2il0floatpacket.2,@object
  313. .size .L_2il0floatpacket.2,4
  314. .align 4
  315. .L_2il0floatpacket.15:
  316. .long 0x0d800000                     # float 2^-100; squared by the code on the tiny-input paths
  317. .type .L_2il0floatpacket.15,@object
  318. .size .L_2il0floatpacket.15,4
  319. .align 4
  320. .L_2il0floatpacket.16:
  321. .long 0xbf800000                     # float -1.0; returned for finite negative x beyond range[1]
  322. .type .L_2il0floatpacket.16,@object
  323. .size .L_2il0floatpacket.16,4
  324. .align 4
  325. .L_2il0floatpacket.17:
  326. .long 0x71800000                     # float 2^100; squared by the code on the overflow path
  327. .type .L_2il0floatpacket.17,@object
  328. .size .L_2il0floatpacket.17,4
  329. .align 4
  330. range:                               # limits on the bits of |x|, indexed by the sign bit of x
  331. .long 1118925336                     # [0] x >= 0: 0x42B17218, about 88.72
  332. .long 1107296256                     # [1] x < 0: 0x42000000 = 32.0
  333. .type range,@object
  334. .size range,8
  335. .align 4
  336. _inf_none:                           # results for infinite x, indexed by the sign bit of x
  337. .long 2139095040                     # [0] 0x7F800000 = +infinity
  338. .long 3212836864                     # [1] 0xBF800000 = -1.0
  339. .type _inf_none,@object
  340. .size _inf_none,8
  341. .data
  342. .section .note.GNU-stack, ""
  343. // -- Begin DWARF2 SEGMENT .eh_frame
  344. .section .eh_frame,"a",@progbits
  345. .eh_frame_seg:
  346. .align 1
  347. # End
  347. # End