expm1_gen.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "expm1_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin expm1
  /*
   * expm1: takes its argument in %xmm0 and returns the result in %xmm0.
   * Presumably the C library routine double expm1(double x), i.e. e^x - 1
   * (inferred from the symbol name and the constants used -- TODO confirm).
   *
   * Syntax: GNU as, AT&T operand order (source, destination).
   * Registers written: %rax, %rcx, %rdx, %rsi, %rdi, %r8, %xmm0-%xmm9, flags.
   * Stack: %rsp is never adjusted. The 24 bytes at -24(%rsp)..-1(%rsp) are
   * used as scratch, so the routine depends on a red zone below %rsp and
   * makes no calls.
   *
   * Dispatch, with hx = high 32 bits of x, ax = hx & 0x7fffffff and
   * sign = hx >> 31:
   *   |x| above _range[sign]      -> ..B1.20  (Inf, NaN, out-of-range values)
   *   ax >= 0x3F700000 (2^-8)     -> ..B1.13  (table lookup plus polynomial)
   *   ax >= 0x3C600000 (2^-57)    -> ..B1.12  (polynomial in x only)
   *   ax >= 0x00100000 (normal)   -> ..B1.7   (result is x)
   *   otherwise (subnormal, zero) -> ..B1.8
   */
  41. .text
  42. .align 16,0x90
  43. .globl expm1
  44. expm1:
  45. # parameter 1: %xmm0
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_expm1.1:
  49. ..L2:
  50. lea _range(%rip), %rsi                 # rsi = &_range (one 8-byte limit per sign)
  51. movsd %xmm0, -24(%rsp)                 # spill x so its 32-bit halves can be read as integers
  52. movl -20(%rsp), %edx                   # edx = hx, the high word of x (little-endian layout)
  53. movl %edx, %eax
  54. shrl $31, %edx                         # edx = sign bit (0 or 1), used as a table index below
  55. andl $2147483647, %eax                 # eax = ax = hx with the sign bit cleared (mask 0x7fffffff)
  56. cmpl 4(%rsi,%rdx,8), %eax              # compare ax with the high word of _range[sign]
  57. jb ..B1.4                              # strictly below the limit: in range
  58. ..B1.2:
  59. jne ..B1.20                            # strictly above the limit: special handling
  60. ..B1.3:
  61. movl -24(%rsp), %ecx                   # high words equal, so compare the low words
  62. cmpl (%rsi,%rdx,8), %ecx
  63. ja ..B1.20
  64. ..B1.4:
  65. cmpl $1064304640, %eax                 # 0x3F700000, the high word of 2^-8
  66. jae ..B1.13
  67. ..B1.5:
  68. cmpl $1012924416, %eax                 # 0x3C600000, the high word of 2^-57
  69. jae ..B1.12
  70. ..B1.6:
  71. cmpl $1048576, %eax                    # 0x00100000, the high word of the smallest normal double
  72. jb ..B1.8
  73. ..B1.7:
  # Normal x with |x| < 2^-57: return x + (2^-1000 * 2^-1000).
  # The product is far below the subnormal range, so it presumably exists only
  # to raise the underflow/inexact flags -- TODO confirm.
  74. lea _small_value_64(%rip), %rax
  75. movsd (%rax), %xmm0                    # xmm0 = _small_value_64[0] (2^-1000)
  76. mulsd %xmm0, %xmm0
  77. addsd -24(%rsp), %xmm0                 # add the original x
  78. ret
  79. ..B1.8:
  80. movl -24(%rsp), %edx                   # low word of x
  81. orl %edx, %eax                         # zero only when every bit except the sign is clear
  82. je ..B1.10                             # x is +0 or -0
  83. ..B1.9:
  # Nonzero subnormal x: return x + (2^-1000 * 2^-1000).
  84. lea _small_value_64(%rip), %rax
  85. movsd -24(%rsp), %xmm0                 # xmm0 = x
  86. movsd (%rax), %xmm1
  87. mulsd %xmm1, %xmm1
  88. movsd %xmm1, -16(%rsp)                 # stored but never reloaded on this path
  89. addsd %xmm1, %xmm0
  90. ret
  91. ..B1.10:
  92. movsd -24(%rsp), %xmm0                 # zero input: return x unchanged (sign preserved)
  93. ..B1.11:
  94. ret
  95. ..B1.12:
  # 2^-57 <= |x| < 2^-8: polynomial in x, with x2 = x*x and c4..c7 being
  # .L_2il0floatpacket.4 .. .7:
  #   result = x + ( 0.5*x2 + x2*( (c4*x2 + c5)*x2 + (c6*x2 + c7)*x ) )
  96. movsd -24(%rsp), %xmm0                 # xmm0 = x
  97. movaps %xmm0, %xmm3
  98. mulsd %xmm0, %xmm3                     # xmm3 = x2
  99. movsd .L_2il0floatpacket.4(%rip), %xmm4
  100. movsd .L_2il0floatpacket.6(%rip), %xmm1
  101. mulsd %xmm3, %xmm4                    # c4*x2
  102. mulsd %xmm3, %xmm1                    # c6*x2
  103. addsd .L_2il0floatpacket.5(%rip), %xmm4
  104. addsd .L_2il0floatpacket.7(%rip), %xmm1
  105. mulsd %xmm3, %xmm4                    # (c4*x2 + c5)*x2
  106. mulsd %xmm0, %xmm1                    # (c6*x2 + c7)*x
  107. movsd .L_2il0floatpacket.8(%rip), %xmm2    # 0.5
  108. addsd %xmm1, %xmm4
  109. movsd %xmm3, -8(%rsp)                 # stored but never reloaded on this path
  110. mulsd %xmm3, %xmm4
  111. mulsd %xmm2, %xmm3                    # 0.5*x2
  112. addsd %xmm3, %xmm4
  113. addsd %xmm4, %xmm0
  114. ret
  115. ..B1.13:
  # |x| >= 2^-8 and within _range. Argument reduction:
  #   N = x*(128/ln2) rounded to an integer, obtained by adding 1.5*2^52
  #   r = (x - N*hi) - N*lo, where hi + lo is ln2/128 split in two doubles
  #   j = low 7 bits of N, sign-extended (-64..63), used as the table index
  #   k = (N - j) >> 7, used as the power-of-two exponent
  116. movsd -24(%rsp), %xmm1                # xmm1 = x
  117. lea _TWO_52H(%rip), %rcx
  118. movaps %xmm1, %xmm0
  119. mulsd .L_2il0floatpacket.0(%rip), %xmm0    # x * 128/ln2
  120. movsd (%rcx), %xmm2                   # xmm2 = 1.5*2^52
  121. movsd .L_2il0floatpacket.1(%rip), %xmm3    # hi part of ln2/128
  122. addsd %xmm2, %xmm0                    # leaves the rounded integer N in the low mantissa bits
  123. movsd %xmm0, -16(%rsp)                # kept in memory so the low word can be read as an integer
  124. movsd -16(%rsp), %xmm5
  125. movsd .L_2il0floatpacket.2(%rip), %xmm4    # lo part of ln2/128
  126. subsd %xmm2, %xmm5                    # xmm5 = N as a double
  127. mulsd %xmm5, %xmm3                    # N*hi
  128. mulsd %xmm4, %xmm5                    # N*lo
  129. subsd %xmm3, %xmm1                    # xmm1 = x - N*hi
  130. movaps %xmm1, %xmm8
  131. movsd .L_2il0floatpacket.9(%rip), %xmm6    # 1.0
  132. subsd %xmm5, %xmm8                    # xmm8 = r
  133. movsd .L_2il0floatpacket.3(%rip), %xmm7    # -1.0
  134. movaps %xmm8, %xmm9                   # xmm9 = r (gets the correction term added below)
  135. movl -16(%rsp), %esi                  # esi = N (low 32 bits of the shifted sum)
  136. movl %esi, %edi
  137. shll $25, %edi                        # together with the sarl below: sign-extend the low 7 bits
  138. addsd %xmm8, %xmm6                    # 1 + r
  139. subsd %xmm8, %xmm1                    # (x - N*hi) - r
  140. movsd %xmm6, -8(%rsp)
  141. subsd %xmm5, %xmm1                    # xmm1 = ((x - N*hi) - r) - N*lo, the rounding error of r
  142. movl $0, -8(%rsp)                     # clear the low 32 bits of the stored (1 + r)
  143. addsd %xmm1, %xmm9                    # xmm9 = r plus its correction
  144. movsd -8(%rsp), %xmm6                 # xmm6 = (1 + r) truncated to its upper 32 bits
  145. sarl $25, %edi                        # edi = j
  146. addsd %xmm6, %xmm7                    # xmm7 = xmm6 - 1
  147. subl %edi, %esi                       # esi = N - j
  148. subsd %xmm7, %xmm8                    # xmm8 = r - (xmm6 - 1), the part dropped by truncation
  149. movslq %edi, %rdi
  150. addsd %xmm8, %xmm1                    # xmm1 = accumulated low-order terms
  151. shrl $7, %esi                         # esi = k (logical shift, upper half of rsi is zeroed)
  152. shlq $4, %rdi                         # rdi = j*16, byte offset at 16 bytes per table entry
  153. movq __libm_exp_table_128@GOTPCREL(%rip), %r8    # table address via the GOT (defined elsewhere)
  154. cmpl $1078204908, -20(%rsp)           # signed compare of hx with 0x404419EC (about 40.2)
  155. jle ..B1.15                           # negative x also lands here, since hx is then negative
  156. ..B1.14:
  # Positive x with hx > 0x404419EC, so the sign index in rdx is 0 here.
  # With T0 = table[j+64] at +0, T1 = table[j+64] at +8, r = xmm9 and
  # p = 0.5*r*r + r*r*((c4*r*r + c5)*r*r + (c6*r*r + c7)*r):
  #   result = ( (lo+p)*T1 + (lo+p+xmm6)*T0 + xmm6*T1 )
  #            * 2^(k + SC2_BIAS[sign] - 1023) * _SC2[sign]
  # NOTE(review): no 1.0 is subtracted on this path. Presumably it is below
  # rounding precision for x this large -- confirm.
  157. movaps %xmm9, %xmm3
  158. lea SC2_BIAS(%rip), %rax
  159. movsd .L_2il0floatpacket.4(%rip), %xmm4
  160. movsd .L_2il0floatpacket.6(%rip), %xmm0
  161. movsd .L_2il0floatpacket.8(%rip), %xmm2    # 0.5
  162. mulsd %xmm9, %xmm3                    # xmm3 = r*r
  163. mulsd %xmm3, %xmm4
  164. mulsd %xmm3, %xmm0
  165. addsd .L_2il0floatpacket.5(%rip), %xmm4
  166. addsd .L_2il0floatpacket.7(%rip), %xmm0
  167. mulsd %xmm3, %xmm4
  168. mulsd %xmm9, %xmm0
  169. movsd 1032(%r8,%rdi), %xmm5           # T1: second double of table entry j+64
  170. addsd %xmm0, %xmm4
  171. mulsd %xmm3, %xmm4
  172. mulsd %xmm2, %xmm3
  173. movslq (%rax,%rdx,4), %rcx            # rcx = SC2_BIAS[sign]
  174. addsd %xmm3, %xmm4                    # xmm4 = p
  175. addq %rsi, %rcx                       # biased exponent = k + SC2_BIAS[sign]
  176. addsd %xmm4, %xmm1                    # lo + p
  177. movaps %xmm1, %xmm0
  178. addsd %xmm6, %xmm1                    # lo + p + xmm6
  179. mulsd %xmm5, %xmm0                    # (lo + p)*T1
  180. mulsd %xmm6, %xmm5                    # xmm6*T1
  181. mulsd 1024(%r8,%rdi), %xmm1           # times T0: first double of table entry j+64
  182. shlq $52, %rcx                        # move the exponent into the exponent field of a double
  183. addsd %xmm1, %xmm0
  184. movq %rcx, -24(%rsp)                  # materialize the power of two in memory (overwrites x)
  185. addsd %xmm5, %xmm0
  186. mulsd -24(%rsp), %xmm0                # first scaling step
  187. lea _SC2(%rip), %rsi
  188. mulsd (%rsi,%rdx,8), %xmm0            # second scaling step by _SC2[sign]
  189. ret
  190. ..B1.15:
  # Remaining in-range inputs: same table and polynomial as ..B1.14, scaled by
  # 2^k directly, followed by the subtraction of 1.0.
  191. movaps %xmm9, %xmm3
  192. movq $0x3ff0000000000000, %rdx        # bit pattern of 1.0 (the sign index in rdx is no longer needed)
  193. mulsd %xmm9, %xmm3                    # xmm3 = r*r
  194. addl $-1078081678, %eax               # eax = ax - 0x4042388E, for the range test at cmpl below
  195. movsd .L_2il0floatpacket.4(%rip), %xmm4
  196. movsd .L_2il0floatpacket.6(%rip), %xmm0
  197. mulsd %xmm3, %xmm4
  198. mulsd %xmm3, %xmm0
  199. addsd .L_2il0floatpacket.5(%rip), %xmm4
  200. addsd .L_2il0floatpacket.7(%rip), %xmm0
  201. mulsd %xmm3, %xmm4
  202. mulsd %xmm9, %xmm0
  203. movsd .L_2il0floatpacket.8(%rip), %xmm2    # 0.5
  204. addsd %xmm0, %xmm4
  205. mulsd %xmm3, %xmm4
  206. mulsd %xmm2, %xmm3
  207. movsd 1032(%r8,%rdi), %xmm5           # T1: second double of table entry j+64
  208. addsd %xmm3, %xmm4                    # xmm4 = p
  209. shlq $52, %rsi                        # k into the exponent field
  210. addsd %xmm4, %xmm1                    # lo + p
  211. movaps %xmm1, %xmm0
  212. addq %rdx, %rsi                       # rsi = bit pattern of 2^k
  213. mulsd %xmm5, %xmm0                    # (lo + p)*T1
  214. addsd %xmm6, %xmm1                    # lo + p + xmm6
  215. mulsd %xmm6, %xmm5                    # xmm6*T1
  216. mulsd 1024(%r8,%rdi), %xmm1           # times T0: first double of table entry j+64
  217. movq %rsi, -24(%rsp)
  218. cmpl $216675, %eax                    # flags are consumed by the ja below, the SSE ops in between leave them intact
  219. addsd %xmm1, %xmm0
  220. movsd -24(%rsp), %xmm1                # xmm1 = 2^k
  221. mulsd %xmm1, %xmm5                    # xmm5 = B = xmm6*T1*2^k (leading term)
  222. mulsd %xmm1, %xmm0                    # xmm0 = remaining terms scaled by 2^k
  223. ja ..B1.17                            # unsigned: taken when ax lies outside 0x4042388E..0x404586F1
  224. ..B1.16:
  # ax inside that window: result = (xmm0 - 1.0) + B
  225. movsd .L_2il0floatpacket.9(%rip), %xmm1    # 1.0
  226. subsd %xmm1, %xmm0
  227. jmp ..B1.18
  228. ..B1.17:
  # Otherwise form s = B - 1.0 first and fold the rounding error of that
  # subtraction back into the small term before the final add.
  229. movsd .L_2il0floatpacket.9(%rip), %xmm1    # 1.0
  230. movaps %xmm5, %xmm2                   # keep a copy of B
  231. movaps %xmm1, %xmm3
  232. subsd %xmm1, %xmm5                    # xmm5 = s = B - 1.0
  233. movsd %xmm5, -8(%rsp)                 # stored but never reloaded on this path
  234. addsd %xmm5, %xmm3                    # 1.0 + s
  235. subsd %xmm2, %xmm3                    # (1.0 + s) - B, the negated rounding error of s
  236. subsd %xmm3, %xmm0                    # fold that error into the small term
  237. ..B1.18:
  238. addsd %xmm5, %xmm0                    # final sum
  239. ..B1.19:
  240. ret
  241. ..B1.20:
  # |x| exceeds _range[sign]: Inf, NaN, or a finite value outside the limit.
  242. cmpl $2146435072, %eax                # 0x7FF00000: exponent field all ones
  243. jae ..B1.24
  244. ..B1.21:
  245. testq %rdx, %rdx                      # rdx still holds the sign index
  246. je ..B1.28                            # positive and too large
  247. ..B1.22:
  # Finite negative x below the limit: return -1.0 + 2^-1000
  248. lea _small_value_64(%rip), %rax
  249. movsd .L_2il0floatpacket.3(%rip), %xmm0    # -1.0
  250. addsd (%rax), %xmm0
  251. ..B1.23:
  252. ret
  253. ..B1.24:
  254. addl $-2146435072, %eax               # drop the exponent bits, leaving the upper mantissa bits
  255. orl -24(%rsp), %eax                   # nonzero when any mantissa bit is set, i.e. NaN
  256. jne ..B1.26
  257. ..B1.25:
  258. lea _inf_none(%rip), %rax
  259. movsd (%rax,%rdx,8), %xmm0            # infinity input: +Inf for +Inf, -1.0 for -Inf
  260. ret
  261. ..B1.26:
  262. movsd -24(%rsp), %xmm0                # NaN input: return x as stored
  263. ..B1.27:
  264. ret
  265. ..B1.28:
  # Finite positive x above the limit: return 2^1000 * 2^1000, which overflows.
  266. lea _large_value_64(%rip), %rax
  267. movsd (%rax), %xmm0
  268. mulsd %xmm0, %xmm0
  269. ret
  270. .align 16,0x90
  271. .cfi_endproc
  272. .type expm1,@function
  273. .size expm1,.-expm1                   # measured after the .align above, so it includes the padding
  274. .data
  275. # -- End expm1
  276. .section .rodata, "a"
  /*
   * Literal pool of 8-byte constants read by expm1 with movsd/mulsd/addsd.
   * Each entry is written as two 32-bit words, low word first, forming one
   * IEEE-754 double on this little-endian target.
   */
  277. .align 8
  278. .align 8
  279. .L_2il0floatpacket.0:                 # 0x40671547652B82FE, about 184.665 (128/ln 2)
  280. .long 0x652b82fe,0x40671547
  281. .type .L_2il0floatpacket.0,@object
  282. .size .L_2il0floatpacket.0,8
  283. .align 8
  284. .L_2il0floatpacket.1:                 # 0x3F762E4200000000, high part of ln2/128 (low 32 bits zero)
  285. .long 0x00000000,0x3f762e42
  286. .type .L_2il0floatpacket.1,@object
  287. .size .L_2il0floatpacket.1,8
  288. .align 8
  289. .L_2il0floatpacket.2:                 # 0x3E2FDF473DE6AF28, about 3.7e-9, low part of ln2/128
  290. .long 0x3de6af28,0x3e2fdf47
  291. .type .L_2il0floatpacket.2,@object
  292. .size .L_2il0floatpacket.2,8
  293. .align 8
  294. .L_2il0floatpacket.3:                 # 0xBFF0000000000000 = -1.0
  295. .long 0x00000000,0xbff00000
  296. .type .L_2il0floatpacket.3,@object
  297. .size .L_2il0floatpacket.3,8
  298. .align 8
  299. .L_2il0floatpacket.4:                 # 0x3F56C16C87372663, close to 1/720 (polynomial coefficient c4)
  300. .long 0x87372663,0x3f56c16c
  301. .type .L_2il0floatpacket.4,@object
  302. .size .L_2il0floatpacket.4,8
  303. .align 8
  304. .L_2il0floatpacket.5:                 # 0x3FA555555555541D, close to 1/24 (polynomial coefficient c5)
  305. .long 0x5555541d,0x3fa55555
  306. .type .L_2il0floatpacket.5,@object
  307. .size .L_2il0floatpacket.5,8
  308. .align 8
  309. .L_2il0floatpacket.6:                 # 0x3F8111116887CD7C, close to 1/120 (polynomial coefficient c6)
  310. .long 0x6887cd7c,0x3f811111
  311. .type .L_2il0floatpacket.6,@object
  312. .size .L_2il0floatpacket.6,8
  313. .align 8
  314. .L_2il0floatpacket.7:                 # 0x3FC5555555555405, close to 1/6 (polynomial coefficient c7)
  315. .long 0x55555405,0x3fc55555
  316. .type .L_2il0floatpacket.7,@object
  317. .size .L_2il0floatpacket.7,8
  318. .align 8
  319. .L_2il0floatpacket.8:                 # 0x3FE0000000000000 = 0.5
  320. .long 0x00000000,0x3fe00000
  321. .type .L_2il0floatpacket.8,@object
  322. .size .L_2il0floatpacket.8,8
  323. .align 8
  324. .L_2il0floatpacket.9:                 # 0x3FF0000000000000 = 1.0
  325. .long 0x00000000,0x3ff00000
  326. .type .L_2il0floatpacket.9,@object
  327. .size .L_2il0floatpacket.9,8
  /*
   * Named lookup tables used by expm1. Two-entry tables are indexed by the
   * sign bit of x (entry 0 for sign clear, entry 1 for sign set). Doubles are
   * written as two 32-bit words, low word first. The tables are only 4-byte
   * aligned and are read with scalar loads.
   */
  328. .align 4
  329. _range:                               # magnitude limits for the main computation paths
  330. .long 4277811695                      # [0] low word  0xFEFA39EF
  331. .long 1082535490                      # [0] high word 0x40862E42, limit is about 709.78
  332. .long 2669343409                      # [1] low word
  333. .long 1078159482                      # [1] high word 0x4043687A, limit is about 38.82
  334. .type _range,@object
  335. .size _range,16
  336. .align 4
  337. _small_value_64:                      # tiny values used to build the tiny-input and large-negative results
  338. .long 0
  339. .long 24117248                        # [0] high word 0x01700000: +2^-1000
  340. .long 0
  341. .long 2171600896                      # [1] high word 0x81700000: -2^-1000 (not read by the code in this file)
  342. .type _small_value_64,@object
  343. .size _small_value_64,16
  344. .align 4
  345. _TWO_52H:                             # 1.5 * 2^52, added and subtracted to round to an integer
  346. .long 0
  347. .long 1127743488                      # high word 0x43380000
  348. .type _TWO_52H,@object
  349. .size _TWO_52H,8
  350. .align 4
  351. SC2_BIAS:                             # 32-bit exponent biases added to k before the shift into the exponent field
  352. .long 511                             # [0] pairs with _SC2[0]: 511 + 512 = 1023
  353. .long 1535                            # [1] pairs with _SC2[1]: 1535 - 512 = 1023
  354. .type SC2_BIAS,@object
  355. .size SC2_BIAS,8
  356. .align 4
  357. _SC2:                                 # second scale factor applied after the biased power of two
  358. .long 0
  359. .long 1609564160                      # [0] high word 0x5FF00000: 2^512
  360. .long 0
  361. .long 535822336                       # [1] high word 0x1FF00000: 2^-512
  362. .type _SC2,@object
  363. .size _SC2,16
  364. .align 4
  365. _inf_none:                            # results for infinite inputs
  366. .long 0
  367. .long 2146435072                      # [0] high word 0x7FF00000: +Inf
  368. .long 0
  369. .long 3220176896                      # [1] high word 0xBFF00000: -1.0
  370. .type _inf_none,@object
  371. .size _inf_none,16
  372. .align 4
  373. _large_value_64:                      # huge values, squared to produce an overflowing result
  374. .long 0
  375. .long 2121269248                      # [0] high word 0x7E700000: +2^1000
  376. .long 0
  377. .long 4268752896                      # [1] high word 0xFE700000: -2^1000 (not read by the code in this file)
  378. .type _large_value_64,@object
  379. .size _large_value_64,16
  380. .data
  381. .section .note.GNU-stack, ""          # empty marker section, by GNU toolchain convention it signals that no executable stack is needed
  382. // -- Begin DWARF2 SEGMENT .eh_frame
  383. .section .eh_frame,"a",@progbits      # switch to the unwind-info section, nothing is emitted by hand below
  384. .eh_frame_seg:
  385. .align 1
  386. # End