/* exp10_gen.S */
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "exp10_gen.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin exp10
  /*
   * exp10: double-precision 10**x (x86-64, AT&T syntax, compiler generated).
   *
   * In:    %xmm0 = x
   * Out:   %xmm0 = result
   * Stack: scratch slots at -40, -32, -24 and -16(%rsp) are used without
   *        adjusting %rsp, and the function makes no calls (this relies on
   *        the System V AMD64 red zone).
   * Regs written: rax, rcx, rdx, rsi, rdi, r8, r9, r10, xmm0-xmm12, flags.
   * External data: __libm_exp_table_128 (reached through the GOT; its
   *        layout is not visible in this file).
   *
   * Outline:
   *   1. |x| < 2**-57           -> return 1.0 + x
   *   2. |x| beyond _range[sign] -> overflow / underflow / Inf / NaN paths
   *   3. x an exact integer with high word in 0x3FF00000..0x40360000
   *                              -> return exact_values[x]
   *   4. otherwise: n = round(x * 128*log2(10)), j = low 7 bits of n
   *      (signed), k = (n - j) / 128, r = x - n*log10(2)/128, and the
   *      result is table[j] * poly(r) * 2**k, with the power of two applied
   *      in two steps (SC2_BIAS / _SC2) so the intermediate scale stays
   *      representable.
   */
  41. .text
  42. .align 16,0x90
  43. .globl exp10
  44. exp10:
  45. # parameter 1: %xmm0
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_exp10.1:
  49. ..L2:
  50. movsd %xmm0, -40(%rsp)  # spill x so its 32-bit halves can be read as integers
  51. movl -36(%rsp), %esi  # esi = high word of x (sign, exponent, top mantissa bits)
  52. movl %esi, %edi
  53. movl %esi, %edx
  54. andl $2147483647, %edi  # edi = high word of |x| (sign bit cleared)
  55. shrl $31, %edx  # edx = sign bit of x (0 or 1); used as a table index below
  56. cmpl $1012924416, %edi  # 0x3C600000: is |x| < 2**-57 ?
  57. jb ..B1.24  # yes: result is 1.0 + x
  58. ..B1.2:
  59. lea _range(%rip), %rcx
  60. cmpl 4(%rcx,%rdx,8), %edi  # compare with high word of _range[sign]
  61. jb ..B1.5  # high word below the limit: in range
  62. ..B1.3:
  63. jne ..B1.16  # flags still from the cmpl above: high word above the limit
  64. ..B1.4:
  65. movl -40(%rsp), %eax  # high words equal: decide on the low words
  66. cmpl (%rcx,%rdx,8), %eax
  67. ja ..B1.16  # |x| exceeds _range[sign]
  68. ..B1.5:
  69. addl $-1072693248, %esi  # esi = high word of x minus 0x3FF00000
  70. cmpl $4587520, %esi  # unsigned test against 0x460000: high word in 0x3FF00000..0x40360000 (x from 1.0 to about 22)
  71. ja ..B1.8  # outside that window: no exact-integer shortcut
  72. ..B1.6:
  73. lea _TWO_52H(%rip), %rax
  74. movsd -40(%rsp), %xmm0  # xmm0 = x
  75. movaps %xmm0, %xmm1
  76. movsd (%rax), %xmm2  # xmm2 = 1.5 * 2**52
  77. addsd (%rax), %xmm1  # adding 1.5 * 2**52 rounds x to an integer (current rounding mode)
  78. movsd %xmm1, -32(%rsp)
  79. movsd -32(%rsp), %xmm1
  80. movslq -32(%rsp), %rcx  # rcx = low 32 bits of the sum = the rounded integer
  81. subsd %xmm2, %xmm1  # xmm1 = x rounded to an integer, as a double
  82. ucomisd %xmm0, %xmm1
  83. jp ..B1.7  # unordered compare: treat as not equal
  84. je ..B1.26  # x is an exact integer: look the result up
  85. ..B1.7:
  86. movsd %xmm1, -24(%rsp)  # this slot is overwritten below before it is read again
  87. jmp ..B1.9
  88. ..B1.8:
  89. lea _TWO_52H(%rip), %rax
  90. movsd -40(%rsp), %xmm0  # xmm0 = x
  91. movsd (%rax), %xmm2  # xmm2 = 1.5 * 2**52
  92. ..B1.9:
  93. movaps %xmm0, %xmm1
  94. lea _TWO_32H(%rip), %rcx
  95. mulsd .L_2il0floatpacket.0(%rip), %xmm1  # xmm1 = x * 128*log2(10)
  96. lea SC2_BIAS(%rip), %rdi  # rdi is reused as a pointer; the |x| high word is no longer needed
  97. movsd .L_2il0floatpacket.2(%rip), %xmm3  # xmm3 = low part of log10(2)/128
  98. addsd %xmm2, %xmm1  # shift by 1.5 * 2**52 to round to integer n
  99. movsd %xmm1, -32(%rsp)
  100. movsd -32(%rsp), %xmm6
  101. movsd (%rcx), %xmm5  # xmm5 = 1.5 * 2**32
  102. subsd %xmm2, %xmm6  # xmm6 = n as a double
  103. movsd .L_2il0floatpacket.1(%rip), %xmm2  # xmm2 = high part of log10(2)/128
  104. mulsd %xmm6, %xmm2  # n * high part
  105. mulsd %xmm3, %xmm6  # xmm6 = n * low part
  106. subsd %xmm2, %xmm0  # xmm0 = x - n*high
  107. movaps %xmm0, %xmm4
  108. movl -32(%rsp), %eax  # eax = n (low 32 bits of the shifted sum)
  109. movl %eax, %r9d
  110. movsd .L_2il0floatpacket.4(%rip), %xmm12  # leading coefficient of the even-power chain (~0.20700)
  111. movl %eax, %esi
  112. movsd .L_2il0floatpacket.8(%rip), %xmm2  # leading coefficient of the odd-power chain (~0.068089)
  113. subsd %xmm6, %xmm4  # xmm4 = x - n*high - n*low
  114. movsd .L_2il0floatpacket.3(%rip), %xmm8  # xmm8 = high part of ln(10) (~2.302584)
  115. addsd %xmm5, %xmm4  # add/subtract 1.5 * 2**32 to round to a multiple of 2**-20
  116. movsd %xmm4, -32(%rsp)
  117. movsd -32(%rsp), %xmm9
  118. movsd .L_2il0floatpacket.12(%rip), %xmm7  # xmm7 = 1.0
  119. subsd %xmm5, %xmm9  # xmm9 = r_hi (reduced argument rounded to a multiple of 2**-20)
  120. shll $25, %r9d
  121. subsd %xmm9, %xmm0  # xmm0 = (x - n*high) - r_hi
  122. sarl $25, %r9d  # r9d = j = low 7 bits of n, sign-extended (-64..63)
  123. subsd %xmm6, %xmm0  # xmm0 = r_lo = (x - n*high) - r_hi - n*low
  124. movaps %xmm0, %xmm11
  125. subl %r9d, %esi  # esi = n - j (a multiple of 128)
  126. mulsd %xmm8, %xmm0  # xmm0 = r_lo * ln10_hi
  127. addsd %xmm9, %xmm11  # xmm11 = r = r_hi + r_lo
  128. mulsd %xmm8, %xmm9  # xmm9 = r_hi * ln10_hi
  129. movaps %xmm11, %xmm10
  130. addsd %xmm9, %xmm7  # xmm7 = 1 + r_hi*ln10_hi
  131. mulsd %xmm11, %xmm10  # xmm10 = r*r
  132. mulsd %xmm10, %xmm12  # even chain: Horner steps in r*r
  133. mulsd %xmm10, %xmm2  # odd chain: Horner steps in r*r
  134. addsd .L_2il0floatpacket.5(%rip), %xmm12  # + ~1.171255
  135. addsd .L_2il0floatpacket.9(%rip), %xmm2  # + ~0.539383
  136. mulsd %xmm10, %xmm12
  137. mulsd %xmm10, %xmm2
  138. addsd .L_2il0floatpacket.6(%rip), %xmm12  # + ~2.650949
  139. addsd .L_2il0floatpacket.10(%rip), %xmm2  # + ~2.034678
  140. mulsd %xmm10, %xmm12  # xmm12 = terms in r**2, r**4, r**6
  141. mulsd %xmm10, %xmm2
  142. addsd %xmm0, %xmm12  # + r_lo * ln10_hi
  143. addsd .L_2il0floatpacket.11(%rip), %xmm2  # + low part of ln(10) (~1.3985e-6)
  144. movsd %xmm7, -24(%rsp)
  145. movl $0, -24(%rsp)  # clear the low 32 bits of the stored double
  146. movsd -24(%rsp), %xmm1  # xmm1 = H = (1 + r_hi*ln10_hi) truncated to its upper 32 bits
  147. movsd .L_2il0floatpacket.7(%rip), %xmm0  # xmm0 = -1.0
  148. mulsd %xmm11, %xmm2  # xmm2 = terms in r, r**3, r**5, r**7
  149. addsd %xmm1, %xmm0  # xmm0 = H - 1
  150. movslq %r9d, %r9
  151. subsd %xmm0, %xmm9  # xmm9 = r_hi*ln10_hi - (H - 1): the bits dropped by the truncation
  152. shlq $4, %r9  # r9 = j * 16: byte offset of a 16-byte table entry
  153. addsd %xmm9, %xmm12
  154. movq __libm_exp_table_128@GOTPCREL(%rip), %r10  # r10 = address of the external table
  155. addsd %xmm12, %xmm2  # xmm2 = P: all low-order terms, so 10**r ~= H + P
  156. shrl $7, %esi  # esi = (n - j) >> 7 as unsigned; excess high bits for negative n are dropped by the shlq $52 below
  157. movsd 1032(%r10,%r9), %xmm4  # xmm4 = T1: second double of entry j (entries are addressed from byte 1024)
  158. movslq (%rdi,%rdx,4), %r8  # r8 = SC2_BIAS[sign]
  159. movaps %xmm4, %xmm0
  160. addq %rsi, %r8  # biased exponent for the power-of-two scale
  161. shlq $52, %r8  # move it into the exponent field of a double
  162. movq %r8, -16(%rsp)  # -16(%rsp) = scale factor 2**(k + bias - 1023) as raw bits
  163. cmpl $-130815, %eax  # flags are consumed by the jle below; the SSE ops in between leave them untouched
  164. movsd 1024(%r10,%r9), %xmm3  # xmm3 = T0: first double of entry j (presumably the low part of 2**(j/128); table not visible here)
  165. mulsd %xmm2, %xmm0  # xmm0 = T1 * P
  166. mulsd %xmm1, %xmm4  # xmm4 = T1 * H
  167. jle ..B1.11  # n <= -130815: take the path that checks for a subnormal result
  168. ..B1.10:
  169. lea _SC2(%rip), %rax
  170. addsd %xmm1, %xmm2  # xmm2 = H + P
  171. mulsd %xmm2, %xmm3  # xmm3 = T0 * (H + P)
  172. addsd %xmm3, %xmm0
  173. addsd %xmm4, %xmm0  # xmm0 = T1*P + T0*(H + P) + T1*H
  174. mulsd -16(%rsp), %xmm0  # apply the biased power-of-two scale
  175. mulsd (%rax,%rdx,8), %xmm0  # multiply by _SC2[sign] to remove the bias
  176. movsd %xmm0, -40(%rsp)  # stored copy is not read again on this path
  177. ret
  178. ..B1.11:
  179. lea _SC2(%rip), %rax
  180. addsd %xmm1, %xmm2  # xmm2 = H + P
  181. mulsd %xmm2, %xmm3  # xmm3 = T0 * (H + P)
  182. movaps %xmm4, %xmm1  # xmm1 = T1 * H
  183. movsd (%rax,%rdx,8), %xmm2  # xmm2 = _SC2[sign]
  184. addsd %xmm3, %xmm0  # xmm0 = T1*P + T0*(H + P)
  185. addsd %xmm0, %xmm1  # xmm1 = full unscaled sum
  186. movsd %xmm1, -24(%rsp)
  187. movl $0, -24(%rsp)  # clear the low 32 bits of the stored sum
  188. movsd -24(%rsp), %xmm3  # xmm3 = head: the sum truncated to its upper 32 bits
  189. movsd -16(%rsp), %xmm1  # xmm1 = biased power-of-two scale
  190. subsd %xmm3, %xmm4  # T1*H - head
  191. addsd %xmm0, %xmm4  # xmm4 = tail: the remainder of the sum
  192. movaps %xmm3, %xmm0
  193. addsd %xmm4, %xmm0  # head + tail
  194. mulsd %xmm1, %xmm0  # scale ...
  195. mulsd %xmm2, %xmm0  # ... and remove the bias
  196. movsd %xmm0, -40(%rsp)
  197. movl -36(%rsp), %eax  # eax = high word of the result
  198. cmpl $1048576, %eax  # 0x00100000: high word of the smallest normal double
  199. jge ..B1.15  # result is normal: return it as is (still in xmm0)
  200. ..B1.12:
  201. testl %eax, %eax
  202. jle ..B1.14  # high word not positive: keep the stored result
  203. ..B1.13:
  204. mulsd %xmm1, %xmm3  # rescale head and tail separately ...
  205. mulsd %xmm1, %xmm4
  206. mulsd %xmm2, %xmm3
  207. mulsd %xmm2, %xmm4
  208. addsd %xmm4, %xmm3  # ... and add them only after scaling
  209. movsd %xmm3, -40(%rsp)  # replaces the stored result
  210. ..B1.14:
  211. lea _small_value_64(%rip), %rax
  212. movsd (%rax), %xmm0  # xmm0 = 2**-1000
  213. mulsd %xmm0, %xmm0  # product is stored but never read; presumably done to raise the underflow/inexact flags
  214. movsd %xmm0, -32(%rsp)
  215. movsd -40(%rsp), %xmm0  # return the stored result
  216. ..B1.15:
  217. ret
  218. ..B1.16:
  219. cmpl $2146435072, %edi  # 0x7FF00000: exponent field all ones ?
  220. jae ..B1.20  # yes: x is Inf or NaN
  221. ..B1.17:
  222. testl $-2147483648, %esi  # sign bit of x (esi still holds the original high word on this path)
  223. je ..B1.25  # positive and out of range: overflow
  224. ..B1.18:
  225. lea _small_value_64(%rip), %rax
  226. movsd (%rax), %xmm0  # xmm0 = 2**-1000
  227. mulsd %xmm0, %xmm0  # negative and out of range: return 2**-1000 squared (underflows)
  228. movsd %xmm0, -40(%rsp)
  229. ..B1.19:
  230. ret
  231. ..B1.20:
  232. addl $-2146435072, %edi  # edi = high-word mantissa bits (exponent field removed)
  233. orl -40(%rsp), %edi  # combine with the low word: zero only for an infinity
  234. jne ..B1.22  # non-zero mantissa: NaN
  235. ..B1.21:
  236. lea _inf_zero(%rip), %rax
  237. movsd (%rax,%rdx,8), %xmm0  # +Inf for x = +Inf, 0.0 for x = -Inf
  238. ret
  239. ..B1.22:
  240. movsd -40(%rsp), %xmm0
  241. addsd %xmm0, %xmm0  # NaN input: return x + x
  242. ..B1.23:
  243. ret
  244. ..B1.24:
  245. movsd .L_2il0floatpacket.12(%rip), %xmm0  # xmm0 = 1.0
  246. addsd -40(%rsp), %xmm0  # tiny |x|: return 1.0 + x
  247. ret
  248. ..B1.25:
  249. lea _large_value_64(%rip), %rax
  250. movsd (%rax), %xmm0  # xmm0 = 2**1000
  251. mulsd %xmm0, %xmm0  # return 2**1000 squared (overflows)
  252. movsd %xmm0, -40(%rsp)
  253. ret
  254. ..B1.26:
  255. lea exact_values(%rip), %rax
  256. movsd (%rax,%rcx,8), %xmm0  # return exact_values[x] for integer x
  257. ret
  258. .align 16,0x90
  259. .cfi_endproc
  260. .type exp10,@function
  261. .size exp10,.-exp10
  262. .data
  263. # -- End exp10
  264. .section .rodata, "a"
  /*
   * Double-precision literals used by exp10. Each is stored as two 32-bit
   * words, low word first. Approximate decimal values are given for
   * orientation only; the bit patterns are authoritative.
   */
  265. .align 8
  266. .align 8
  267. .L_2il0floatpacket.0:
  268. .long 0x0979a371,0x407a934f  # ~425.2068 = 128*log2(10)
  269. .type .L_2il0floatpacket.0,@object
  270. .size .L_2il0floatpacket.0,8
  271. .align 8
  272. .L_2il0floatpacket.1:
  273. .long 0x00000000,0x3f634413  # ~2.3517963e-3: high part of log10(2)/128 (low word zero)
  274. .type .L_2il0floatpacket.1,@object
  275. .size .L_2il0floatpacket.1,8
  276. .align 8
  277. .L_2il0floatpacket.2:
  278. .long 0x7fbcc47c,0x3e0427de  # ~5.866e-10: low part of log10(2)/128
  279. .type .L_2il0floatpacket.2,@object
  280. .size .L_2il0floatpacket.2,8
  281. .align 8
  282. .L_2il0floatpacket.3:
  283. .long 0x00000000,0x40026bb1  # ~2.3025837: high part of ln(10) (low word zero)
  284. .type .L_2il0floatpacket.3,@object
  285. .size .L_2il0floatpacket.3,8
  286. .align 8
  287. .L_2il0floatpacket.4:
  288. .long 0xd9d41e9c,0x3fca7ed8  # ~0.20700: coefficient of r**6 (close to ln(10)**6/720)
  289. .type .L_2il0floatpacket.4,@object
  290. .size .L_2il0floatpacket.4,8
  291. .align 8
  292. .L_2il0floatpacket.5:
  293. .long 0x09fd8b95,0x3ff2bd76  # ~1.171255: coefficient of r**4 (close to ln(10)**4/24)
  294. .type .L_2il0floatpacket.5,@object
  295. .size .L_2il0floatpacket.5,8
  296. .align 8
  297. .L_2il0floatpacket.6:
  298. .long 0xc73cea69,0x40053524  # ~2.650949: coefficient of r**2 (close to ln(10)**2/2)
  299. .type .L_2il0floatpacket.6,@object
  300. .size .L_2il0floatpacket.6,8
  301. .align 8
  302. .L_2il0floatpacket.7:
  303. .long 0x00000000,0xbff00000  # -1.0
  304. .type .L_2il0floatpacket.7,@object
  305. .size .L_2il0floatpacket.7,8
  306. .align 8
  307. .L_2il0floatpacket.8:
  308. .long 0x25f8c2cc,0x3fb16e4f  # ~0.068089: coefficient of r**7 (close to ln(10)**7/5040)
  309. .type .L_2il0floatpacket.8,@object
  310. .size .L_2il0floatpacket.8,8
  311. .align 8
  312. .L_2il0floatpacket.9:
  313. .long 0xfd1d41fe,0x3fe1429f  # ~0.539383: coefficient of r**5 (close to ln(10)**5/120)
  314. .type .L_2il0floatpacket.9,@object
  315. .size .L_2il0floatpacket.9,8
  316. .align 8
  317. .L_2il0floatpacket.10:
  318. .long 0x91de2ca4,0x40004705  # ~2.034678: coefficient of r**3 (close to ln(10)**3/6)
  319. .type .L_2il0floatpacket.10,@object
  320. .size .L_2il0floatpacket.10,8
  321. .align 8
  322. .L_2il0floatpacket.11:
  323. .long 0xa2b05ba9,0x3eb776aa  # ~1.3985e-6: low part of ln(10), used as the r**1 correction term
  324. .type .L_2il0floatpacket.11,@object
  325. .size .L_2il0floatpacket.11,8
  326. .align 8
  327. .L_2il0floatpacket.12:
  328. .long 0x00000000,0x3ff00000  # 1.0
  329. .type .L_2il0floatpacket.12,@object
  330. .size .L_2il0floatpacket.12,8
  331. .align 8
  /*
   * exact_values[k]: 10**k as a double (low word first), for k = 1..22.
   * exp10 indexes this table with the integer value of x, and only when the
   * high word of x is at least 0x3FF00000 (x >= 1), so entry 0 (all-zero
   * bits, i.e. 0.0 rather than 1.0) is not reached from the code in this file.
   */
  332. exact_values:
  333. .long 0x00000000,0x00000000  # index 0: 0.0 (placeholder, see note above)
  334. .long 0x00000000,0x40240000  # 10**1
  335. .long 0x00000000,0x40590000  # 10**2
  336. .long 0x00000000,0x408f4000  # 10**3
  337. .long 0x00000000,0x40c38800  # 10**4
  338. .long 0x00000000,0x40f86a00  # 10**5
  339. .long 0x00000000,0x412e8480  # 10**6
  340. .long 0x00000000,0x416312d0  # 10**7
  341. .long 0x00000000,0x4197d784  # 10**8
  342. .long 0x00000000,0x41cdcd65  # 10**9
  343. .long 0x20000000,0x4202a05f  # 10**10
  344. .long 0xe8000000,0x42374876  # 10**11
  345. .long 0xa2000000,0x426d1a94  # 10**12
  346. .long 0xe5400000,0x42a2309c  # 10**13
  347. .long 0x1e900000,0x42d6bcc4  # 10**14
  348. .long 0x26340000,0x430c6bf5  # 10**15
  349. .long 0x37e08000,0x4341c379  # 10**16
  350. .long 0x85d8a000,0x43763457  # 10**17
  351. .long 0x674ec800,0x43abc16d  # 10**18
  352. .long 0x60913d00,0x43e158e4  # 10**19
  353. .long 0x78b58c40,0x4415af1d  # 10**20
  354. .long 0xd6e2ef50,0x444b1ae4  # 10**21
  355. .long 0x064dd592,0x4480f0cf  # 10**22
  356. .type exact_values,@object
  357. .size exact_values,184
  358. .align 4
  /*
   * _range[sign]: largest |x| handled by the main path, as a double stored
   * low word first. exp10 compares the high word first, then the low word.
   */
  359. _range:
  360. .long 1352628734  # [0] low word  (0x509F79FE)
  361. .long 1081295891  # [0] high word (0x40734413): limit ~308.2547 for x >= 0
  362. .long 1189309266  # [1] low word  (0x46E36B52)
  363. .long 1081358775  # [1] high word (0x407439B7): limit ~323.607 on |x| for x < 0
  364. .type _range,@object
  365. .size _range,16
  366. .align 4
  /* 1.5 * 2**52 as a double: adding then subtracting it rounds a value to an integer. */
  367. _TWO_52H:
  368. .long 0  # low word
  369. .long 1127743488  # high word 0x43380000
  370. .type _TWO_52H,@object
  371. .size _TWO_52H,8
  372. .align 4
  /* 1.5 * 2**32 as a double: adding then subtracting it rounds a value to a multiple of 2**-20. */
  373. _TWO_32H:
  374. .long 0  # low word
  375. .long 1106771968  # high word 0x41F80000
  376. .type _TWO_32H,@object
  377. .size _TWO_32H,8
  378. .align 4
  /*
   * SC2_BIAS[sign]: 32-bit exponent bias added to k before it is shifted into
   * the exponent field. Each entry pairs with _SC2[sign], which removes the
   * resulting offset of 512 from 1023.
   */
  379. SC2_BIAS:
  380. .long 511  # x >= 0: scale is 2**(k - 512), later multiplied by 2**512
  381. .long 1535  # x < 0: scale is 2**(k + 512), later multiplied by 2**-512
  382. .type SC2_BIAS,@object
  383. .size SC2_BIAS,8
  384. .align 4
  /* _SC2[sign]: final power-of-two factor (double, low word first) that undoes the SC2_BIAS offset. */
  385. _SC2:
  386. .long 0
  387. .long 1609564160  # [0] high word 0x5FF00000: 2**512
  388. .long 0
  389. .long 535822336  # [1] high word 0x1FF00000: 2**-512
  390. .type _SC2,@object
  391. .size _SC2,16
  392. .align 4
  /*
   * Two doubles (low word first): +2**-1000 and -2**-1000. exp10 loads only
   * the first entry and squares it on its underflow paths.
   */
  393. _small_value_64:
  394. .long 0
  395. .long 24117248  # [0] high word 0x01700000: +2**-1000
  396. .long 0
  397. .long 2171600896  # [1] high word 0x81700000: -2**-1000
  398. .type _small_value_64,@object
  399. .size _small_value_64,16
  400. .align 4
  /* _inf_zero[sign]: result returned for an infinite argument (double, low word first). */
  401. _inf_zero:
  402. .long 0
  403. .long 2146435072  # [0] high word 0x7FF00000: +Inf, returned for x = +Inf
  404. .long 0
  405. .long 0  # [1] 0.0, returned for x = -Inf
  406. .type _inf_zero,@object
  407. .size _inf_zero,16
  408. .align 4
  /*
   * Two doubles (low word first): +2**1000 and -2**1000. exp10 loads only
   * the first entry and squares it on its overflow path.
   */
  409. _large_value_64:
  410. .long 0
  411. .long 2121269248  # [0] high word 0x7E700000: +2**1000
  412. .long 0
  413. .long 4268752896  # [1] high word 0xFE700000: -2**1000
  414. .type _large_value_64,@object
  415. .size _large_value_64,16
  416. .data
  417. .section .note.GNU-stack, ""  # empty GNU-stack note: by GNU convention, marks the object as not needing an executable stack
  418. // -- Begin DWARF2 SEGMENT .eh_frame
  419. .section .eh_frame,"a",@progbits
  420. .eh_frame_seg:
  421. .align 1
  422. # End
  423. .globl pow10  # export pow10 ...
  424. .equ pow10, exp10  # ... as a second name for the exp10 entry point