/*
 * Math library
 *
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * Author Name <jingwei.zhang@intel.com>
 * History:
 * 03-14-2016 Initial version. numerics svn rev. 12864
 */
  /*
   * double log1p(double x)
   *
   * Syntax: GAS, AT&T operand order, x86-64.
   * In:     %xmm0 = x
   * Out:    %xmm0 = result
   * Clobb:  rax, rcx, rdx, rsi, rdi, r8-r11, xmm0-xmm15, flags
   * Stack:  leaf routine (no call instructions); scratch values are kept
   *         at -8(%rsp) .. -40(%rsp) without moving %rsp, i.e. it relies
   *         on the area below the stack pointer being usable (red zone).
   * Reads:  local tables _small_value_64, _Q1, _Q2, _Q3, _P, _LN2,
   *         _TWO_32, _TWO_32P, _ones, _zeros, _infs, and the external
   *         tables __libm_rcp_table_256 / __libm_log_table_256 via the GOT.
   *
   * Dispatch is done on the high 32 bits of x (hx) and on |hx| = hx with
   * the sign bit cleared.  Thresholds below are the immediates decoded as
   * the high word of an IEEE-754 double:
   *
   *   |hx| >= 0x7FF00000            Inf or NaN            -> ..B1.24
   *    hx  >= 0xBFF00000 (unsigned) x <= -1               -> ..B1.20
   *   |hx| >= 0x3FB24000 (~0.0713)  table-driven path     -> ..B1.16
   *   |hx| >= 0x3F900000 (2^-6)     polynomial _Q1        -> ..B1.15
   *   |hx| >= 0x3F200000 (2^-13)    polynomial _Q2        -> ..B1.14
   *   |hx| >= 0x3C600000 (2^-57)    polynomial _Q3        -> ..B1.13
   *   |hx| >= 0x00100000            tiny normal x         -> ..B1.8
   *   otherwise                     subnormal or zero     -> ..B1.9
   *
   * Many stores to the scratch area are never read back; they are kept
   * exactly as generated.
   */
        .file "log1p_gen.c"
        .text
..TXTST0:
# -- Begin log1p
        .text
        .align 16,0x90
        .globl log1p
log1p:
# parameter 1: %xmm0
..B1.1:
        .cfi_startproc
..___tag_value_log1p.1:
..L2:
        movsd %xmm0, -8(%rsp)           # spill x so its words can be read as integers
        movl -4(%rsp), %edi             # edi = hx (sign, exponent, top mantissa bits)
        movl %edi, %edx
        andl $2147483647, %edx          # edx = |hx| (0x7FFFFFFF mask)
        cmpl $2146435072, %edx          # 0x7FF00000
        jae ..B1.24                     # Inf or NaN
..B1.2:
        cmpl $-1074790400, %edi         # 0xBFF00000, unsigned compare
        jae ..B1.20                     # finite and x <= -1
..B1.3:
        cmpl $1068646400, %edx          # 0x3FB24000
        jae ..B1.16
..B1.4:
        cmpl $1066401792, %edx          # 0x3F900000
        jae ..B1.15
..B1.5:
        cmpl $1059061760, %edx          # 0x3F200000
        jae ..B1.14
..B1.6:
        cmpl $1012924416, %edx          # 0x3C600000
        jae ..B1.13
..B1.7:
        cmpl $1048576, %edx             # 0x00100000: smallest normal high word
        jb ..B1.9
  /*
   * Tiny normal x: return x - t*t with t = _small_value_64[0] (2^-1000).
   * NOTE(review): t*t is far below the double range, so this presumably
   * exists to raise underflow/inexact while yielding x -- confirm.
   */
..B1.8:
        lea _small_value_64(%rip), %rax
        movsd (%rax), %xmm1
        mulsd %xmm1, %xmm1
        subsd %xmm1, %xmm0
        ret
  /* Subnormal or zero: zero is returned unchanged. */
..B1.9:
        movl -8(%rsp), %eax             # low word of x
        orl %eax, %edx                  # zero only when x is +0 or -0
        je ..B1.11
..B1.10:
        lea _small_value_64(%rip), %rax
        movsd -8(%rsp), %xmm0
        movsd (%rax), %xmm1
        mulsd %xmm1, %xmm1
        movsd %xmm1, -40(%rsp)
        subsd %xmm1, %xmm0              # x - t*t, same form as ..B1.8
        ret
..B1.11:
        movsd -8(%rsp), %xmm0           # return x (signed zero preserved)
..B1.12:
        ret
  /*
   * 2^-57 <= |x| < 2^-13:
   *   z = x*x
   *   result = x + x*((Q3[3]*z + Q3[1])*z + (Q3[2]*z + Q3[0])*x)
   */
..B1.13:
        movsd -8(%rsp), %xmm3           # xmm3 = x
        lea 24+_Q3(%rip), %rax
        movaps %xmm3, %xmm1
        lea 16+_Q3(%rip), %rcx
        mulsd %xmm3, %xmm1              # xmm1 = z = x*x
        lea 8+_Q3(%rip), %rdx
        movsd (%rax), %xmm0
        lea _Q3(%rip), %rsi
        movsd (%rcx), %xmm2
        mulsd %xmm1, %xmm0
        mulsd %xmm1, %xmm2
        addsd (%rdx), %xmm0
        addsd (%rsi), %xmm2
        mulsd %xmm1, %xmm0
        mulsd %xmm3, %xmm2
        movsd %xmm1, -16(%rsp)
        addsd %xmm2, %xmm0
        mulsd %xmm3, %xmm0
        addsd %xmm3, %xmm0
        ret
  /*
   * 2^-13 <= |x| < 2^-6: polynomial in z = x*x over _Q2[1..8], with the
   * Q2[0]*x*x term evaluated separately.  x is split into xh (low 32 bits
   * cleared) and xl = x - xh; xh*xh is split again the same way so that
   * Q2[0] times its high part can be added to xh before the remaining
   * small terms are folded in.
   */
..B1.14:
        movsd -8(%rsp), %xmm3           # xmm3 = x
        lea 56+_Q2(%rip), %rdx
        movaps %xmm3, %xmm5
        lea 64+_Q2(%rip), %r8
        mulsd %xmm3, %xmm5              # xmm5 = z = x*x
        lea 40+_Q2(%rip), %rcx
        movsd (%rdx), %xmm6
        lea 48+_Q2(%rip), %r9
        movsd (%r8), %xmm4
        lea 24+_Q2(%rip), %rsi
        mulsd %xmm5, %xmm6
        lea 32+_Q2(%rip), %r10
        mulsd %xmm5, %xmm4
        addsd (%rcx), %xmm6
        addsd (%r9), %xmm4
        mulsd %xmm5, %xmm6
        mulsd %xmm5, %xmm4
        addsd (%rsi), %xmm6
        addsd (%r10), %xmm4
        mulsd %xmm5, %xmm6
        mulsd %xmm5, %xmm4
        xorl %eax, %eax
        movaps %xmm3, %xmm10
        movl %eax, -8(%rsp)             # clear low 32 bits of the spilled x
        lea 8+_Q2(%rip), %rdi
        movsd -8(%rsp), %xmm0           # xmm0 = xh
        lea 16+_Q2(%rip), %r11
        movaps %xmm0, %xmm1
        movaps %xmm0, %xmm7
        mulsd %xmm0, %xmm7              # xmm7 = xh*xh
        subsd %xmm0, %xmm10             # xmm10 = xl = x - xh
        addsd (%rdi), %xmm6
        addsd (%r11), %xmm4
        addsd %xmm10, %xmm1
        mulsd %xmm3, %xmm6
        mulsd %xmm5, %xmm4
        mulsd %xmm10, %xmm1             # (xh + xl)*xl
        addsd %xmm4, %xmm6
        movaps %xmm0, %xmm2
        mulsd %xmm10, %xmm2             # xh*xl
        mulsd %xmm5, %xmm6
        addsd %xmm1, %xmm2              # xmm2 = x*x - xh*xh (as computed)
        addsd %xmm6, %xmm10
        movsd %xmm7, -16(%rsp)
        movl %eax, -16(%rsp)            # clear low 32 bits of xh*xh
        lea _Q2(%rip), %rax
        movsd -16(%rsp), %xmm8          # xmm8 = high part of xh*xh
        subsd %xmm8, %xmm7
        movsd (%rax), %xmm9             # xmm9 = Q2[0]
        addsd %xmm2, %xmm7
        mulsd %xmm9, %xmm7
        mulsd %xmm8, %xmm9
        addsd %xmm7, %xmm10
        addsd %xmm9, %xmm0
        movsd %xmm0, -8(%rsp)
        addsd %xmm10, %xmm0             # result = high sum + low sum
        movsd %xmm10, -24(%rsp)
        ret
  /*
   * 2^-6 <= |x| < ~0.0713: polynomial in z = x*x over _Q1, combined with
   * high/low splits obtained by multiplying with _TWO_32P and _TWO_32 and
   * subtracting.  The exact error-compensation scheme is compiler output;
   * only the obvious intermediate values are annotated.
   */
..B1.15:
        movsd -8(%rsp), %xmm12          # xmm12 = x
        lea 96+_Q1(%rip), %rcx
        movaps %xmm12, %xmm1
        lea 88+_Q1(%rip), %r11
        mulsd %xmm12, %xmm1             # xmm1 = z = x*x
        lea _TWO_32P(%rip), %rdx
        movsd (%rcx), %xmm2
        lea 80+_Q1(%rip), %rsi
        movsd (%r11), %xmm7
        lea 64+_Q1(%rip), %rdi
        mulsd %xmm1, %xmm2
        lea 56+_Q1(%rip), %rcx
        mulsd %xmm1, %xmm7
        addsd (%rsi), %xmm2
        movsd (%rdx), %xmm4             # xmm4 = _TWO_32P
        lea 72+_Q1(%rip), %rdx
        mulsd %xmm1, %xmm2
        lea 48+_Q1(%rip), %r8
        addsd (%rdx), %xmm7
        addsd (%rdi), %xmm2
        mulsd %xmm1, %xmm7
        mulsd %xmm1, %xmm2
        addsd (%rcx), %xmm7
        addsd (%r8), %xmm2
        mulsd %xmm1, %xmm7
        mulsd %xmm1, %xmm2
        lea 40+_Q1(%rip), %rsi
        lea 32+_Q1(%rip), %r9
        lea 24+_Q1(%rip), %rdi
        lea _TWO_32(%rip), %rax
        movaps %xmm12, %xmm0
        movaps %xmm12, %xmm6
        mulsd %xmm4, %xmm0              # x * _TWO_32P
        lea 16+_Q1(%rip), %r10
        addsd (%rsi), %xmm7
        mulsd (%rax), %xmm6             # x * _TWO_32
        addsd (%r9), %xmm2
        mulsd %xmm1, %xmm7
        mulsd %xmm1, %xmm2
        addsd (%rdi), %xmm7
        addsd (%r10), %xmm2
        mulsd %xmm1, %xmm7
        mulsd %xmm1, %xmm2
        lea 8+_Q1(%rip), %r8
        lea 112+_Q1(%rip), %r9
        movsd %xmm0, -40(%rsp)          # round-trip through memory
        movaps %xmm12, %xmm3
        movsd -40(%rsp), %xmm5
        lea 104+_Q1(%rip), %r10
        movaps %xmm4, %xmm9
        subsd %xmm6, %xmm5              # xmm5 = high part of x
        addsd (%r8), %xmm7
        subsd %xmm5, %xmm3              # xmm3 = low part of x
        mulsd %xmm12, %xmm7
        mulsd %xmm3, %xmm12
        addsd %xmm7, %xmm2
        movsd (%r9), %xmm11
        movaps %xmm5, %xmm15
        movaps %xmm11, %xmm8
        mulsd %xmm5, %xmm11
        mulsd %xmm3, %xmm8
        mulsd %xmm3, %xmm15
        addsd (%r10), %xmm11
        addsd %xmm8, %xmm2
        addsd %xmm12, %xmm15
        movaps %xmm11, %xmm10
        movaps %xmm15, %xmm14
        movsd (%rax), %xmm13            # xmm13 = _TWO_32
        addsd %xmm2, %xmm10
        mulsd %xmm10, %xmm9
        mulsd %xmm13, %xmm10
        movsd %xmm9, -40(%rsp)
        movsd -40(%rsp), %xmm0
        movsd %xmm5, -8(%rsp)
        subsd %xmm10, %xmm0
        movaps %xmm5, %xmm10
        subsd %xmm0, %xmm11
        mulsd %xmm5, %xmm10
        addsd %xmm11, %xmm2
        addsd %xmm10, %xmm14
        mulsd %xmm1, %xmm2
        mulsd %xmm14, %xmm4
        mulsd %xmm13, %xmm14
        movsd %xmm4, -40(%rsp)
        movsd -40(%rsp), %xmm4
        movsd %xmm3, -24(%rsp)
        subsd %xmm14, %xmm4
        movsd %xmm1, -16(%rsp)
        subsd %xmm4, %xmm10
        addsd %xmm15, %xmm10
        mulsd %xmm0, %xmm10
        mulsd %xmm4, %xmm0
        addsd %xmm2, %xmm10
        addsd %xmm5, %xmm0
        addsd %xmm3, %xmm10
        movsd %xmm0, -32(%rsp)
        addsd %xmm10, %xmm0             # result = high sum + low sum
        ret
  /*
   * |x| >= ~0.0713 (and x > -1): table-driven evaluation.  Both entry
   * blocks produce, for ..B1.19:
   *   xmm6 = high part and xmm7 = low part of the reduced argument
   *   ecx  = unbiased binary exponent k
   *   edx  = 8-bit table index (zero-extended into rdx)
   */
..B1.16:
        cmpl $1130364928, %edx          # 0x43600000 (2^55)
        jae ..B1.18
  /* General case: work from 1 + x computed in double precision. */
..B1.17:
        lea _ones(%rip), %rcx
        pxor %xmm1, %xmm1
        movsd -8(%rsp), %xmm7           # xmm7 = x
        xorl %esi, %esi
        movq __libm_rcp_table_256@GOTPCREL(%rip), %rdi
        lea 8+_ones(%rip), %r8
        movl %esi, -8(%rsp)             # clear low 32 bits of the spilled x
        lea _TWO_32(%rip), %r9
        movsd (%rcx), %xmm0             # 1.0
        movsd -8(%rsp), %xmm5           # xmm5 = xh
        addsd %xmm7, %xmm0              # xmm0 = 1 + x
        subsd %xmm5, %xmm7              # xmm7 = xl = x - xh
        movsd %xmm0, -32(%rsp)
        movl -28(%rsp), %eax            # eax = high word of 1 + x
        movl %eax, %edx
        andl $2146435072, %edx          # isolate exponent field (0x7FF00000)
        movl %eax, %ecx
        negl %edx
        shrl $12, %eax
        addl $2145386496, %edx          # 0x7FE00000 - exponent: high word of 2^-k
        movl %edx, -28(%rsp)
        movzbl %al, %edx                # edx = top 8 mantissa bits of 1 + x
        movl %esi, -32(%rsp)            # low word 0, so -32(%rsp) holds 2^-k
        movsd (%r8), %xmm2              # -1.0
        movsd (%r9), %xmm3              # _TWO_32
        cvtss2sd (%rdi,%rdx,4), %xmm1   # table entry is single precision
        mulsd -32(%rsp), %xmm1          # xmm1 = r = table[idx] * 2^-k
        mulsd %xmm1, %xmm5              # xh*r
        addsd %xmm1, %xmm2              # r - 1
        mulsd %xmm1, %xmm7              # xl*r
        addsd %xmm2, %xmm5              # xh*r + (r - 1)
        movaps %xmm5, %xmm4
        movsd %xmm5, -16(%rsp)
        addsd %xmm7, %xmm4
        movsd %xmm4, -8(%rsp)
        addsd %xmm3, %xmm4              # add then subtract _TWO_32 through
        movsd %xmm4, -40(%rsp)          # memory to drop the low-order bits
        movsd -40(%rsp), %xmm6
        shrl $20, %ecx
        subsd (%r9), %xmm6              # xmm6 = high part of reduced argument
        movsd %xmm7, -24(%rsp)
        addl $-1023, %ecx               # ecx = k
        movsd %xmm2, -32(%rsp)
        subsd %xmm6, %xmm5
        addsd %xmm5, %xmm7              # xmm7 = low part of reduced argument
        jmp ..B1.19
  /*
   * |hx| >= 0x43600000: the exponent and mantissa are taken from x itself
   * instead of from 1 + x.
   */
..B1.18:
        movl %edx, %ecx
        pxor %xmm0, %xmm0
        shrl $12, %edx
        andl $1048575, %edi             # keep the 20 mantissa bits of hx
        movzbl %dl, %edx                # edx = top 8 mantissa bits
        orl $1072693248, %edi           # force exponent of 1.0: m in [1, 2)
        movq __libm_rcp_table_256@GOTPCREL(%rip), %rax
        lea 8+_ones(%rip), %r8
        movl %edi, -4(%rsp)
        shlq $32, %rdi
        movl -8(%rsp), %esi
        orq %rsi, %rdi                  # rdi = bit pattern of m
        cvtss2sd (%rax,%rdx,4), %xmm0   # xmm0 = r = table[idx]
        movq %rdi, -24(%rsp)
        movl $0, -8(%rsp)               # -8(%rsp) now holds m with low word 0
        movsd -8(%rsp), %xmm6           # xmm6 = mh
        movsd -24(%rsp), %xmm7          # xmm7 = m
        shrl $20, %ecx
        subsd %xmm6, %xmm7              # ml = m - mh
        mulsd %xmm0, %xmm6
        mulsd %xmm0, %xmm7              # xmm7 = ml*r
        addsd (%r8), %xmm6              # xmm6 = mh*r - 1
        movsd %xmm0, -32(%rsp)
        addl $-1023, %ecx               # ecx = k
  /*
   * Common tail.  With t = xmm6 + xmm7 and z = t*t:
   *   poly   = (P[4]*z + P[2])*z*z + P[0]*z + (P[3]*z + P[1])*t*z
   *   low    = xmm7 + poly + k*LN2[0] + log_table[idx].first
   *   high   = k*LN2[1] + log_table[idx].second + xmm6
   *   result = high + low
   * Each __libm_log_table_256 entry is 16 bytes (two doubles).
   */
..B1.19:
        movaps %xmm6, %xmm0
        lea 32+_P(%rip), %rsi
        lea 24+_P(%rip), %r9
        lea 16+_P(%rip), %rdi
        lea 8+_P(%rip), %r10
        lea _P(%rip), %r8
        lea 8+_LN2(%rip), %r11
        addsd %xmm7, %xmm0              # xmm0 = t
        movaps %xmm0, %xmm1
        mulsd %xmm0, %xmm1              # xmm1 = z = t*t
        movsd (%rsi), %xmm3
        mulsd %xmm1, %xmm3
        movsd (%r9), %xmm2
        mulsd %xmm1, %xmm2
        addsd (%rdi), %xmm3
        mulsd %xmm1, %xmm3
        addsd (%r10), %xmm2
        mulsd %xmm0, %xmm2
        addsd (%r8), %xmm3
        pxor %xmm0, %xmm0               # break dependency before cvtsi2sd
        cvtsi2sd %ecx, %xmm0            # xmm0 = (double) k
        mulsd %xmm1, %xmm3
        mulsd %xmm1, %xmm2
        lea _LN2(%rip), %rcx
        addsd %xmm2, %xmm3
        movsd (%rcx), %xmm4
        addsd %xmm3, %xmm7
        mulsd %xmm0, %xmm4              # k * LN2[0]
        movsd (%r11), %xmm5
        mulsd %xmm5, %xmm0              # k * LN2[1]
        shlq $4, %rdx                   # idx * 16
        movq __libm_log_table_256@GOTPCREL(%rip), %rax
        movsd %xmm1, -16(%rsp)
        addsd (%rax,%rdx), %xmm4
        addsd 8(%rax,%rdx), %xmm0
        addsd %xmm4, %xmm7
        addsd %xmm6, %xmm0
        movsd %xmm0, -8(%rsp)
        addsd %xmm7, %xmm0
        movsd %xmm7, -24(%rsp)
        ret
  /*
   * x <= -1.  x == -1 returns -1.0 / +0.0; x < -1 returns +Inf * +0.0.
   * The results are produced arithmetically rather than loaded, so the
   * hardware sets the corresponding floating-point status flags.
   */
..B1.20:
        lea _zeros(%rip), %rax
        addl $-1072693248, %edx         # |hx| - 0x3FF00000
        orl -8(%rsp), %edx              # zero only when x == -1.0 exactly
        movsd (%rax), %xmm1             # +0.0 (movsd leaves flags untouched)
        jne ..B1.22
..B1.21:
        lea 8+_ones(%rip), %rax
        movsd (%rax), %xmm0             # -1.0
        divsd %xmm1, %xmm0              # -1.0 / +0.0
        ret
..B1.22:
        lea _infs(%rip), %rax
        movsd (%rax), %xmm0             # +Inf
        mulsd %xmm1, %xmm0              # +Inf * +0.0
..B1.23:
        ret
  /*
   * Inf or NaN.  Only hx == 0xFFF00000 with a zero low word (-Inf) takes
   * ..B1.27; every other input is multiplied by 1.0 and returned.
   */
..B1.24:
        addl $1048576, %edi             # hx + 0x00100000 wraps to 0 for 0xFFF00000
        orl -8(%rsp), %edi
        je ..B1.27
..B1.25:
        lea _ones(%rip), %rax
        movsd -8(%rsp), %xmm0
        mulsd (%rax), %xmm0             # x * 1.0
..B1.26:
        ret
..B1.27:
        lea _infs(%rip), %rax
        lea _zeros(%rip), %rdx
        movsd (%rax), %xmm0             # +Inf
        mulsd (%rdx), %xmm0             # +Inf * +0.0
        ret
        .align 16,0x90
        .cfi_endproc
        .type log1p,@function
        .size log1p,.-log1p
        .data
# -- End log1p
  /*
   * _small_value_64: two doubles, each stored as (low word, high word).
   * log1p squares entry [0] on its tiny-argument paths; entry [1] is not
   * referenced by log1p in this file.
   * Aligned to 8 so each double is naturally aligned.
   */
        .section .rodata, "a"
        .align 8
_small_value_64:
        .long 0
        .long 24117248                  # [0] high word 0x01700000:  2^-1000
        .long 0
        .long 2171600896                # [1] high word 0x81700000: -2^-1000
        .type _small_value_64,@object
        .size _small_value_64,16
  /*
   * _Q3: four doubles (low word, high word) used by log1p for
   * 2^-57 <= |x| < 2^-13.  The values sit at or next to the first
   * log(1+x) series coefficients -1/2, 1/3, -1/4, 1/5.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_Q3:
        .long 0
        .long 3219128320                # [0] 0xBFE00000_00000000 = -0.5
        .long 1431655765
        .long 1070945621                # [1] 0x3FD55555_55555555 ~  1/3
        .long 55924054
        .long 3218079744                # [2] high word 0xBFD00000 ~ -1/4
        .long 2711205044
        .long 1070176665                # [3] high word 0x3FC99999 ~  1/5
        .type _Q3,@object
        .size _Q3,32
  /*
   * _Q2: nine doubles (low word, high word) used by log1p for
   * 2^-13 <= |x| < 2^-6.  Entry [0] is exactly -0.5; the rest alternate
   * in sign with magnitudes near 1/3, 1/4, ..., 1/10.
   * NOTE(review): the later entries deviate slightly from the exact
   * reciprocals, presumably fitted coefficients -- confirm with the
   * generator.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_Q2:
        .long 0
        .long 3219128320                # [0] = -0.5
        .long 1431655765
        .long 1070945621                # [1] ~  1/3
        .long 4294967292
        .long 3218079743                # [2] ~ -1/4
        .long 2577017633
        .long 1070176665                # [3] ~  1/5
        .long 1431726806
        .long 3217380693                # [4] ~ -1/6
        .long 2027016470
        .long 1069697316                # [5] ~  1/7
        .long 3108735575
        .long 3217031167                # [6] ~ -1/8
        .long 1449976333
        .long 1069315434                # [7] ~  1/9
        .long 60833854
        .long 3216612762                # [8] ~ -1/10
        .type _Q2,@object
        .size _Q2,72
  /*
   * _Q1: fifteen doubles (low word, high word) used by log1p for
   * 2^-6 <= |x| < ~0.0713.
   *   [0], [1]   small-magnitude terms (about -1e-20 and 8e-8)
   *   [2..12]    alternating-sign values with magnitudes near 1/4 .. 1/14
   *   [13]       exactly -0.5
   *   [14]       high word of 1/3 with a zero low word
   * NOTE(review): [1] looks like the part of 1/3 missing from [14], i.e.
   * a high/low split of 1/3 -- confirm with the generator.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_Q1:
        .long 3234281536
        .long 3150348867                # [0]
        .long 1430600241
        .long 1047876949                # [1]
        .long 4294967294
        .long 3218079743                # [2]  ~ -1/4
        .long 2576981961
        .long 1070176665                # [3]  ~  1/5
        .long 1431660017
        .long 3217380693                # [4]  ~ -1/6
        .long 2452398325
        .long 1069697316                # [5]  ~  1/7
        .long 4287730462
        .long 3217031167                # [6]  ~ -1/8
        .long 2576343554
        .long 1069314503                # [7]  ~  1/9
        .long 1474253516
        .long 3216611738                # [8]  ~ -1/10
        .long 2720006255
        .long 1068975428                # [9]  ~  1/11
        .long 1874652295
        .long 3216331940                # [10] ~ -1/12
        .long 2893691300
        .long 1068761010                # [11] ~  1/13
        .long 1325424864
        .long 3216153938                # [12] ~ -1/14
        .long 0
        .long 3219128320                # [13] = -0.5
        .long 0
        .long 1070945621                # [14] 0x3FD55555_00000000
        .type _Q1,@object
        .size _Q1,120
  /*
   * _TWO_32P: one double (low word, high word), bit pattern
   * 0x41F00000_00100000 = 2^32 + 1.  log1p multiplies by this and by
   * _TWO_32 and subtracts, to split a value into high and low parts.
   * Aligned to 8 so the double is naturally aligned.
   */
        .align 8
_TWO_32P:
        .long 1048576
        .long 1106247680
        .type _TWO_32P,@object
        .size _TWO_32P,8
  /*
   * _TWO_32: one double (low word, high word), bit pattern
   * 0x41F00000_00000000 = 2^32.
   * Aligned to 8 so the double is naturally aligned.
   */
        .align 8
_TWO_32:
        .long 0
        .long 1106247680
        .type _TWO_32,@object
        .size _TWO_32,8
  /*
   * _ones: two doubles (low word, high word): +1.0 then -1.0.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_ones:
        .long 0
        .long 1072693248                # [0] 0x3FF00000_00000000 = +1.0
        .long 0
        .long 3220176896                # [1] 0xBFF00000_00000000 = -1.0
        .type _ones,@object
        .size _ones,16
  /*
   * _P: five doubles (low word, high word) used by the table-driven path
   * of log1p as polynomial coefficients for the reduced argument.
   * Entry [0] is exactly -0.5; the others are close to 1/3, -1/4, 1/5
   * and -1/6.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_P:
        .long 0
        .long 3219128320                # [0] = -0.5
        .long 1431621855
        .long 1070945621                # [1] ~  1/3
        .long 4294842013
        .long 3218079743                # [2] ~ -1/4
        .long 1289448124
        .long 1070176674                # [3] ~  1/5
        .long 2077359316
        .long 3217380703                # [4] ~ -1/6
        .type _P,@object
        .size _P,40
  /*
   * _LN2: ln(2) split into two doubles (low word, high word).
   *   [0] 0x3DEA39EF_35793C76  low-order part  (~1.9082e-10)
   *   [1] 0x3FE62E42_FEE00000  high-order part (~0.693147)
   * log1p multiplies both by the integer exponent k and accumulates the
   * two products separately.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_LN2:
        .long 897137782
        .long 1038760431
        .long 4276092928
        .long 1072049730
        .type _LN2,@object
        .size _LN2,16
  /*
   * _zeros: two doubles (low word, high word): +0.0 then -0.0.
   * log1p reads only entry [0] in this file.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_zeros:
        .long 0
        .long 0                         # [0] +0.0
        .long 0
        .long 2147483648                # [1] 0x80000000_00000000 = -0.0
        .type _zeros,@object
        .size _zeros,16
  /*
   * _infs: two doubles (low word, high word): +Inf then -Inf.
   * log1p reads only entry [0] in this file.
   * Aligned to 8 so each double is naturally aligned.
   */
        .align 8
_infs:
        .long 0
        .long 2146435072                # [0] 0x7FF00000_00000000 = +Inf
        .long 0
        .long 4293918720                # [1] 0xFFF00000_00000000 = -Inf
        .type _infs,@object
        .size _infs,16
  /*
   * File trailer: an empty .note.GNU-stack section, followed by the
   * start of the .eh_frame section.
   */
        .data
        .section .note.GNU-stack, ""
/* -- Begin DWARF2 SEGMENT .eh_frame */
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align 1
# End