cbrtl.S 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "cbrtl.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin cbrtl
  #
  # cbrtl -- cube root of an 80-bit extended-precision argument.
  #
  # Syntax: GAS, AT&T operand order (source first), x86-64.
  # In:     x, a 10-byte extended value at 8(%rsp) on entry, i.e. 48(%rsp)
  #         once the 40-byte frame below is allocated.
  # Out:    result left in x87 st(0).
  # Frame (offsets after "subq $40, %rsp"):
  #     0(%rsp)  10-byte scratch; also stages the value returned in st(0)
  #    16(%rsp)  10-byte scale factor (sign of x, exponent of the result)
  #    32(%rsp)  working x87 control word (precision-control bits both set)
  #    34(%rsp)  caller's x87 control word
  # Register roles:
  #    %r8b  1 once the control word has been changed (must be restored)
  #    %eax  sign of x as 0/1, later the 8-bit table index
  #    %edi  biased exponent of x, later exponent - 3*q
  #    %edx  q = ((exponent + 1) * 21845) >> 16
  #    %esi  exponent correction (0, or -25 after subnormal rescaling),
  #          later the biased exponent of the scale factor
  # Clobbers: rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11, xmm0, flags,
  #           x87 register stack. No callee-saved register is touched.
  # External tables (defined outside this file, reached through the GOT):
  #    __libm_rcp_table_256, __libm_cbrtl_table_256
  #
  41. .text
  42. .align 16,0x90
  43. .globl cbrtl
  44. cbrtl:
  45. # parameter 1: 48 + %rsp
  46. ..B1.1:
  47. .cfi_startproc
  48. ..___tag_value_cbrtl.1:
  49. ..L2:
  50. subq $40, %rsp                 # allocate the 40-byte frame; x is now at 48(%rsp)
  51. .cfi_def_cfa_offset 48
  52. xorb %r8b, %r8b                # r8b = 0: control word not changed yet
  53. ..B1.2:
  54. fnstcw 34(%rsp)                # save the caller's x87 control word
  55. ..B1.3:
  56. movzwl 56(%rsp), %ecx          # ecx = sign/exponent word of x (bytes 8-9)
  57. movl %ecx, %edi
  58. andl $32767, %edi              # edi = 15-bit biased exponent
  59. movzwl 34(%rsp), %r9d          # r9d = saved control word
  60. cmpl $32767, %edi              # exponent field all ones (Inf or NaN)?
  61. je ..B1.20                     # yes: hand x back without arithmetic
  62. ..B1.4:
  63. movb 57(%rsp), %al             # al = high byte of the sign/exponent word
  64. movl %r9d, %edx
  65. andb $-128, %al                # keep only the sign bit
  66. andl $768, %edx                # edx = precision-control field (bits 8-9)
  67. shrb $7, %al                   # al = sign as 0 or 1
  68. xorl %esi, %esi                # esi = 0: no exponent correction so far
  69. movzbl %al, %eax               # eax = sign; indexes _zeros and ones below
  70. cmpl $768, %edx                # both precision-control bits already set?
  71. je ..B1.8                      # yes: keep the caller's control word
  72. ..B1.5:
  73. orl $-64768, %r9d              # -64768 = 0xFFFF0300: set both precision-control bits
  74. movw %r9w, 32(%rsp)            # only the low 16 bits are stored
  75. ..B1.6:
  76. fldcw 32(%rsp)                 # switch to the working control word
  77. ..B1.7:
  78. movzwl 56(%rsp), %ecx          # reload sign/exponent word and exponent
  79. movl %ecx, %edi
  80. movb $1, %r8b                  # remember to restore the control word on exit
  81. andl $32767, %edi
  82. ..B1.8:
  83. testl %edi, %edi               # exponent field nonzero?
  84. jne ..B1.14                    # yes: go straight to the main computation
  85. ..B1.9:
  86. cmpq $0, 48(%rsp)              # exponent is 0: is the 64-bit significand 0 too?
  87. jne ..B1.13                    # no: subnormal input, rescale it first
  88. ..B1.10:
  # x is +0 or -0: return a zero of the same sign.
  89. lea _zeros(%rip), %rdx
  90. testb %r8b, %r8b               # flags survive the movsd; consumed by the je below
  91. movsd (%rdx,%rax,8), %xmm0     # xmm0 = _zeros[sign] (+0.0 or -0.0)
  92. je ..B1.12                     # control word untouched: nothing to restore
  93. ..B1.11:
  94. fldcw 34(%rsp)                 # restore the caller's control word
  95. ..B1.12:
  96. movsd %xmm0, (%rsp)            # move the double into st(0) through memory
  97. fldl (%rsp)
  98. addq $40, %rsp
  99. .cfi_def_cfa_offset 8
  100. ret
  101. .cfi_def_cfa_offset 48
  102. ..B1.13:
  # Subnormal input: multiply by the first double of _TWO_75 (2^75) and
  # compensate with -25 (= -75/3) in the result exponent.
  103. fldt 48(%rsp)
  104. movl $-25, %esi               # exponent correction applied at the lea 10922(...) below
  105. lea _TWO_75(%rip), %rdx
  106. fmull (%rdx)                  # x * 2^75
  107. fstpt 48(%rsp)                # overwrite the argument slot with the rescaled value
  108. movzwl 56(%rsp), %ecx         # re-read sign/exponent word of the rescaled x
  109. movl %ecx, %edi
  110. andl $32767, %edi
  111. ..B1.14:
  # Main path. Integer and x87 instructions are interleaved by the compiler.
  # Outline, reconstructed from the instruction stream:
  #   m     = x with its exponent field forced to 0x3FFF (sign kept)
  #   a     = s * m, where s = ones[sign] (+1.0 or -1.0)
  #   r     = __libm_rcp_table_256[idx] (float), idx = 8 significand bits
  #           just below the integer bit
  #   t     = r*a - 1, formed from a high/low split of a made with _TWO_32H
  #   p     = t*(P0 + t*(P1 + ... + t*P6)), evaluated as two interleaved chains
  #   c     = the two doubles of __libm_cbrtl_table_256[idx]
  #   scale = value with sign of s and exponent field q + 10922 (+ correction)
  #   result = scale * (c_hi + c_lo) * (1 + p), times an SH factor when the
  #            exponent remainder is nonzero
  # NOTE(review): the t = r*a - 1 reading relies on GAS's AT&T encoding of
  # fsubp/fsubrp with an %st(i) destination (reversed relative to the Intel
  # mnemonics) -- confirm against the assembler in use. The meaning of the
  # two external tables is inferred from their names and use -- confirm
  # against their definitions.
  112. fldt .L_2il0floatpacket.0(%rip)   # push the 80-bit constant 1.0
  113. lea ones(%rip), %r9
  114. andl $-32768, %ecx            # keep the sign bit of the sign/exponent word
  115. orl $-49153, %ecx             # -49153 = 0xFFFF3FFF: force exponent field to 0x3FFF
  116. movw %cx, 56(%rsp)            # argument slot now holds m
  117. fldl (%r9,%rax,8)             # push s = ones[sign]
  118. lea 96+_P(%rip), %r9          # r9 -> seventh coefficient (offset 96)
  119. movl 52(%rsp), %ecx           # ecx = high 32 bits of the significand
  120. shrl $23, %ecx
  121. movzbl %cl, %eax              # eax = idx: bits 23..30 of that dword
  122. lea _TWO_32H(%rip), %rcx
  123. movq __libm_rcp_table_256@GOTPCREL(%rip), %r11
  124. imull $21845, %edi, %edx      # edx = exponent * 21845 (21845 = 0x5555)
  125. fstpt 16(%rsp)                # park s as a 10-byte value; its exponent word is patched below
  126. fldt 16(%rsp)
  127. fldt 48(%rsp)                 # push m
  128. fmulp %st, %st(1)             # a = s * m
  129. flds (%r11,%rax,4)            # push r, a 4-byte float table entry
  130. addl $21845, %edx             # edx = (exponent + 1) * 21845
  131. fld %st(0)
  132. lea 32+_P(%rip), %r11         # r11 -> third coefficient (offset 32)
  133. fldl (%rcx)                   # push the _TWO_32H constant
  134. lea 80+_P(%rip), %rcx         # rcx -> sixth coefficient (offset 80)
  135. fld %st(0)
  136. shrl $16, %edx                # edx = q
  137. fadd %st(4), %st              # _TWO_32H + a
  138. shlq $4, %rax                 # rax = idx * 16: byte offset of a 16-byte table entry
  139. lea (%rdx,%rdx), %r10d        # r10d = 2*q
  140. subl %r10d, %edi              # edi = exponent - 2*q
  141. lea 64+_P(%rip), %r10         # r10 -> fifth coefficient (offset 64)
  142. fsubp %st, %st(1)             # presumably leaves the high part of a -- see NOTE above
  143. fmul %st, %st(1)
  144. lea 10922(%rsi,%rdx), %esi    # esi = q + 10922 + correction: exponent field for scale
  145. fxch %st(1)
  146. fsub %st(4), %st              # subtract the 1.0 pushed at the top of this block
  147. fxch %st(1)
  148. fsubrp %st, %st(3)            # presumably leaves the low part of a -- see NOTE above
  149. fxch %st(1)
  150. fmulp %st, %st(2)
  151. faddp %st, %st(1)             # st(0) = t
  152. fld %st(0)
  153. fmul %st(1), %st              # st(0) = t*t
  154. fxch %st(1)
  155. fstpt 48(%rsp)                # spill t to the argument slot and reload it
  156. fldt 48(%rsp)
  157. fldt (%r9)                    # coefficient at offset 96
  158. lea 48+_P(%rip), %r9          # r9 -> fourth coefficient (offset 48)
  159. fmul %st(2), %st
  160. fldt (%r10)                   # coefficient at offset 64
  161. lea 16+_P(%rip), %r10         # r10 -> second coefficient (offset 16)
  162. faddp %st, %st(1)
  163. fmul %st(2), %st
  164. fldt (%r11)                   # coefficient at offset 32
  165. lea _P(%rip), %r11            # r11 -> first coefficient (offset 0)
  166. faddp %st, %st(1)
  167. fmul %st(2), %st
  168. fmul %st(1), %st              # first chain done (coefficients at 32, 64, 96)
  169. fldt (%rcx)                   # coefficient at offset 80
  170. fmul %st(3), %st
  171. fldt (%r9)                    # coefficient at offset 48
  172. movzwl 24(%rsp), %ecx         # ecx = sign/exponent word of the parked s
  173. faddp %st, %st(1)
  174. fmul %st(3), %st
  175. andl $-32768, %ecx            # keep its sign bit
  176. fldt (%r10)                   # coefficient at offset 16
  177. orl %esi, %ecx                # insert the result exponent field
  178. movw %cx, 24(%rsp)            # 16(%rsp) now holds scale
  179. subl %edx, %edi               # edi = exponent - 3*q; ZF from here feeds the jne below
  180. movq __libm_cbrtl_table_256@GOTPCREL(%rip), %rdx
  181. faddp %st, %st(1)
  182. fmulp %st, %st(3)
  183. faddp %st, %st(2)
  184. fldt (%r11)                   # coefficient at offset 0
  185. fmulp %st, %st(1)
  186. faddp %st, %st(1)             # st(0) = p
  187. fadd %st, %st(1)              # st(1) = 1.0 + p
  188. fxch %st(1)
  189. fstpt (%rsp)                  # park 1 + p in the scratch slot
  190. fldl 8(%rdx,%rax)             # second double of the table entry
  191. fldl (%rdx,%rax)              # first double of the table entry
  192. fldt 16(%rsp)                 # push scale
  193. jne ..B1.16                   # remainder != 0 (no flag-writing instruction since the subl)
  194. ..B1.15:
  # Remainder 0: combine the table pair, 1 + p and scale directly.
  195. fldt (%rsp)                   # push 1 + p
  196. fmulp %st, %st(3)
  197. fxch %st(1)
  198. fmul %st, %st(3)
  199. fxch %st(3)
  200. faddp %st, %st(2)
  201. fxch %st(2)
  202. faddp %st, %st(1)
  203. fmulp %st, %st(1)             # multiply by scale
  204. fstpt (%rsp)                  # stage the result
  205. jmp ..B1.17
  206. ..B1.16:
  # Remainder 1 or 2: additionally fold in the SH constants selected by it.
  207. fldt (%rsp)                   # push 1 + p
  208. lea SH(%rip), %rax
  209. movslq %edi, %rdi             # rdi = remainder, used as a table index
  210. fxch %st(3)
  211. fmull -8(%rax,%rdi,8)         # SH[remainder - 1]: full-precision factor
  212. fldl 24(%rax,%rdi,8)          # SH[remainder + 3]: low part of the factor
  213. fmul %st(3), %st
  214. faddp %st, %st(1)
  215. fmulp %st, %st(3)
  216. fxch %st(1)
  217. fmull 8(%rax,%rdi,8)          # SH[remainder + 1]: high part of the factor
  218. fmul %st, %st(3)
  219. fxch %st(3)
  220. faddp %st, %st(2)
  221. fxch %st(2)
  222. faddp %st, %st(1)
  223. fmulp %st, %st(1)             # multiply by scale
  224. fstpt (%rsp)                  # stage the result
  225. ..B1.17:
  226. testb %r8b, %r8b              # was the control word changed?
  227. je ..B1.19
  228. ..B1.18:
  229. fldcw 34(%rsp)                # restore the caller's control word
  230. ..B1.19:
  231. fldt (%rsp)                   # return value in st(0)
  232. addq $40, %rsp
  233. .cfi_def_cfa_offset 8
  234. ret
  235. .cfi_def_cfa_offset 48
  236. ..B1.20:
  # Exponent field all ones (Inf or NaN): x is copied to the scratch slot with
  # fldt/fstpt and returned; no arithmetic is performed on it.
  237. movl %r9d, %eax
  238. andl $768, %eax               # precision-control field of the saved control word
  239. cmpl $768, %eax
  240. je ..B1.26                    # both bits already set: skip the control-word switch
  241. ..B1.21:
  242. orl $-64768, %r9d             # -64768 = 0xFFFF0300: set both precision-control bits
  243. movw %r9w, 32(%rsp)
  244. ..B1.22:
  245. fldcw 32(%rsp)
  246. ..B1.23:
  247. fldt 48(%rsp)
  248. fstpt (%rsp)
  249. ..B1.24:
  250. fldcw 34(%rsp)                # restore the caller's control word
  251. ..B1.25:
  252. fldt (%rsp)                   # return value in st(0)
  253. addq $40, %rsp
  254. .cfi_def_cfa_offset 8
  255. ret
  256. .cfi_def_cfa_offset 48
  257. ..B1.26:
  258. fldt 48(%rsp)
  259. fstpt (%rsp)
  260. jmp ..B1.25                   # share the epilogue above
  261. .align 16,0x90
  262. .cfi_endproc
  263. .type cbrtl,@function
  264. .size cbrtl,.-cbrtl
  265. .data
  266. # -- End cbrtl
  267. .section .rodata, "a"
  268. .align 16
  269. .align 16
  # 80-bit extended constant 1.0: significand 0x8000000000000000, sign/exponent
  # word 0x3fff, followed by six zero bytes of padding (16 bytes in total).
  # Loaded with fldt at the start of the main path of cbrtl.
  270. .L_2il0floatpacket.0:
  271. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0xff,0x3f,0x00,0x00,0x00,0x00,0x00,0x00
  272. .type .L_2il0floatpacket.0,@object
  273. .size .L_2il0floatpacket.0,16
  274. .align 8
  # Two IEEE doubles (low dword first), indexed by the sign bit of x (0 or 1).
  275. ones:
  276. .long 0x00000000,0x3ff00000   # ones[0] = +1.0
  277. .long 0x00000000,0xbff00000   # ones[1] = -1.0
  278. .type ones,@object
  279. .size ones,16
  280. .align 8
  # Six IEEE doubles (low dword first) used when the exponent remainder is
  # 1 or 2. cbrtl reads SH[rem-1], SH[rem+1] and SH[rem+3], so the layout is
  # full value / high part / low part for each of the two remainders.
  # The values below are approximate decodings of the bit patterns.
  281. SH:
  282. .long 0xf98d728b,0x3ff428a2   # SH[0] ~ 1.2599210 (2^(1/3)), full precision
  283. .long 0xa53d6e3d,0x3ff965fe   # SH[1] ~ 1.5874011 (2^(2/3)), full precision
  284. .long 0xf8000000,0x3ff428a2   # SH[2]: SH[0] with the low significand bits cleared (high part)
  285. .long 0xa4000000,0x3ff965fe   # SH[3]: SH[1] with the low significand bits cleared (high part)
  286. .long 0xae223ddb,0x3e38d728   # SH[4]: small correction term paired with SH[2] (low part)
  287. .long 0xc82b059a,0x3e33d6e3   # SH[5]: small correction term paired with SH[3] (low part)
  288. .type SH,@object
  289. .size SH,48
  290. .align 4
  # Two IEEE doubles (low dword first), indexed by the sign bit of x; cbrtl
  # returns the matching entry when x is a zero.
  291. _zeros:
  292. .long 0                       # _zeros[0] = +0.0 (low dword)
  293. .long 0                       #                   (high dword)
  294. .long 0                       # _zeros[1] = -0.0 (low dword)
  295. .long 2147483648              #                   (high dword 0x80000000: sign bit only)
  296. .type _zeros,@object
  297. .size _zeros,16
  298. .align 4
  # Two IEEE doubles (low dword first). cbrtl multiplies subnormal inputs by
  # the first entry; the second entry is not referenced by the code in this file.
  299. _TWO_75:
  300. .long 0                       # entry 0 = 2^75 (low dword)
  301. .long 1151336448              #           (high dword 0x44A00000)
  302. .long 0                       # entry 1 = 2^-75 (low dword)
  303. .long 994050048               #           (high dword 0x3B400000)
  304. .type _TWO_75,@object
  305. .size _TWO_75,16
  306. .align 4
  # One IEEE double (low dword first) = 1.5 * 2^32. cbrtl adds it to the
  # normalized argument and subtracts it again, which presumably rounds away
  # the low bits to obtain a high part -- confirm against the algorithm notes.
  307. _TWO_32H:
  308. .long 0                       # low dword
  309. .long 1106771968              # high dword 0x41F80000
  310. .type _TWO_32H,@object
  311. .size _TWO_32H,8
  312. .align 2
  # Seven polynomial coefficients in 80-bit extended format, 16 bytes apart.
  # Each entry is four 16-bit significand words (least significant first),
  # one sign/exponent word, then three zero padding words. cbrtl loads them
  # with fldt from offsets 0, 16, ..., 96. The decimal values in the comments
  # are approximate decodings; they are close to the series coefficients of
  # (1 + t)^(1/3) -- whether they are exact or tuned is not visible here.
  313. _P:
  314. .word 43691                   # offset 0: ~ +0.333333 (about 1/3)
  315. .word 43690
  316. .word 43690
  317. .word 43690
  318. .word 16381                   # sign/exponent word 0x3FFD
  319. .word 0
  320. .word 0
  321. .word 0
  322. .word 58320                   # offset 16: ~ -0.111111 (about -1/9)
  323. .word 36408
  324. .word 14563
  325. .word 58254
  326. .word 49147                   # sign/exponent word 0xBFFB
  327. .word 0
  328. .word 0
  329. .word 0
  330. .word 50474                   # offset 32: ~ +0.061728 (about 5/81)
  331. .word 25890
  332. .word 59872
  333. .word 64726
  334. .word 16378                   # sign/exponent word 0x3FFA
  335. .word 0
  336. .word 0
  337. .word 0
  338. .word 45217                   # offset 48: ~ -0.041152 (about -10/243)
  339. .word 37017
  340. .word 18069
  341. .word 43151
  342. .word 49146                   # sign/exponent word 0xBFFA
  343. .word 0
  344. .word 0
  345. .word 0
  346. .word 45059                   # offset 64: ~ +0.030178 (about 22/729)
  347. .word 7616
  348. .word 35240
  349. .word 63288
  350. .word 16377                   # sign/exponent word 0x3FF9
  351. .word 0
  352. .word 0
  353. .word 0
  354. .word 54522                   # offset 80: ~ -0.023472 (about -154/6561)
  355. .word 17175
  356. .word 48475
  357. .word 49224
  358. .word 49145                   # sign/exponent word 0xBFF9
  359. .word 0
  360. .word 0
  361. .word 0
  362. .word 568                     # offset 96: ~ +0.019001 (about 374/19683)
  363. .word 55426
  364. .word 44619
  365. .word 39848
  366. .word 16377                   # sign/exponent word 0x3FF9
  367. .word 0
  368. .word 0
  369. .word 0
  370. .type _P,@object
  371. .size _P,112
  372. .data
  # Empty marker section; by GNU toolchain convention its presence without the
  # "x" flag tells the linker this object does not need an executable stack.
  373. .section .note.GNU-stack, ""
  374. // -- Begin DWARF2 SEGMENT .eh_frame
  # Switches to the unwind-information section and defines a label in it. No
  # data is emitted here by hand; the .cfi_* directives in cbrtl produce the
  # frame description entries.
  375. .section .eh_frame,"a",@progbits
  376. .eh_frame_seg:
  377. .align 1
  378. # End