/* libm_hypot2l_k80.S (12 KB) */
  1. /*
  2. * Math library
  3. *
  4. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions
  8. * are met:
  9. *
  10. * * Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * * Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * * Neither the name of Intel Corporation nor the names of its
  17. * contributors may be used to endorse or promote products derived
  18. * from this software without specific prior written permission.
  19. *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. *
  32. *
  33. * Author Name <jingwei.zhang@intel.com>
  34. * History:
  35. * 03-14-2016 Initial version. numerics svn rev. 12864
  36. */
  37. .file "libm_hypot2l_k80.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin __libm_hypot2l_k80
  41. .text
  42. .align 16,0x90
  43. .globl __libm_hypot2l_k80
  44. __libm_hypot2l_k80:
  45. # parameter 1: %rdi
  46. # parameter 2: 8 + %rsp
  47. # parameter 3: 24 + %rsp
  #
  # __libm_hypot2l_k80 -- scaled sum of squares of two 80-bit operands.
  #
  # Syntax: GNU as, AT&T operand order, x86-64, x87 arithmetic.
  #
  # Inputs
  #   %rdi      pointer to the output area; 10 bytes are stored at 0(%rdi)
  #             and 10 bytes at 16(%rdi)
  #   8(%rsp)   first operand x, 80-bit extended value
  #   24(%rsp)  second operand y, 80-bit extended value
  #
  # Outputs
  #   0(%rdi)   head of a*a + b*b, where a = max(|x|,|y|) and b = min(|x|,|y|)
  #             after the scaling described below
  #   16(%rdi)  tail (sum of the rounding-error terms) of the same quantity
  #   %st(0)    one of 0, +-16386, +-32772 -- twice the power of two that was
  #             divided out of the operands (presumably the caller rescales
  #             the final result with it; confirm against the caller)
  #
  # Special case: when both operands are +-0 the operands themselves are
  # copied to 0(%rdi) and 16(%rdi) and 0.0 is returned.
  #
  # Scratch: %rax, %rcx, %rdx, %esi, EFLAGS, the x87 register stack (all
  # eight registers are in use at the deepest point), and three 10-byte
  # slots at -56, -40 and -24(%rsp). %rsp is never adjusted and there is
  # no call instruction, so the slots rely on the SysV red zone.
  #
  # NOTE: several conditional jumps below consume flags produced by a cmpl
  # that is separated from them by x87 and mov instructions. Those do not
  # modify EFLAGS, so the instruction order here must not be disturbed.
  #
  48. ..B1.1:
  49. .cfi_startproc
  50. ..___tag_value___libm_hypot2l_k80.1:
  51. ..L2:
  # -- Fast path test: both operands zero (exponent field and significand).
  52. movzwl 16(%rsp), %eax                # sign/exponent word of x
  53. testl $32767, %eax                   # exponent field of x nonzero?
  54. jne ..B1.6
  55. ..B1.2:
  56. cmpq $0, 8(%rsp)                     # 64-bit significand of x
  57. jne ..B1.6
  58. ..B1.3:
  59. movzwl 32(%rsp), %eax                # sign/exponent word of y
  60. testl $32767, %eax                   # exponent field of y nonzero?
  61. jne ..B1.6
  62. ..B1.4:
  63. cmpq $0, 24(%rsp)                    # 64-bit significand of y
  64. jne ..B1.6
  65. ..B1.5:
  # -- x and y are both zero: copy them out unchanged and return 0.0.
  66. fldt 8(%rsp)
  67. fstpt (%rdi)                         # 0(%rdi) = x
  68. fldt 24(%rsp)
  69. fstpt 16(%rdi)                       # 16(%rdi) = y
  70. fldt .L_2il0floatpacket.0(%rip)      # return value 0.0 in st0
  71. ret
  72. ..B1.6:
  # -- General path. Spill |x| to -56(%rsp) and |y| to -24(%rsp) so their
  # -- exponent and significand words can be compared as integers.
  73. fldt 8(%rsp)
  74. fabs
  75. fstpt -56(%rsp)                      # -56(%rsp) = |x|
  76. fldt -56(%rsp)
  77. fldt 24(%rsp)
  78. fabs
  79. fstpt -24(%rsp)                      # -24(%rsp) = |y|
  80. fldt -24(%rsp)                       # stack: st0 = |y|, st1 = |x|
  81. movzwl -48(%rsp), %ecx               # exponent word of |x|
  82. movzwl -16(%rsp), %esi               # exponent word of |y|
  83. andl $32767, %ecx
  84. andl $32767, %esi
  85. cmpl %esi, %ecx
  86. jg ..B1.11                           # exponent of |x| is larger
  87. ..B1.7:
  88. jne ..B1.12                          # exponent of |x| is smaller
  89. ..B1.8:
  # -- Equal exponents: compare significands, high dword first (unsigned).
  90. movl -52(%rsp), %eax                 # high significand dword of |x|
  91. movl -20(%rsp), %edx                 # high significand dword of |y|
  92. cmpl %edx, %eax
  93. ja ..B1.57
  94. ..B1.9:
  95. jne ..B1.56
  96. ..B1.10:
  97. movl -56(%rsp), %eax                 # low significand dword of |x|
  98. cmpl -24(%rsp), %eax                 # against low dword of |y|
  99. jbe ..B1.56
  100. ..B1.57:
  101. cmpl %esi, %ecx                     # re-create the exponent-compare flags for the jl/jne below
  102. ..B1.11:
  # -- |x| is the larger operand: spill it to -40(%rsp) as max.
  103. fxch %st(1)
  104. fstpt -40(%rsp)                     # -40(%rsp) = max
  105. fldt -40(%rsp)
  106. fld %st(0)
  107. fxch %st(2)
  108. fxch %st(1)
  109. fxch %st(2)                         # stack: st0 = |x|, st1 = |y|, st2 = |x|
  110. jl ..B1.18                          # tests the cmpl %esi, %ecx flags (x87 ops leave EFLAGS alone)
  111. jmp ..B1.14
  112. ..B1.12:
  # -- |y| is the larger operand (or the magnitudes are equal): spill it as max.
  113. fstpt -40(%rsp)                     # -40(%rsp) = max
  114. fldt -40(%rsp)
  115. fld %st(0)                          # stack: st0 = |y|, st1 = |y|, st2 = |x|
  116. ..B1.13:
  117. jl ..B1.18                          # exponent of |x| smaller: |x| is min
  118. ..B1.14:
  119. jne ..B1.19                         # exponent of |x| larger: |y| is min
  120. ..B1.15:
  # -- Equal exponents again: pick min by significand comparison.
  121. movl -52(%rsp), %eax
  122. movl -20(%rsp), %edx
  123. cmpl %edx, %eax
  124. jb ..B1.18
  125. ..B1.16:
  126. jne ..B1.19
  127. ..B1.17:
  128. movl -56(%rsp), %eax
  129. cmpl -24(%rsp), %eax
  130. jae ..B1.19
  131. ..B1.18:
  # -- |x| is the smaller operand: drop the duplicate, spill min to -24(%rsp).
  132. fstp %st(1)
  133. fxch %st(1)
  134. fstpt -24(%rsp)                     # -24(%rsp) = min
  135. fldt -24(%rsp)                      # stack: st0 = min, st1 = max
  136. jmp ..B1.20
  137. ..B1.19:
  # -- |y| is the smaller operand, or both magnitudes are equal.
  138. fstp %st(2)
  139. fstpt -24(%rsp)                     # -24(%rsp) = min
  140. fldt -24(%rsp)                      # stack: st0 = min, st1 = max
  141. ..B1.20:
  # -- ecx = unbiased exponent of min.
  142. movswl -16(%rsp), %ecx              # exponent word of min (sign bit cleared by fabs)
  143. testl %ecx, %ecx
  144. je ..B1.22                          # exponent field zero: min is zero or denormal
  145. ..B1.21:
  146. addl $-16383, %ecx                  # remove the exponent bias
  147. jmp ..B1.26
  148. ..B1.22:
  149. movzwl -16(%rsp), %eax
  150. testl $32767, %eax
  151. jne ..B1.25
  152. ..B1.23:
  153. cmpq $0, -24(%rsp)                  # significand of min
  154. jne ..B1.25
  155. ..B1.24:
  156. movl $-16526, %ecx                  # min is zero: fixed stand-in exponent
  157. jmp ..B1.26
  158. ..B1.25:
  # -- min is denormal: multiply a copy by 2^16383 and then by 2^62, read
  # -- the exponent word of the product and undo bias plus scaling.
  159. lea 96+_CONSTANTS(%rip), %rax       # table entry 2^16383
  160. lea 112+_CONSTANTS(%rip), %rdx      # table entry 2^62
  161. fldt (%rax)
  162. fmul %st(1), %st                    # st0 = 2^16383 * min
  163. fldt (%rdx)
  164. fmulp %st, %st(1)                   # st0 = 2^62 * 2^16383 * min
  165. fstpt -56(%rsp)                     # reuse the |x| slot as scratch
  166. movswl -48(%rsp), %ecx              # exponent word of the product
  167. addl $-32828, %ecx                  # subtract 16383 (bias) + 16445 (scaling)
  168. ..B1.26:
  # -- eax = unbiased exponent of max, same scheme.
  169. movswl -32(%rsp), %eax              # exponent word of max
  170. testl %eax, %eax
  171. je ..B1.28
  172. ..B1.27:
  173. addl $-16383, %eax
  174. jmp ..B1.29
  175. ..B1.28:
  176. lea 96+_CONSTANTS(%rip), %rax
  177. lea 112+_CONSTANTS(%rip), %rdx
  178. fldt (%rax)
  179. fmul %st(2), %st                    # st0 = 2^16383 * max
  180. fldt (%rdx)
  181. fmulp %st, %st(1)
  182. fstpt -56(%rsp)
  183. movswl -48(%rsp), %eax
  184. addl $-32828, %eax
  185. ..B1.29:
  # -- Push 0.0 (the default return value). When exp(min) <= exp(max) - 67
  # -- the smaller operand is replaced by that 0.0.
  186. fldt .L_2il0floatpacket.0(%rip)     # stack: st0 = 0.0, st1 = min, st2 = max
  187. addl $-67, %eax
  188. cmpl %eax, %ecx                     # exp(min) vs exp(max) - 67
  189. movzwl -32(%rsp), %eax              # reload exponent word of max; mov keeps the flags
  190. jg ..L3
  191. fst %st(1)                          # min := 0.0
  192. ..L3:
  # -- Range checks on max. eax = biased exponent of max; on an exponent
  # -- tie the significand dwords at -36/-40(%rsp) are compared with the
  # -- matching _CONSTANTS entry.
  193. andl $32767, %eax
  194. cmpl $24709, %eax                   # biased exponent of 2^8326
  195. jg ..B1.34
  196. ..B1.30:
  197. jne ..B1.35
  198. ..B1.31:
  199. lea _CONSTANTS(%rip), %rcx          # entry +0: 2^8326
  200. movl -36(%rsp), %edx                # high significand dword of max
  201. cmpl 4(%rcx), %edx
  202. ja ..B1.34
  203. ..B1.32:
  204. jne ..B1.35
  205. ..B1.33:
  206. movl -40(%rsp), %edx                # low significand dword of max
  207. cmpl (%rcx), %edx
  208. jb ..B1.35
  209. ..B1.34:
  # -- max >= 2^8326: multiply both operands by 2^-8193 twice, return 32772.
  210. fstp %st(0)                         # discard the 0.0
  211. lea 80+_CONSTANTS(%rip), %rax       # entry +80: 2^-8193
  212. fldt (%rax)
  213. fmul %st, %st(2)
  214. fmul %st, %st(2)                    # max *= 2^-16386
  215. fxch %st(2)
  216. fstpt -40(%rsp)                     # store and reload the scaled max
  217. fldt -40(%rsp)
  218. fxch %st(1)
  219. fmul %st(2), %st
  220. fmulp %st, %st(2)                   # min *= 2^-16386
  221. fxch %st(1)
  222. fstpt -24(%rsp)                     # store and reload the scaled min
  223. fldt -24(%rsp)
  224. fldt .L_2il0floatpacket.1(%rip)     # st0 = 32772.0; st1 = min, st2 = max
  225. jmp ..B1.53
  226. ..B1.35:
  227. cmpl $24573, %eax                   # biased exponent of entry +16
  228. jg ..B1.40
  229. ..B1.36:
  230. jne ..B1.41
  231. ..B1.37:
  232. lea 16+_CONSTANTS(%rip), %rcx       # entry +16: (2 - 2^-63) * 2^8190
  233. movl -36(%rsp), %edx
  234. cmpl 4(%rcx), %edx
  235. ja ..B1.40
  236. ..B1.38:
  237. jne ..B1.41
  238. ..B1.39:
  239. movl -40(%rsp), %edx
  240. cmpl (%rcx), %edx
  241. jb ..B1.41
  242. ..B1.40:
  # -- max >= (2 - 2^-63) * 2^8190: multiply both by 2^-8193 once, return 16386.
  243. fstp %st(0)                         # discard the 0.0
  244. lea 80+_CONSTANTS(%rip), %rax       # entry +80: 2^-8193
  245. fldt (%rax)
  246. fmul %st, %st(2)                    # max *= 2^-8193
  247. fxch %st(2)
  248. fstpt -40(%rsp)
  249. fldt -40(%rsp)
  250. fxch %st(2)
  251. fmulp %st, %st(1)                   # min *= 2^-8193
  252. fstpt -24(%rsp)
  253. fldt -24(%rsp)
  254. fldt .L_2il0floatpacket.4(%rip)     # st0 = 16386.0
  255. jmp ..B1.53
  256. ..B1.41:
  257. cmpl $8127, %eax                    # biased exponent of entry +32
  258. jl ..B1.46
  259. ..B1.42:
  260. jne ..B1.47
  261. ..B1.43:
  262. lea 32+_CONSTANTS(%rip), %rcx       # entry +32: (2 - 2^-63) * 2^-8256
  263. movl -36(%rsp), %edx
  264. cmpl 4(%rcx), %edx
  265. jb ..B1.46
  266. ..B1.44:
  267. jne ..B1.47
  268. ..B1.45:
  269. movl -40(%rsp), %edx
  270. cmpl (%rcx), %edx
  271. ja ..B1.47
  272. ..B1.46:
  # -- max <= (2 - 2^-63) * 2^-8256: multiply both by 2^8193 twice, return -32772.
  273. fstp %st(0)                         # discard the 0.0
  274. lea 64+_CONSTANTS(%rip), %rax       # entry +64: 2^8193
  275. fldt (%rax)
  276. fmul %st, %st(2)
  277. fmul %st, %st(2)                    # max *= 2^16386
  278. fxch %st(2)
  279. fstpt -40(%rsp)
  280. fldt -40(%rsp)
  281. fxch %st(1)
  282. fmul %st(2), %st
  283. fmulp %st, %st(2)                   # min *= 2^16386
  284. fxch %st(1)
  285. fstpt -24(%rsp)
  286. fldt -24(%rsp)
  287. fldt .L_2il0floatpacket.5(%rip)     # st0 = -32772.0
  288. jmp ..B1.53
  289. ..B1.47:
  290. cmpl $8323, %eax                    # biased exponent of 2^-8060
  291. jl ..B1.52
  292. ..B1.48:
  293. jne ..B1.53                         # in range: keep st0 = 0.0, no scaling
  294. ..B1.49:
  295. lea 48+_CONSTANTS(%rip), %rdx       # entry +48: 2^-8060
  296. movl -36(%rsp), %eax
  297. cmpl 4(%rdx), %eax
  298. jb ..B1.52
  299. ..B1.50:
  300. jne ..B1.53
  301. ..B1.51:
  302. movl -40(%rsp), %eax
  303. cmpl (%rdx), %eax
  304. ja ..B1.53
  305. ..B1.52:
  # -- max <= 2^-8060: multiply both by 2^8193 once, return -16386.
  306. fstp %st(0)                         # discard the 0.0
  307. lea 64+_CONSTANTS(%rip), %rax       # entry +64: 2^8193
  308. fldt (%rax)
  309. fmul %st, %st(2)                    # max *= 2^8193
  310. fxch %st(2)
  311. fstpt -40(%rsp)
  312. fldt -40(%rsp)
  313. fxch %st(2)
  314. fmulp %st, %st(1)                   # min *= 2^8193
  315. fstpt -24(%rsp)
  316. fldt -24(%rsp)
  317. fldt .L_2il0floatpacket.6(%rip)     # st0 = -16386.0
  318. ..B1.53:
  # -- Sum of squares. On entry: st0 = s (return value), st1 = b (min),
  # -- st2 = a (max). C = 2^33 + 1 (floatpacket.2) splits each operand
  # -- into a high and a low part; products of the parts are then summed
  # -- with the rounding error of each addition recovered separately.
  # -- Operand roles below follow the GNU as convention for AT&T x87
  # -- syntax: with a %st(i) destination, fsub computes st0 - st(i) and
  # -- fsubr computes st(i) - st0.
  319. fldt .L_2il0floatpacket.2(%rip)     # C
  320. fmul %st(3), %st                    # C*a
  321. fld %st(3)                          # a
  322. fsubr %st(1), %st                   # C*a - a
  323. fsubrp %st, %st(1)                  # ah = C*a - (C*a - a)
  324. fld %st(0)
  325. fmul %st(1), %st                    # ah*ah
  326. fld %st(3)                          # b
  327. fxch %st(2)
  328. fsubr %st, %st(5)                   # al = a - ah (replaces a)
  329. fldt .L_2il0floatpacket.2(%rip)     # C
  330. fmul %st(5), %st                    # C*b
  331. fsub %st, %st(3)                    # C*b - b
  332. fsubp %st, %st(3)                   # bh = C*b - (C*b - b)
  333. fld %st(2)
  334. fmul %st(3), %st                    # bh*bh
  335. fxch %st(3)
  336. fsubr %st, %st(5)                   # bl = b - bh (replaces b)
  337. fldt .L_2il0floatpacket.3(%rip)     # 2.0; all eight x87 registers are now in use
  338. fmul %st, %st(2)                    # 2*ah
  339. fxch %st(2)
  340. fmul %st(7), %st                    # 2*ah*al
  341. fxch %st(2)
  342. fmulp %st, %st(1)                   # 2*bh
  343. fmul %st(5), %st                    # 2*bh*bl
  344. fld %st(2)
  345. fadd %st(2), %st                    # t1 = ah*ah + 2*ah*al
  346. fsub %st, %st(3)                    # t1 - ah*ah
  347. fxch %st(3)
  348. fsubrp %st, %st(2)                  # e1 = 2*ah*al - (t1 - ah*ah)
  349. fxch %st(6)
  350. fmul %st(0), %st                    # al*al
  351. faddp %st, %st(1)                   # e1 += al*al
  352. fld %st(1)
  353. fadd %st(3), %st                    # t2 = t1 + bh*bh
  354. fld %st(0)
  355. fadd %st(7), %st                    # head = t2 + 2*bh*bl
  356. fstpt (%rdi)                        # 0(%rdi) = head
  357. fldt (%rdi)                         # reload the stored head
  358. fxch %st(1)
  359. fsub %st, %st(3)                    # t2 - t1
  360. fxch %st(3)
  361. fsubrp %st, %st(4)                  # e2 = bh*bh - (t2 - t1)
  362. fxch %st(1)
  363. faddp %st, %st(3)                   # err = e1 + e2
  364. fxch %st(4)
  365. fmul %st(0), %st                    # bl*bl
  366. fxch %st(4)
  367. fsubp %st, %st(1)                   # head - t2
  368. fsubrp %st, %st(4)                  # e3 = 2*bh*bl - (head - t2)
  369. faddp %st, %st(3)                   # err += e3
  370. fxch %st(1)
  371. faddp %st, %st(2)                   # tail = err + bl*bl
  372. fxch %st(1)
  373. fstpt 16(%rdi)                      # 16(%rdi) = tail
  374. ret                                 # st0 = s, the only value left on the x87 stack
  375. ..B1.56:
  376. cmpl %esi, %ecx                     # re-create the exponent-compare flags used after ..B1.12
  377. jmp ..B1.12
  378. .align 16,0x90
  379. .cfi_endproc
  380. .type __libm_hypot2l_k80,@function
  381. .size __libm_hypot2l_k80,.-__libm_hypot2l_k80
  382. .data
  383. # -- End __libm_hypot2l_k80
  384. .section .rodata, "a"
  385. .align 16
  386. .align 16
  # -- Read-only constants for __libm_hypot2l_k80. Each object holds one
  # -- 80-bit extended value, little-endian: 8 significand bytes, the
  # -- 2-byte sign/exponent word (bias 16383), then 6 bytes of zero padding.
  387. .L_2il0floatpacket.0:
  388. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00   # 0.0
  389. .type .L_2il0floatpacket.0,@object
  390. .size .L_2il0floatpacket.0,16
  391. .align 16
  392. .L_2il0floatpacket.1:
  393. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x80,0x0e,0x40,0x00,0x00,0x00,0x00,0x00,0x00   # 32772.0 (significand 0x8004 << 48, exponent 15)
  394. .type .L_2il0floatpacket.1,@object
  395. .size .L_2il0floatpacket.1,16
  396. .align 16
  397. .L_2il0floatpacket.2:
  398. .byte 0x00,0x00,0x00,0x40,0x00,0x00,0x00,0x80,0x20,0x40,0x00,0x00,0x00,0x00,0x00,0x00   # 2^33 + 1, the splitting multiplier
  399. .type .L_2il0floatpacket.2,@object
  400. .size .L_2il0floatpacket.2,16
  401. .align 16
  402. .L_2il0floatpacket.3:
  403. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x00,0x40,0x00,0x00,0x00,0x00,0x00,0x00   # 2.0
  404. .type .L_2il0floatpacket.3,@object
  405. .size .L_2il0floatpacket.3,16
  406. .align 16
  407. .L_2il0floatpacket.4:
  408. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x80,0x0d,0x40,0x00,0x00,0x00,0x00,0x00,0x00   # 16386.0
  409. .type .L_2il0floatpacket.4,@object
  410. .size .L_2il0floatpacket.4,16
  411. .align 16
  412. .L_2il0floatpacket.5:
  413. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x80,0x0e,0xc0,0x00,0x00,0x00,0x00,0x00,0x00   # -32772.0
  414. .type .L_2il0floatpacket.5,@object
  415. .size .L_2il0floatpacket.5,16
  416. .align 16
  417. .L_2il0floatpacket.6:
  418. .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x04,0x80,0x0d,0xc0,0x00,0x00,0x00,0x00,0x00,0x00   # -16386.0
  419. .type .L_2il0floatpacket.6,@object
  420. .size .L_2il0floatpacket.6,16
  421. .align 2
  # -- _CONSTANTS: eight 16-byte entries addressed by byte offset from the
  # -- function. Each entry is four significand words (lowest first), the
  # -- sign/exponent word, and three zero padding words.
  # --   +0 .. +48   range thresholds compared against max
  # --   +64, +80    scale factors 2^8193 and 2^-8193
  # --   +96, +112   multipliers used to read the exponent of a denormal
  422. _CONSTANTS:
  423. .word 0                             # entry +0: 2^8326
  424. .word 0
  425. .word 0
  426. .word 32768
  427. .word 24709                         # exponent word: 16383 + 8326
  428. .word 0
  429. .word 0
  430. .word 0
  431. .word 65535                         # entry +16: (2 - 2^-63) * 2^8190
  432. .word 65535
  433. .word 65535
  434. .word 65535
  435. .word 24573                         # exponent word: 16383 + 8190
  436. .word 0
  437. .word 0
  438. .word 0
  439. .word 65535                         # entry +32: (2 - 2^-63) * 2^-8256
  440. .word 65535
  441. .word 65535
  442. .word 65535
  443. .word 8127                          # exponent word: 16383 - 8256
  444. .word 0
  445. .word 0
  446. .word 0
  447. .word 0                             # entry +48: 2^-8060
  448. .word 0
  449. .word 0
  450. .word 32768
  451. .word 8323                          # exponent word: 16383 - 8060
  452. .word 0
  453. .word 0
  454. .word 0
  455. .word 0                             # entry +64: 2^8193
  456. .word 0
  457. .word 0
  458. .word 32768
  459. .word 24576                         # exponent word: 16383 + 8193
  460. .word 0
  461. .word 0
  462. .word 0
  463. .word 0                             # entry +80: 2^-8193
  464. .word 0
  465. .word 0
  466. .word 32768
  467. .word 8190                          # exponent word: 16383 - 8193
  468. .word 0
  469. .word 0
  470. .word 0
  471. .word 0                             # entry +96: 2^16383
  472. .word 0
  473. .word 0
  474. .word 32768
  475. .word 32766                         # exponent word: 16383 + 16383
  476. .word 0
  477. .word 0
  478. .word 0
  479. .word 0                             # entry +112: 2^62
  480. .word 0
  481. .word 0
  482. .word 32768
  483. .word 16445                         # exponent word: 16383 + 62
  484. .word 0
  485. .word 0
  486. .word 0
  487. .type _CONSTANTS,@object
  488. .size _CONSTANTS,128
  489. .data
  # -- Empty .note.GNU-stack section: the GNU toolchain marker that this
  # -- object does not need an executable stack.
  490. .section .note.GNU-stack, ""
  491. // -- Begin DWARF2 SEGMENT .eh_frame
  # -- Only a label and an alignment directive are placed in .eh_frame
  # -- here; no unwind data is written by hand in this file.
  492. .section .eh_frame,"a",@progbits
  493. .eh_frame_seg:
  494. .align 1
  495. # End