/* csqrt_wmt.S */
/*
 * Math library
 *
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * Author Name <jingwei.zhang@intel.com>
 * History:
 * 03-14-2016 Initial version. numerics svn rev. 12864
 */
  37. .file "csqrt_wmt.c"
  38. .text
  39. ..TXTST0:
  40. # -- Begin static_func
  41. .text
  42. .align 16,0x90
  43. static_func:
  44. ..B1.1:
  45. ..L1:
  46. call ..L2
  47. ..L2:
  48. popl %eax
  49. lea _GLOBAL_OFFSET_TABLE_+[. - ..L2](%eax), %eax
  50. lea static_const_table@GOTOFF(%eax), %eax
  51. ret
  52. .align 16,0x90
  53. .type static_func,@function
  54. .size static_func,.-static_func
  55. .data
  56. # -- End static_func
  57. .text
  58. # -- Begin csqrt
  59. .text
  60. .align 16,0x90
  61. .globl csqrt
  62. csqrt:
  63. # parameter 1: 12 + %ebp
  64. ..B2.1:
  65. ..L3:
  66. ..B2.2:
  67. pushl %ebp
  68. movl %esp, %ebp
  69. subl $184, %esp
  70. movl %esi, 128(%esp)
  71. call static_func
  72. movl %eax, %esi
  73. movl 192(%esp), %edx
  74. movsd 196(%esp), %xmm0
  75. movsd 204(%esp), %xmm1
  76. movl %edx, (%esp)
  77. movsd %xmm0, 16(%esp)
  78. movsd %xmm1, 24(%esp)
  79. lea 16(%esp), %ecx
  80. movq (%esi), %xmm2
  81. movl %edx, (%esp)
  82. movl %ebx, 40(%esp)
  83. movl %ecx, 44(%esp)
  84. movq (%esi), %xmm3
  85. movq 32(%esi), %xmm7
  86. pand %xmm0, %xmm2
  87. pand %xmm1, %xmm3
  88. pand 16(%esi), %xmm0
  89. pand 16(%esi), %xmm1
  90. movq %xmm2, %xmm4
  91. movq %xmm3, %xmm5
  92. movl 4(%ecx), %eax
  93. movl 12(%ecx), %ebx
  94. andl $2147483647, %eax
  95. andl $2147483647, %ebx
  96. subl $591396864, %eax
  97. movq %xmm0, 64(%esp)
  98. subl $591396864, %ebx
  99. cmpl $1017118720, %eax
  100. movq %xmm1, 72(%esp)
  101. jae .L_2TAG_PACKET_0.0.2
  102. cmpl $1017118720, %ebx
  103. jae .L_2TAG_PACKET_0.0.2
  104. addl $591396864, %eax
  105. addl $591396864, %ebx
  106. movl %eax, %edx
  107. subl %ebx, %edx
  108. cmpl $60817408, %edx
  109. jg .L_2TAG_PACKET_1.0.2
  110. cmpl $-60817408, %edx
  111. movl (%esp), %edx
  112. jl .L_2TAG_PACKET_2.0.2
  113. .L_2TAG_PACKET_3.0.2:
  114. subsd %xmm2, %xmm0
  115. subsd %xmm3, %xmm1
  116. mulsd %xmm2, %xmm2
  117. mulsd %xmm3, %xmm3
  118. mulsd %xmm0, %xmm4
  119. mulsd %xmm1, %xmm5
  120. movq %xmm2, %xmm6
  121. mulsd %xmm0, %xmm0
  122. mulsd %xmm1, %xmm1
  123. movq %xmm7, 80(%esp)
  124. movq %xmm2, %xmm7
  125. addsd %xmm3, %xmm2
  126. addsd %xmm4, %xmm0
  127. subsd %xmm2, %xmm7
  128. addsd %xmm5, %xmm1
  129. movq %xmm2, 56(%esp)
  130. addsd %xmm7, %xmm2
  131. addsd %xmm7, %xmm3
  132. movq 56(%esp), %xmm7
  133. addsd %xmm4, %xmm0
  134. subsd %xmm2, %xmm6
  135. addsd %xmm5, %xmm1
  136. movq 56(%esp), %xmm5
  137. movq %xmm0, %xmm2
  138. movq %xmm0, %xmm4
  139. addsd %xmm3, %xmm6
  140. addsd %xmm1, %xmm0
  141. subsd %xmm0, %xmm2
  142. movq %xmm0, %xmm3
  143. addsd %xmm2, %xmm0
  144. addsd %xmm2, %xmm1
  145. pshufd $221, %xmm7, %xmm2
  146. subsd %xmm0, %xmm4
  147. movq 48(%esi), %xmm0
  148. addsd %xmm4, %xmm1
  149. pand %xmm7, %xmm0
  150. addsd %xmm3, %xmm7
  151. addsd %xmm6, %xmm1
  152. subsd %xmm7, %xmm5
  153. subsd %xmm0, %xmm7
  154. movq %xmm0, %xmm4
  155. addsd %xmm5, %xmm3
  156. movq %xmm0, %xmm5
  157. addsd %xmm7, %xmm1
  158. movq %xmm0, %xmm7
  159. psrlq $29, %xmm0
  160. addsd %xmm3, %xmm1
  161. pand 64(%esi), %xmm0
  162. movq 160(%esi), %xmm3
  163. pxor 80(%esi), %xmm0
  164. addsd %xmm1, %xmm4
  165. paddd 96(%esi), %xmm0
  166. psubd 112(%esi), %xmm2
  167. rsqrtss %xmm0, %xmm0
  168. pand 128(%esi), %xmm2
  169. psrld $3, %xmm0
  170. psrld $1, %xmm2
  171. paddd 144(%esi), %xmm0
  172. psubd %xmm2, %xmm0
  173. psllq $32, %xmm0
  174. movq %xmm0, %xmm2
  175. mulsd %xmm0, %xmm0
  176. mulsd %xmm0, %xmm7
  177. mulsd %xmm1, %xmm0
  178. subsd 32(%esi), %xmm7
  179. addsd %xmm0, %xmm7
  180. movq 176(%esi), %xmm0
  181. movq %xmm7, %xmm6
  182. mulsd %xmm7, %xmm7
  183. mulsd %xmm7, %xmm3
  184. mulsd %xmm7, %xmm0
  185. addsd 192(%esi), %xmm3
  186. addsd 208(%esi), %xmm0
  187. mulsd %xmm7, %xmm3
  188. mulsd %xmm7, %xmm0
  189. addsd 224(%esi), %xmm3
  190. mulsd %xmm6, %xmm3
  191. addsd %xmm0, %xmm3
  192. movq 64(%esp), %xmm0
  193. mulsd %xmm2, %xmm3
  194. mulsd %xmm4, %xmm3
  195. movq 64(%esp), %xmm4
  196. mulsd %xmm2, %xmm1
  197. mulsd %xmm2, %xmm5
  198. movq %xmm5, %xmm2
  199. addsd 64(%esp), %xmm5
  200. movq 48(%esi), %xmm7
  201. addsd %xmm3, %xmm1
  202. subsd %xmm5, %xmm4
  203. movq %xmm5, %xmm3
  204. addsd %xmm4, %xmm5
  205. addsd %xmm2, %xmm4
  206. subsd %xmm5, %xmm0
  207. addsd %xmm0, %xmm4
  208. movq 240(%esi), %xmm0
  209. addsd %xmm1, %xmm4
  210. movq %xmm3, %xmm1
  211. movq %xmm3, %xmm2
  212. addsd %xmm4, %xmm3
  213. subsd %xmm3, %xmm1
  214. mulsd %xmm3, %xmm0
  215. pand %xmm0, %xmm7
  216. addsd %xmm1, %xmm3
  217. addsd %xmm4, %xmm1
  218. movq %xmm7, %xmm4
  219. subsd %xmm2, %xmm3
  220. movq %xmm7, %xmm5
  221. pshufd $221, %xmm0, %xmm2
  222. subsd %xmm7, %xmm0
  223. subsd %xmm3, %xmm1
  224. mulsd 240(%esi), %xmm1
  225. addsd %xmm0, %xmm1
  226. movq %xmm7, %xmm0
  227. psrlq $29, %xmm7
  228. movq 160(%esi), %xmm3
  229. pand 64(%esi), %xmm7
  230. psubd 112(%esi), %xmm2
  231. pxor 80(%esi), %xmm7
  232. addsd %xmm1, %xmm4
  233. paddd 96(%esi), %xmm7
  234. rsqrtss %xmm7, %xmm7
  235. psrld $3, %xmm7
  236. pand 128(%esi), %xmm2
  237. psrld $1, %xmm2
  238. paddd 144(%esi), %xmm7
  239. psubd %xmm2, %xmm7
  240. psllq $32, %xmm7
  241. movq %xmm7, %xmm2
  242. mulsd %xmm7, %xmm7
  243. mulsd %xmm7, %xmm0
  244. mulsd %xmm1, %xmm7
  245. subsd 32(%esi), %xmm0
  246. addsd %xmm7, %xmm0
  247. movq 176(%esi), %xmm7
  248. movq %xmm0, %xmm6
  249. mulsd %xmm0, %xmm0
  250. mulsd %xmm0, %xmm3
  251. mulsd %xmm0, %xmm7
  252. addsd 192(%esi), %xmm3
  253. addsd 208(%esi), %xmm7
  254. mulsd %xmm0, %xmm3
  255. mulsd %xmm0, %xmm7
  256. addsd 224(%esi), %xmm3
  257. mulsd %xmm6, %xmm3
  258. movq 72(%esp), %xmm6
  259. addsd %xmm7, %xmm3
  260. mulsd 240(%esi), %xmm6
  261. mulsd %xmm2, %xmm3
  262. mulsd %xmm3, %xmm4
  263. pxor %xmm7, %xmm7
  264. mulsd %xmm2, %xmm1
  265. mulsd %xmm2, %xmm5
  266. cmpnlesd (%ecx), %xmm7
  267. addsd %xmm4, %xmm1
  268. movq 48(%esi), %xmm4
  269. pand %xmm6, %xmm4
  270. subsd %xmm4, %xmm6
  271. addsd %xmm5, %xmm1
  272. movq %xmm2, %xmm5
  273. mulsd %xmm4, %xmm2
  274. mulsd %xmm3, %xmm4
  275. movq %xmm1, %xmm0
  276. pand %xmm7, %xmm0
  277. mulsd %xmm6, %xmm3
  278. mulsd %xmm5, %xmm6
  279. movq 8(%ecx), %xmm5
  280. addsd %xmm4, %xmm3
  281. addsd %xmm3, %xmm6
  282. addsd %xmm6, %xmm2
  283. movq %xmm7, %xmm6
  284. pandn %xmm2, %xmm6
  285. pand %xmm7, %xmm2
  286. pandn %xmm1, %xmm7
  287. pand 256(%esi), %xmm5
  288. por %xmm6, %xmm0
  289. por %xmm7, %xmm2
  290. por %xmm5, %xmm0
  291. mulsd 80(%esp), %xmm2
  292. mulsd 80(%esp), %xmm0
  293. movsd %xmm2, (%edx)
  294. movsd %xmm0, 8(%edx)
  295. movl 40(%esp), %ebx
  296. jmp .L_2TAG_PACKET_4.0.2
  297. .L_2TAG_PACKET_0.0.2:
  298. addl $591396864, %eax
  299. addl $591396864, %ebx
  300. cmpl $2146435072, %eax
  301. jge .L_2TAG_PACKET_5.0.2
  302. cmpl $2146435072, %ebx
  303. jge .L_2TAG_PACKET_6.0.2
  304. movl (%ecx), %edx
  305. orl %eax, %edx
  306. movl 8(%ecx), %edx
  307. je .L_2TAG_PACKET_7.0.2
  308. orl %ebx, %edx
  309. je .L_2TAG_PACKET_8.0.2
  310. movl %eax, %edx
  311. subl %ebx, %edx
  312. cmpl $60817408, %edx
  313. jg .L_2TAG_PACKET_1.0.2
  314. cmpl $-60817408, %edx
  315. jl .L_2TAG_PACKET_2.0.2
  316. cmpl $1072693248, %eax
  317. jl .L_2TAG_PACKET_9.0.2
  318. mulsd 272(%esi), %xmm0
  319. mulsd 272(%esi), %xmm1
  320. movq 288(%esi), %xmm7
  321. movq 48(%esi), %xmm2
  322. movq 48(%esi), %xmm3
  323. pand %xmm0, %xmm2
  324. pand %xmm1, %xmm3
  325. movq %xmm2, %xmm4
  326. movl (%esp), %edx
  327. movq %xmm3, %xmm5
  328. movq %xmm0, 64(%esp)
  329. movq %xmm1, 72(%esp)
  330. jmp .L_2TAG_PACKET_3.0.2
  331. .L_2TAG_PACKET_9.0.2:
  332. mulsd 304(%esi), %xmm0
  333. mulsd 304(%esi), %xmm1
  334. movq 320(%esi), %xmm7
  335. movq 48(%esi), %xmm2
  336. movq 48(%esi), %xmm3
  337. pand %xmm0, %xmm2
  338. pand %xmm1, %xmm3
  339. movq %xmm2, %xmm4
  340. movl (%esp), %edx
  341. movq %xmm3, %xmm5
  342. movq %xmm0, 64(%esp)
  343. movq %xmm1, 72(%esp)
  344. jmp .L_2TAG_PACKET_3.0.2
  345. .L_2TAG_PACKET_2.0.2:
  346. .L_2TAG_PACKET_7.0.2:
  347. cmpl $2097152, %ebx
  348. movl (%esp), %edx
  349. jl .L_2TAG_PACKET_10.0.2
  350. mulsd 240(%esi), %xmm1
  351. sqrtsd %xmm1, %xmm1
  352. movsd 8(%ecx), %xmm2
  353. pand 256(%esi), %xmm2
  354. movsd %xmm1, (%edx)
  355. por %xmm2, %xmm1
  356. movsd %xmm1, 8(%edx)
  357. movl 40(%esp), %ebx
  358. jmp .L_2TAG_PACKET_4.0.2
  359. .L_2TAG_PACKET_10.0.2:
  360. mulsd 336(%esi), %xmm1
  361. sqrtsd %xmm1, %xmm1
  362. movsd 8(%ecx), %xmm2
  363. pand 256(%esi), %xmm2
  364. mulsd 352(%esi), %xmm1
  365. movsd %xmm1, (%edx)
  366. por %xmm2, %xmm1
  367. movsd %xmm1, 8(%edx)
  368. movl 40(%esp), %ebx
  369. jmp .L_2TAG_PACKET_4.0.2
  370. .L_2TAG_PACKET_8.0.2:
  371. sqrtsd %xmm0, %xmm0
  372. movl 4(%ecx), %eax
  373. cmpl $0, %eax
  374. movl (%esp), %edx
  375. jl .L_2TAG_PACKET_11.0.2
  376. movsd 8(%ecx), %xmm2
  377. pand 256(%esi), %xmm2
  378. movsd %xmm2, 8(%edx)
  379. movsd %xmm0, (%edx)
  380. movl 40(%esp), %ebx
  381. jmp .L_2TAG_PACKET_4.0.2
  382. .L_2TAG_PACKET_11.0.2:
  383. pxor %xmm3, %xmm3
  384. movsd %xmm3, (%edx)
  385. movsd 8(%ecx), %xmm2
  386. pand 256(%esi), %xmm2
  387. por %xmm0, %xmm2
  388. movsd %xmm2, 8(%edx)
  389. movl 40(%esp), %ebx
  390. jmp .L_2TAG_PACKET_4.0.2
  391. .L_2TAG_PACKET_1.0.2:
  392. cmpl $57671680, %ebx
  393. movsd %xmm0, %xmm4
  394. subsd %xmm2, %xmm0
  395. movq %xmm2, %xmm5
  396. movq %xmm2, %xmm7
  397. pshufd $221, %xmm2, %xmm1
  398. jl .L_2TAG_PACKET_12.0.2
  399. mulsd 416(%esi), %xmm2
  400. mulsd 416(%esi), %xmm0
  401. mulsd 416(%esi), %xmm4
  402. movsd %xmm2, %xmm5
  403. movsd %xmm2, %xmm7
  404. pshufd $221, %xmm2, %xmm1
  405. shrl $21, %eax
  406. shrl $20, %ebx
  407. psrlq $29, %xmm2
  408. pand 64(%esi), %xmm2
  409. pxor 80(%esi), %xmm2
  410. paddd 96(%esi), %xmm2
  411. rsqrtss %xmm2, %xmm2
  412. subl $511, %eax
  413. subl $1023, %ebx
  414. psubd 112(%esi), %xmm1
  415. psrld $3, %xmm2
  416. pand 128(%esi), %xmm1
  417. psrld $1, %xmm1
  418. paddd 144(%esi), %xmm2
  419. psubd %xmm1, %xmm2
  420. subl %eax, %ebx
  421. psllq $32, %xmm2
  422. movq %xmm2, %xmm1
  423. mulsd %xmm2, %xmm2
  424. mulsd %xmm2, %xmm7
  425. mulsd %xmm0, %xmm2
  426. subsd 32(%esi), %xmm7
  427. cmpl $-1020, %ebx
  428. addsd %xmm2, %xmm7
  429. movq %xmm7, %xmm6
  430. mulsd %xmm7, %xmm7
  431. movq 160(%esi), %xmm3
  432. movq 176(%esi), %xmm2
  433. mulsd %xmm7, %xmm3
  434. mulsd %xmm7, %xmm2
  435. addsd 192(%esi), %xmm3
  436. addsd 208(%esi), %xmm2
  437. mulsd %xmm7, %xmm3
  438. mulsd %xmm7, %xmm2
  439. addsd 224(%esi), %xmm3
  440. mulsd %xmm6, %xmm3
  441. addsd %xmm2, %xmm3
  442. mulsd %xmm1, %xmm3
  443. mulsd %xmm3, %xmm4
  444. mulsd %xmm1, %xmm0
  445. mulsd %xmm1, %xmm5
  446. addsd %xmm4, %xmm0
  447. mulsd 448(%esi), %xmm5
  448. mulsd 448(%esi), %xmm0
  449. mulsd 432(%esi), %xmm1
  450. mulsd 432(%esi), %xmm3
  451. jl .L_2TAG_PACKET_13.0.2
  452. movsd 72(%esp), %xmm2
  453. addsd %xmm5, %xmm0
  454. mulsd 240(%esi), %xmm1
  455. mulsd 240(%esi), %xmm3
  456. movq 48(%esi), %xmm4
  457. pand %xmm2, %xmm4
  458. subsd %xmm4, %xmm2
  459. movq %xmm1, %xmm5
  460. mulsd %xmm4, %xmm1
  461. mulsd %xmm3, %xmm4
  462. mulsd %xmm2, %xmm3
  463. mulsd %xmm5, %xmm2
  464. addsd %xmm4, %xmm3
  465. addsd %xmm3, %xmm2
  466. addsd %xmm2, %xmm1
  467. .L_2TAG_PACKET_14.0.2:
  468. movsd 8(%ecx), %xmm3
  469. pand 256(%esi), %xmm3
  470. movl 4(%ecx), %eax
  471. cmpl $0, %eax
  472. movl (%esp), %edx
  473. jl .L_2TAG_PACKET_15.0.2
  474. por %xmm3, %xmm1
  475. movsd %xmm0, (%edx)
  476. movsd %xmm1, 8(%edx)
  477. movl 40(%esp), %ebx
  478. jmp .L_2TAG_PACKET_4.0.2
  479. .L_2TAG_PACKET_15.0.2:
  480. por %xmm3, %xmm0
  481. movsd %xmm1, (%edx)
  482. movsd %xmm0, 8(%edx)
  483. movl 40(%esp), %ebx
  484. jmp .L_2TAG_PACKET_4.0.2
  485. .L_2TAG_PACKET_12.0.2:
  486. psrlq $29, %xmm2
  487. pand 64(%esi), %xmm2
  488. pxor 80(%esi), %xmm2
  489. paddd 96(%esi), %xmm2
  490. rsqrtss %xmm2, %xmm2
  491. psubd 112(%esi), %xmm1
  492. psrld $3, %xmm2
  493. pand 128(%esi), %xmm1
  494. psrld $1, %xmm1
  495. paddd 144(%esi), %xmm2
  496. psubd %xmm1, %xmm2
  497. psllq $32, %xmm2
  498. movq %xmm2, %xmm1
  499. mulsd %xmm2, %xmm2
  500. mulsd %xmm2, %xmm7
  501. mulsd %xmm0, %xmm2
  502. subsd 32(%esi), %xmm7
  503. addsd %xmm2, %xmm7
  504. movq %xmm7, %xmm6
  505. mulsd %xmm7, %xmm7
  506. movq 160(%esi), %xmm3
  507. movq 176(%esi), %xmm2
  508. mulsd %xmm7, %xmm3
  509. mulsd %xmm7, %xmm2
  510. addsd 192(%esi), %xmm3
  511. addsd 208(%esi), %xmm2
  512. mulsd %xmm7, %xmm3
  513. mulsd %xmm7, %xmm2
  514. addsd 224(%esi), %xmm3
  515. mulsd %xmm6, %xmm3
  516. addsd %xmm2, %xmm3
  517. mulsd %xmm1, %xmm3
  518. mulsd %xmm3, %xmm4
  519. mulsd %xmm1, %xmm0
  520. mulsd %xmm1, %xmm5
  521. addsd %xmm4, %xmm0
  522. .L_2TAG_PACKET_13.0.2:
  523. addsd %xmm5, %xmm0
  524. movq 72(%esp), %xmm2
  525. mulsd 368(%esi), %xmm2
  526. mulsd 336(%esi), %xmm2
  527. movq 48(%esi), %xmm4
  528. pand %xmm2, %xmm4
  529. subsd %xmm4, %xmm2
  530. movq %xmm1, %xmm5
  531. mulsd %xmm4, %xmm1
  532. mulsd %xmm3, %xmm4
  533. mulsd %xmm2, %xmm3
  534. mulsd %xmm5, %xmm2
  535. addsd %xmm4, %xmm3
  536. addsd %xmm3, %xmm2
  537. movq %xmm1, %xmm3
  538. addsd %xmm2, %xmm1
  539. pextrw $3, %xmm1, %eax
  540. mulsd 384(%esi), %xmm1
  541. mulsd 400(%esi), %xmm1
  542. cmpl $19216, %eax
  543. jge .L_2TAG_PACKET_14.0.2
  544. movq 384(%esi), %xmm5
  545. mulsd %xmm5, %xmm5
  546. cmpl $19056, %eax
  547. jle .L_2TAG_PACKET_14.0.2
  548. movq 48(%esi), %xmm1
  549. pand %xmm3, %xmm1
  550. subsd %xmm1, %xmm3
  551. addsd %xmm3, %xmm2
  552. mulsd 384(%esi), %xmm1
  553. mulsd 384(%esi), %xmm2
  554. mulsd 400(%esi), %xmm1
  555. mulsd 400(%esi), %xmm2
  556. addsd %xmm2, %xmm1
  557. jmp .L_2TAG_PACKET_14.0.2
  558. .L_2TAG_PACKET_6.0.2:
  559. .L_2TAG_PACKET_16.0.2:
  560. movq 32(%esi), %xmm0
  561. addsd 8(%ecx), %xmm0
  562. movsd %xmm0, 8(%edx)
  563. mulsd %xmm0, %xmm0
  564. movsd %xmm0, (%edx)
  565. movl 40(%esp), %ebx
  566. jmp .L_2TAG_PACKET_4.0.2
  567. .L_2TAG_PACKET_5.0.2:
  568. movsd (%ecx), %xmm1
  569. mulsd %xmm1, %xmm1
  570. cmpl $2146435072, %ebx
  571. jl .L_2TAG_PACKET_17.0.2
  572. movl 8(%ecx), %ebx
  573. jg .L_2TAG_PACKET_18.0.2
  574. andl %ebx, %ebx
  575. jne .L_2TAG_PACKET_18.0.2
  576. jmp .L_2TAG_PACKET_16.0.2
  577. .L_2TAG_PACKET_18.0.2:
  578. movq 32(%esi), %xmm0
  579. addsd 8(%ecx), %xmm0
  580. cmpl $2146435072, %eax
  581. movl (%ecx), %ebx
  582. jg .L_2TAG_PACKET_19.0.2
  583. andl %ebx, %ebx
  584. movl 4(%ecx), %ebx
  585. jne .L_2TAG_PACKET_19.0.2
  586. cmpl $2146435072, %ebx
  587. je .L_2TAG_PACKET_20.0.2
  588. mulsd %xmm1, %xmm1
  589. movsd %xmm0, (%edx)
  590. movsd %xmm1, 8(%edx)
  591. movl 40(%esp), %ebx
  592. jmp .L_2TAG_PACKET_4.0.2
  593. .L_2TAG_PACKET_19.0.2:
  594. .L_2TAG_PACKET_20.0.2:
  595. movsd %xmm1, (%edx)
  596. movsd %xmm0, 8(%edx)
  597. movl 40(%esp), %ebx
  598. jmp .L_2TAG_PACKET_4.0.2
  599. .L_2TAG_PACKET_17.0.2:
  600. cmpl $2146435072, %eax
  601. movl (%ecx), %eax
  602. jg .L_2TAG_PACKET_21.0.2
  603. andl %eax, %eax
  604. jne .L_2TAG_PACKET_21.0.2
  605. pxor %xmm2, %xmm2
  606. movl 4(%ecx), %eax
  607. movq 8(%ecx), %xmm3
  608. testl $-2147483648, %eax
  609. pand 256(%esi), %xmm3
  610. jne .L_2TAG_PACKET_22.0.2
  611. por %xmm3, %xmm2
  612. movsd %xmm1, (%edx)
  613. movsd %xmm2, 8(%edx)
  614. movl 40(%esp), %ebx
  615. jmp .L_2TAG_PACKET_4.0.2
  616. .L_2TAG_PACKET_22.0.2:
  617. por %xmm3, %xmm1
  618. movsd %xmm2, (%edx)
  619. movsd %xmm1, 8(%edx)
  620. movl 40(%esp), %ebx
  621. jmp .L_2TAG_PACKET_4.0.2
  622. .L_2TAG_PACKET_21.0.2:
  623. movsd %xmm1, (%edx)
  624. movsd %xmm1, 8(%edx)
  625. movl 40(%esp), %ebx
  626. .L_2TAG_PACKET_4.0.2:
  627. movl 128(%esp), %esi
  628. movl %ebp, %esp
  629. popl %ebp
  630. ret $4
  631. ..B2.3:
  632. .align 16,0x90
  633. .type csqrt,@function
  634. .size csqrt,.-csqrt
  635. .data
  636. # -- End csqrt
  637. .section .rodata, "a"
  638. .align 16
  639. .align 16
  640. static_const_table:
  641. .long 4160749568
  642. .long 2147483647
  643. .long 4160749568
  644. .long 2147483647
  645. .long 4294967295
  646. .long 2147483647
  647. .long 4294967295
  648. .long 2147483647
  649. .long 0
  650. .long 1072693248
  651. .long 0
  652. .long 1072693248
  653. .long 4160749568
  654. .long 4294967295
  655. .long 4160749568
  656. .long 4294967295
  657. .long 16777215
  658. .long 16777215
  659. .long 16777215
  660. .long 16777215
  661. .long 8388608
  662. .long 8388608
  663. .long 8388608
  664. .long 8388608
  665. .long 1065353216
  666. .long 1065353216
  667. .long 1065353216
  668. .long 1065353216
  669. .long 1048576
  670. .long 1048576
  671. .long 1048576
  672. .long 1048576
  673. .long 4292870144
  674. .long 4292870144
  675. .long 4292870144
  676. .long 4292870144
  677. .long 1475346432
  678. .long 1475346432
  679. .long 1475346432
  680. .long 1475346432
  681. .long 0
  682. .long 3218046976
  683. .long 0
  684. .long 3218046976
  685. .long 0
  686. .long 1070694400
  687. .long 0
  688. .long 1070694400
  689. .long 0
  690. .long 3218341888
  691. .long 0
  692. .long 3218341888
  693. .long 0
  694. .long 1071120384
  695. .long 0
  696. .long 1071120384
  697. .long 0
  698. .long 3219128320
  699. .long 0
  700. .long 3219128320
  701. .long 0
  702. .long 1071644672
  703. .long 0
  704. .long 1071644672
  705. .long 0
  706. .long 2147483648
  707. .long 0
  708. .long 2147483648
  709. .long 0
  710. .long 533725184
  711. .long 0
  712. .long 533725184
  713. .long 0
  714. .long 1342177280
  715. .long 0
  716. .long 1342177280
  717. .long 0
  718. .long 1722810368
  719. .long 0
  720. .long 1722810368
  721. .long 0
  722. .long 747634688
  723. .long 0
  724. .long 747634688
  725. .long 0
  726. .long 1281359872
  727. .long 0
  728. .long 1281359872
  729. .long 0
  730. .long 967835648
  731. .long 0
  732. .long 967835648
  733. .long 0
  734. .long 2121269248
  735. .long 0
  736. .long 2121269248
  737. .long 0
  738. .long 24117248
  739. .long 0
  740. .long 24117248
  741. .long 0
  742. .long 862978048
  743. .long 0
  744. .long 862978048
  745. .long 0
  746. .long 1062207488
  747. .long 0
  748. .long 1062207488
  749. .long 0
  750. .long 1067450368
  751. .long 0
  752. .long 1067450368
  753. .long 0
  754. .long 1077936128
  755. .long 0
  756. .long 1077936128
  757. .type static_const_table,@object
  758. .size static_const_table,464
  759. .data
  760. .section .note.GNU-stack, ""
  761. # End