/*
 * Math library
 *
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * Author Name <jingwei.zhang@intel.com>
 * History:
 * 03-14-2016 Initial version. numerics svn rev. 12864
 */
  .file "atan2f_gen.c"
  .text
..TXTST0:
# -- Begin atan2f
  .text
  .align 16,0x90
  .globl atan2f
/*
 * float atan2f(float y, float x)
 *
 * Syntax: GNU as, AT&T operand order.
 * In:     %xmm0 = y, %xmm1 = x (single precision)
 * Out:    %xmm0 = result, rounded to single precision on every path
 * Stack:  leaf routine; scratch slots at -40(%rsp) .. -8(%rsp) are used
 *         without moving %rsp, i.e. it relies on the System V red zone.
 * Clobb:  %rax, %rcx, %rdx, %rsi, %rdi, %r8, %xmm0-%xmm7, flags
 *
 * Integer register roles after the entry block:
 *   %edi = bit pattern of |y|      %eax = sign bit of y (0 or 1)
 *   %r8d = bit pattern of |x|      %edx = sign bit of x (0 or 1)
 * The sign bits are used as indices into two-entry constant tables
 * (entry 0 for a clear sign bit, entry 1 for a set sign bit).
 *
 * Dispatch:
 *   - NaN in either argument           -> x * y
 *   - infinities and zeros             -> table constants
 *   - finite non-zero arguments        -> widened to double, then one of
 *     three evaluation schemes chosen from the gap between the double
 *     exponents of |y| and |x| (see ..B1.25 onwards).
 *
 * Several branches consume flags that were set many instructions
 * earlier; this is safe only because the SSE moves and arithmetic in
 * between do not write EFLAGS. Do not insert flag-writing instructions
 * between a cmpl/testl and its annotated consumer.
 */
atan2f:
# parameter 1: %xmm0
# parameter 2: %xmm1
..B1.1:
  .cfi_startproc
..___tag_value_atan2f.1:
..L2:
  movd %xmm0, %esi                /* esi = raw bits of y */
  movd %xmm1, %ecx                /* ecx = raw bits of x */
  movss %xmm0, -16(%rsp)          /* keep y for the NaN and main paths */
  movss %xmm1, -8(%rsp)           /* keep x for the NaN and main paths */
  movl %esi, %edi
  movl %esi, %eax
  movl %ecx, %edx
  movl %ecx, %r8d
  andl $2147483647, %edi          /* edi = |y| bits (mask 0x7fffffff) */
  andl $2147483647, %r8d          /* r8d = |x| bits */
  shrl $31, %eax                  /* eax = sign(y) */
  shrl $31, %edx                  /* edx = sign(x) */
  cmpl $2139095040, %edi          /* |y| vs 0x7f800000 (+Inf bits) */
  movl %esi, -24(%rsp)            /* scratch store; slot is never read back. mov keeps flags */
  jl ..B1.3                       /* y finite */
..B1.2:
  jg ..B1.16                      /* y is NaN */
..B1.52:
  /* y is +-Inf */
  cmpl $2139095040, %r8d
  jg ..B1.16                      /* x is NaN */
  jmp ..B1.5                      /* flags still describe |x| vs Inf */
..B1.3:
  cmpl $2139095040, %r8d
  jl ..B1.17                      /* both finite */
..B1.4:
  jg ..B1.16                      /* x is NaN */
..B1.5:
  /* Reached with flags from "cmpl Inf, |x|" and at least one Inf input. */
  jl ..B1.15                      /* x finite, so y is Inf -> +-pi/2 */
..B1.6:
  /* x is +-Inf */
  movl %eax, %eax
  cmpl $2139095040, %edi
  jge ..B1.11                     /* y is Inf as well */
..B1.7:
  /* y finite, x infinite */
  testl %edx, %edx
  je ..B1.9                       /* x = +Inf -> signed zero */
..B1.8:
  /* x = -Inf: pi[sign(y)] plus a tiny same-signed addend */
  lea pi(%rip), %rdx
  lea _small_value_64(%rip), %rcx
  movsd (%rdx,%rax,8), %xmm0
  addsd (%rcx,%rax,8), %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.9:
  lea _zeros(%rip), %rdx
  movss (%rdx,%rax,4), %xmm0      /* zero carrying the sign of y */
..B1.10:
  ret
..B1.11:
  /* both infinite: +-3pi/4 for x < 0, +-pi/4 for x > 0 */
  lea _small_value_64(%rip), %rcx
  testl %edx, %edx
  movsd (%rcx,%rax,8), %xmm1      /* mov keeps the flags for je below */
  je ..B1.13
..B1.12:
  lea pi34(%rip), %rdx
  movsd (%rdx,%rax,8), %xmm0
  addsd %xmm1, %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.13:
  lea pi4(%rip), %rdx
  movsd (%rdx,%rax,8), %xmm0
  addsd %xmm1, %xmm0
  cvtsd2ss %xmm0, %xmm0
..B1.14:
  ret
..B1.15:
  /* y infinite, x finite: pi2[sign(y)] plus tiny addend */
  movl %eax, %eax
  lea pi2(%rip), %rdx
  lea _small_value_64(%rip), %rcx
  movsd (%rdx,%rax,8), %xmm0
  addsd (%rcx,%rax,8), %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.16:
  /* NaN operand: return x * y so the NaN propagates */
  movss -8(%rsp), %xmm0
  mulss -16(%rsp), %xmm0
  ret
..B1.17:
  /* Both finite. A non-zero AND of the magnitudes proves both are
     non-zero; otherwise test each operand for zero explicitly. */
  testl %r8d, %edi
  jne ..B1.25
..B1.18:
  testl %edi, %edi
  jne ..B1.24                     /* y != 0 */
..B1.19:
  /* y == 0 */
  movl %eax, %eax
  testl %r8d, %r8d
  je ..B1.45                      /* x == 0 too */
..B1.20:
  /* y == 0, x != 0 */
  testl %edx, %edx
  je ..B1.22                      /* x > 0 -> signed zero */
..B1.21:
  /* x < 0 -> pi[sign(y)] plus tiny addend */
  lea pi(%rip), %rdx
  lea _small_value_64(%rip), %rcx
  movsd (%rdx,%rax,8), %xmm0
  addsd (%rcx,%rax,8), %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.22:
  lea _zeros(%rip), %rdx
  movss (%rdx,%rax,4), %xmm0
..B1.23:
  ret
..B1.24:
  /* y != 0 */
  testl %r8d, %r8d
  je ..B1.49                      /* x == 0 -> +-pi/2 */
..B1.25:
  /* Main path: widen both operands to double.
     xmm0 = (double)y, xmm5 = (double)x.
     edi / esi = high 32 bits of |y| / |x| as doubles, so integer
     compares on them order the magnitudes by exponent first. */
  pxor %xmm0, %xmm0
  pxor %xmm5, %xmm5
  cvtss2sd -16(%rsp), %xmm0
  cvtss2sd -8(%rsp), %xmm5
  movsd %xmm0, -40(%rsp)
  movsd %xmm5, -32(%rsp)
  movl -36(%rsp), %edi
  movl -28(%rsp), %esi
  andl $2147483647, %edi
  andl $2147483647, %esi
  cmpl %esi, %edi
  jl ..B1.32                      /* hi(|y|) < hi(|x|) */
..B1.26:
  /* hi(|y|) >= hi(|x|). 1048576 = 0x00100000 is one step of the double
     exponent field in the high word. */
  lea 1048576(%rsi), %ecx
  cmpl %ecx, %edi
  jle ..B1.31                     /* magnitudes at most one exponent step apart */
..B1.27:
  movl %eax, %eax
  lea pi2(%rip), %rdx
  addl $33554432, %esi            /* 0x02000000 = 32 exponent steps */
  cmpl %esi, %edi
  movsd (%rdx,%rax,8), %xmm4      /* xmm4 = pi2[sign(y)]; mov keeps flags */
  jle ..B1.29
..B1.28:
  /* |y| far larger than |x|: result = pi2[sign(y)] - x/y */
  divsd %xmm0, %xmm5
  subsd %xmm5, %xmm4
  cvtsd2ss %xmm4, %xmm4
  jmp ..B1.30
..B1.29:
  /* r = x/y; result = pi2[sign(y)] - r - r*P(r^2).
     xmm1 = r^2, xmm0 = r^4; the polynomial is split into two chains
     in r^4 (packets 6-9 and 10-13) that are combined at the end. */
  divsd %xmm0, %xmm5
  movaps %xmm5, %xmm1
  subsd %xmm5, %xmm4
  mulsd %xmm5, %xmm1
  movaps %xmm1, %xmm0
  mulsd %xmm1, %xmm0
  movsd .L_2il0floatpacket.6(%rip), %xmm3
  movsd .L_2il0floatpacket.10(%rip), %xmm2
  mulsd %xmm0, %xmm3
  mulsd %xmm0, %xmm2
  addsd .L_2il0floatpacket.7(%rip), %xmm3
  addsd .L_2il0floatpacket.11(%rip), %xmm2
  mulsd %xmm0, %xmm3
  mulsd %xmm0, %xmm2
  addsd .L_2il0floatpacket.8(%rip), %xmm3
  addsd .L_2il0floatpacket.12(%rip), %xmm2
  mulsd %xmm0, %xmm3
  mulsd %xmm0, %xmm2
  addsd .L_2il0floatpacket.9(%rip), %xmm3
  addsd .L_2il0floatpacket.13(%rip), %xmm2
  mulsd %xmm0, %xmm3
  mulsd %xmm1, %xmm2
  addsd %xmm2, %xmm3
  mulsd %xmm3, %xmm5
  subsd %xmm5, %xmm4
  cvtsd2ss %xmm4, %xmm4
..B1.30:
  movaps %xmm4, %xmm0
  ret
..B1.31:
  /* |y| >= |x| with comparable magnitudes.
     Multiply each operand by _ones[sign] to obtain |y| and |x|, form
     t = (|y| - |x|) / (|y| + |x|), evaluate t * Q(t^2) and add
     pi4n[sign(x)]. The sign is fixed up last: the value is negated
     when sign(x) != sign(y). */
  movl %eax, %eax
  lea _ones(%rip), %rcx
  movl %edx, %edx
  pxor %xmm1, %xmm1
  pxor %xmm2, %xmm2
  lea pi4n(%rip), %rsi
  movsd .L_2il0floatpacket.3(%rip), %xmm4
  cmpl %eax, %edx                 /* flags consumed by "jne ..L3" below */
  cvtss2sd (%rcx,%rax,4), %xmm1
  cvtss2sd (%rcx,%rdx,4), %xmm2
  mulsd %xmm1, %xmm0              /* xmm0 = |y| */
  mulsd %xmm2, %xmm5              /* xmm5 = |x| */
  movaps %xmm0, %xmm6
  addsd %xmm5, %xmm0
  subsd %xmm5, %xmm6
  divsd %xmm0, %xmm6              /* xmm6 = t */
  movaps %xmm6, %xmm0
  mulsd %xmm6, %xmm0              /* xmm0 = t^2 */
  movaps %xmm0, %xmm3
  mulsd %xmm0, %xmm3              /* xmm3 = t^4 */
  movsd .L_2il0floatpacket.0(%rip), %xmm5
  mulsd %xmm3, %xmm5
  mulsd %xmm3, %xmm4
  addsd .L_2il0floatpacket.1(%rip), %xmm5
  addsd .L_2il0floatpacket.4(%rip), %xmm4
  mulsd %xmm3, %xmm5
  mulsd %xmm3, %xmm4
  addsd .L_2il0floatpacket.2(%rip), %xmm5
  addsd .L_2il0floatpacket.5(%rip), %xmm4
  mulsd %xmm0, %xmm5
  addsd %xmm4, %xmm5
  mulsd %xmm5, %xmm6
  addsd (%rsi,%rdx,8), %xmm6
  movaps %xmm6, %xmm0
  xorps .L_2il0floatpacket.15(%rip), %xmm0   /* xmm0 = -xmm6 */
  jne ..L3                        /* signs differ: keep the negated value */
  movaps %xmm6, %xmm0
..L3:
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.32:
  /* hi(|y|) < hi(|x|) */
  lea 1048576(%rdi), %ecx
  cmpl %ecx, %esi
  jle ..B1.44                     /* magnitudes at most one exponent step apart */
..B1.33:
  addl $33554432, %edi            /* 32 exponent steps */
  cmpl %edi, %esi
  jle ..B1.40
..B1.34:
  /* |x| far larger than |y| */
  testl %edx, %edx
  jne ..B1.39                     /* x < 0 */
..B1.35:
  /* x > 0: result = y/x. If |y/x| is below the _minnormf threshold,
     square a tiny float so the underflow exception is raised. */
  divsd %xmm5, %xmm0
  lea _minnormf(%rip), %rax
  movaps %xmm0, %xmm2
  andps .L_2il0floatpacket.16(%rip), %xmm2   /* xmm2 = |y/x| */
  movsd (%rax), %xmm1
  comisd %xmm2, %xmm1
  jbe ..B1.37                     /* threshold <= |y/x| (or unordered) */
..B1.36:
  movss .L_2il0floatpacket.14(%rip), %xmm1
  mulss %xmm1, %xmm1              /* product underflows in single precision */
  movss %xmm1, -24(%rsp)          /* sink for the product */
  jmp ..B1.38
..B1.37:
  movl $1065353216, -24(%rsp)     /* 0x3f800000 (1.0f) written to the same sink */
..B1.38:
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.39:
  /* x < 0: result = y/x + pi[sign(y)] */
  divsd %xmm5, %xmm0
  movl %eax, %eax
  lea pi(%rip), %rdx
  addsd (%rdx,%rax,8), %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.40:
  /* r = y/x; value = r + r*P(r^2), then pi[sign(y)] is added when
     x < 0. xmm2 = r^2, xmm1 = r^4. */
  divsd %xmm5, %xmm0
  movaps %xmm0, %xmm2
  testl %edx, %edx                /* flags consumed by "je ..B1.42" below */
  mulsd %xmm0, %xmm2
  movaps %xmm2, %xmm1
  mulsd %xmm2, %xmm1
  movsd .L_2il0floatpacket.6(%rip), %xmm4
  movsd .L_2il0floatpacket.10(%rip), %xmm3
  mulsd %xmm1, %xmm4
  mulsd %xmm1, %xmm3
  addsd .L_2il0floatpacket.7(%rip), %xmm4
  addsd .L_2il0floatpacket.11(%rip), %xmm3
  mulsd %xmm1, %xmm4
  mulsd %xmm1, %xmm3
  addsd .L_2il0floatpacket.8(%rip), %xmm4
  addsd .L_2il0floatpacket.12(%rip), %xmm3
  mulsd %xmm1, %xmm4
  mulsd %xmm1, %xmm3
  addsd .L_2il0floatpacket.9(%rip), %xmm4
  addsd .L_2il0floatpacket.13(%rip), %xmm3
  mulsd %xmm1, %xmm4
  mulsd %xmm2, %xmm3
  addsd %xmm3, %xmm4
  mulsd %xmm0, %xmm4
  addsd %xmm4, %xmm0
  je ..B1.42                      /* x > 0: no offset */
..B1.41:
  movl %eax, %eax
  lea pi(%rip), %rdx
  movsd (%rdx,%rax,8), %xmm1
  addsd %xmm1, %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.42:
  cvtsd2ss %xmm0, %xmm0
..B1.43:
  ret
..B1.44:
  /* |y| < |x| with comparable magnitudes.
     Same scheme as ..B1.31 with t = -(|y| - |x|) / (|y| + |x|) and the
     polynomial term subtracted from pi4n[sign(x)]; the final value is
     negated when sign(x) != sign(y). */
  movl %eax, %eax
  lea _ones(%rip), %rcx
  pxor %xmm1, %xmm1
  pxor %xmm2, %xmm2
  lea pi4n(%rip), %rsi
  movsd .L_2il0floatpacket.0(%rip), %xmm4
  cmpl %eax, %edx                 /* flags consumed by "jne ..L4" below */
  cvtss2sd (%rcx,%rax,4), %xmm1
  cvtss2sd (%rcx,%rdx,4), %xmm2
  mulsd %xmm1, %xmm0              /* xmm0 = |y| */
  mulsd %xmm2, %xmm5              /* xmm5 = |x| */
  movaps %xmm0, %xmm6
  addsd %xmm5, %xmm0
  subsd %xmm5, %xmm6
  xorps .L_2il0floatpacket.15(%rip), %xmm6   /* xmm6 = |x| - |y| */
  divsd %xmm0, %xmm6              /* xmm6 = t */
  movaps %xmm6, %xmm0
  mulsd %xmm6, %xmm0              /* xmm0 = t^2 */
  movaps %xmm0, %xmm5
  mulsd %xmm0, %xmm5              /* xmm5 = t^4 */
  mulsd %xmm5, %xmm4
  movsd .L_2il0floatpacket.3(%rip), %xmm3
  mulsd %xmm5, %xmm3
  addsd .L_2il0floatpacket.1(%rip), %xmm4
  mulsd %xmm5, %xmm4
  addsd .L_2il0floatpacket.4(%rip), %xmm3
  mulsd %xmm5, %xmm3
  addsd .L_2il0floatpacket.2(%rip), %xmm4
  mulsd %xmm0, %xmm4
  addsd .L_2il0floatpacket.5(%rip), %xmm3
  movsd (%rsi,%rdx,8), %xmm7
  addsd %xmm3, %xmm4
  mulsd %xmm4, %xmm6
  subsd %xmm6, %xmm7
  movaps %xmm7, %xmm0
  xorps .L_2il0floatpacket.15(%rip), %xmm0   /* xmm0 = -xmm7 */
  jne ..L4                        /* signs differ: keep the negated value */
  movaps %xmm7, %xmm0
..L4:
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.45:
  /* y == 0 and x == 0 */
  testl %edx, %edx
  je ..B1.47                      /* x = +0 -> signed zero */
..B1.46:
  /* x = -0 -> pi[sign(y)] plus tiny addend */
  lea pi(%rip), %rdx
  lea _small_value_64(%rip), %rcx
  movsd (%rdx,%rax,8), %xmm0
  addsd (%rcx,%rax,8), %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
..B1.47:
  lea _zeros(%rip), %rdx
  movss (%rdx,%rax,4), %xmm0
..B1.48:
  ret
..B1.49:
  /* x == 0, y != 0 -> pi2[sign(y)] plus tiny addend */
  lea pi2(%rip), %rdx
  lea _small_value_64(%rip), %rcx
  movsd (%rdx,%rax,8), %xmm0
  addsd (%rcx,%rax,8), %xmm0
  cvtsd2ss %xmm0, %xmm0
  ret
  .align 16,0x90
  .cfi_endproc
  .type atan2f,@function
  .size atan2f,.-atan2f
  .data
# -- End atan2f
  .section .rodata, "a"
/*
 * Read-only constants for atan2f.
 *
 * 64-bit values are written as two .long words, low word first
 * (little-endian double layout). Two-entry tables are indexed by a sign
 * bit: entry 0 for a clear sign bit, entry 1 for a set sign bit.
 */

/* 16-byte masks used as xorps/andps memory operands; those legacy SSE
   forms require 16-byte alignment, so keep the .align 16 on each. */
  .align 16
.L_2il0floatpacket.15:
  /* sign bit of the low double: xorps with this negates it */
  .long 0x00000000,0x80000000,0x00000000,0x00000000
  .type .L_2il0floatpacket.15,@object
  .size .L_2il0floatpacket.15,16
  .align 16
.L_2il0floatpacket.16:
  /* everything but the sign bit of the low double: andps gives |v| */
  .long 0xffffffff,0x7fffffff,0x00000000,0x00000000
  .type .L_2il0floatpacket.16,@object
  .size .L_2il0floatpacket.16,16

/* Packets 0-5: double coefficients of the polynomial evaluated on the
   (|y| - |x|) / (|y| + |x|) paths of atan2f. */
  .align 8
.L_2il0floatpacket.0:
  .long 0xd9d9aa33,0xbfb1c1c0
  .type .L_2il0floatpacket.0,@object
  .size .L_2il0floatpacket.0,8
  .align 8
.L_2il0floatpacket.1:
  .long 0x04ba093e,0xbfc24485
  .type .L_2il0floatpacket.1,@object
  .size .L_2il0floatpacket.1,8
  .align 8
.L_2il0floatpacket.2:
  .long 0x312dd43c,0xbfd55555
  .type .L_2il0floatpacket.2,@object
  .size .L_2il0floatpacket.2,8
  .align 8
.L_2il0floatpacket.3:
  .long 0x947e6edc,0x3fbbcbeb
  .type .L_2il0floatpacket.3,@object
  .size .L_2il0floatpacket.3,8
  .align 8
.L_2il0floatpacket.4:
  .long 0x700fa0b3,0x3fc9997b
  .type .L_2il0floatpacket.4,@object
  .size .L_2il0floatpacket.4,8
  .align 8
.L_2il0floatpacket.5:
  .long 0xfff8f7db,0x3fefffff
  .type .L_2il0floatpacket.5,@object
  .size .L_2il0floatpacket.5,8

/* Packets 6-13: double coefficients of the polynomial evaluated on the
   plain-quotient paths of atan2f (two interleaved chains: 6-9, 10-13). */
  .align 8
.L_2il0floatpacket.6:
  .long 0xa1fbc9d9,0x3f9a8eb6
  .type .L_2il0floatpacket.6,@object
  .size .L_2il0floatpacket.6,8
  .align 8
.L_2il0floatpacket.7:
  .long 0x713e98d0,0x3fb32474
  .type .L_2il0floatpacket.7,@object
  .size .L_2il0floatpacket.7,8
  .align 8
.L_2il0floatpacket.8:
  .long 0xa367efd7,0x3fbc70d3
  .type .L_2il0floatpacket.8,@object
  .size .L_2il0floatpacket.8,8
  .align 8
.L_2il0floatpacket.9:
  .long 0x8eac5238,0x3fc99999
  .type .L_2il0floatpacket.9,@object
  .size .L_2il0floatpacket.9,8
  .align 8
.L_2il0floatpacket.10:
  .long 0x95b6793b,0xbfac6c73
  .type .L_2il0floatpacket.10,@object
  .size .L_2il0floatpacket.10,8
  .align 8
.L_2il0floatpacket.11:
  .long 0x306ebb4b,0xbfb73640
  .type .L_2il0floatpacket.11,@object
  .size .L_2il0floatpacket.11,8
  .align 8
.L_2il0floatpacket.12:
  .long 0xe2eb2ece,0xbfc24920
  .type .L_2il0floatpacket.12,@object
  .size .L_2il0floatpacket.12,8
  .align 8
.L_2il0floatpacket.13:
  .long 0x5552abff,0xbfd55555
  .type .L_2il0floatpacket.13,@object
  .size .L_2il0floatpacket.13,8

/* Sign-indexed double tables. */
  .align 8
pi:
  /* { +pi, -pi } */
  .long 0x54442d18,0x400921fb
  .long 0x54442d18,0xc00921fb
  .type pi,@object
  .size pi,16
  .align 8
pi34:
  /* { +3*pi/4, -3*pi/4 } */
  .long 0x7f3321d2,0x4002d97c
  .long 0x7f3321d2,0xc002d97c
  .type pi34,@object
  .size pi34,16
  .align 8
pi4:
  /* { +pi/4, -pi/4 } */
  .long 0x54442d18,0x3fe921fb
  .long 0x54442d18,0xbfe921fb
  .type pi4,@object
  .size pi4,16
  .align 8
pi2:
  /* { +pi/2, -pi/2 } */
  .long 0x54442d18,0x3ff921fb
  .long 0x54442d18,0xbff921fb
  .type pi2,@object
  .size pi2,16
  .align 8
pi4n:
  /* { +pi/4, -3*pi/4 }; note the entries differ in magnitude */
  .long 0x54442d18,0x3fe921fb
  .long 0x7f3321d2,0xc002d97c
  .type pi4n,@object
  .size pi4n,16

  .align 4
.L_2il0floatpacket.14:
  /* float 2^-100; atan2f squares it to force a single-precision underflow */
  .long 0x0d800000
  .type .L_2il0floatpacket.14,@object
  .size .L_2il0floatpacket.14,4
  .align 4
.L_2il0floatpacket.17:
  /* float 1.0; not referenced by the code in this file */
  .long 0x3f800000
  .type .L_2il0floatpacket.17,@object
  .size .L_2il0floatpacket.17,4
  /* Read as 8-byte doubles, so aligned to 8 rather than 4. */
  .align 8
_small_value_64:
  /* doubles { +2^-1000, -2^-1000 }, added to the pi-based constants */
  .long 0
  .long 0x01700000
  .long 0
  .long 0x81700000
  .type _small_value_64,@object
  .size _small_value_64,16
  .align 4
_zeros:
  /* floats { +0.0, -0.0 } */
  .long 0
  .long 0x80000000
  .type _zeros,@object
  .size _zeros,8
  .align 4
_ones:
  /* floats { +1.0, -1.0 } */
  .long 0x3f800000
  .long 0xbf800000
  .type _ones,@object
  .size _ones,8
  /* Read as one 8-byte double, so aligned to 8 rather than 4. */
  .align 8
_minnormf:
  /* double 2^-126, the smallest normal single-precision magnitude */
  .long 0
  .long 0x38100000
  .type _minnormf,@object
  .size _minnormf,8
  .data
  .section .note.GNU-stack, ""
// -- Begin DWARF2 SEGMENT .eh_frame
  .section .eh_frame,"a",@progbits
.eh_frame_seg:
  .align 1
# End
  555. # End