/* cbrtf_gen.S */

/*
 * Math library
 *
 * Copyright (C) 2016 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * * Neither the name of Intel Corporation nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * Author Name <jingwei.zhang@intel.com>
 * History:
 * 03-14-2016 Initial version. numerics svn rev. 12864
 */
  /*
   * File prelude: record the generator source name in the object and open
   * the code section.  The ..TXTST0 label is not referenced anywhere in
   * this file.
   */
        .file   "cbrtf_gen.c"
        .text
..TXTST0:
  /*
   * -- Begin cbrtf
   *
   * float cbrtf(float x) -- single-precision cube root.
   * Syntax: GNU as, AT&T operand order.  Target: x86-64, SSE2.
   *
   * In:      %xmm0 = x (low 32 bits)
   * Out:     %xmm0 = result (low 32 bits)
   * Clobber: %rax, %rcx, %rdx, %r8, %r9, %xmm1-%xmm7, flags
   * Stack:   24 bytes of locals; 8(%rsp) keeps a copy of x.  Makes no calls.
   *
   * Outline, as read from the code below:
   *   - The biased exponent e selects one of three paths: e == 0 (zero or
   *     subnormal), e == 255 (infinity or NaN), anything else (normal).
   *   - Normal path: q = (1365 * e) >> 12 and rem = e - 1 - 3 * q.  The
   *     scale factor is the float whose sign bit is the sign of x and whose
   *     exponent field is q + 85.  The table byte offset is
   *     128 * rem + 4 * (mantissa bits 22..18).
   *   - With r  = (xmm1 - xmm3) * rcp_table entry,
   *          T  = scale * cbrtf_table entry,
   *          D  = scale * D_table entry,
   *          c0..c3 = the four coeff_table words,
   *     the value returned is
   *          T + (D + (T * r) * (c3 + c1 * r + r * r * (c2 + c0 * r)))
   *   - Subnormal inputs are first normalised by counting how many left
   *     shifts bring the leading mantissa bit to the top, then reuse the
   *     same evaluation code.
   *   - +/-0 returns +/-0, +/-Inf returns +/-Inf, NaN returns x + x.
   */
        .text
        .align  16,0x90
        .globl  cbrtf
        .type   cbrtf,@function
cbrtf:
        .cfi_startproc
        subq    $24, %rsp
        .cfi_def_cfa_offset 32
        movss   %xmm0, 8(%rsp)          # raw x, reloaded by the Inf/NaN path

        /* Bit-mask constants and classification of the input. */
        movl    $8388607, %edx
        movd    %edx, %xmm5             # xmm5 = 0x007fffff, mantissa mask
        movl    $-1081999360, %eax
        movd    %eax, %xmm3             # xmm3 = 0xbf820000
        movl    $-1082130432, %eax
        movd    %eax, %xmm1             # xmm1 = 0xbf800000, i.e. -1.0f
        movl    $8257536, %edx
        movd    %edx, %xmm2             # xmm2 = 0x007e0000, mantissa bits 22..17
        pextrw  $1, %xmm0, %ecx         # ecx = bits 31..16 of x
        movl    %ecx, %eax
        andl    $124, %ecx              # ecx = 4 * (mantissa bits 22..18)
        lea     rcp_table(%rip), %r8
        movss   (%rcx,%r8), %xmm4       # xmm4 = rcp_table entry for this mantissa interval
        shrl    $7, %eax                # eax = sign:exponent, 9 bits
        movl    $255, %edx
        andl    %eax, %edx              # edx = biased exponent e
        movq    %rax, %r9               # r9  = sign:exponent
        testl   %edx, %edx
        je      .Lcbrtf_zero_or_subnormal
        cmpl    $255, %edx
        je      .Lcbrtf_inf_or_nan

        /* Normal input: build the reduced argument operands. */
        andps   %xmm0, %xmm2            # xmm2 = x & 0x007e0000
        andps   %xmm5, %xmm0            # xmm0 = mantissa field of x
        orps    %xmm2, %xmm3            # xmm3 = 0xbf820000 | top mantissa bits (bit 17 always set)
        orps    %xmm0, %xmm1            # xmm1 = 0xbf800000 | mantissa, i.e. -(1.f)
        movss   coeff_table(%rip), %xmm5        # c0
        movss   4+coeff_table(%rip), %xmm6      # c1

        /* eax = 1365 * e, built from shifts and adds. */
        movl    %edx, %eax
        addl    %eax, %eax
        addl    %eax, %eax
        addl    %edx, %eax              # eax = 5 * e
        movl    %eax, %edx              # edx = 5 * e
        addl    %eax, %eax
        addl    %eax, %eax
        addl    %eax, %eax
        addl    %eax, %eax              # eax = 80 * e
        addl    %eax, %edx              # edx = 85 * e
        addl    %eax, %eax
        addl    %eax, %eax
        addl    %eax, %eax
        addl    %eax, %eax              # eax = 1280 * e
        addl    %edx, %eax              # eax = 1365 * e

        movq    %r9, %rdx               # rdx = sign:exponent
        andq    $255, %r9               # r9  = e
        subq    $1, %r9                 # r9  = e - 1
        shrl    $12, %eax               # eax = q = (1365 * e) >> 12
        andl    $256, %edx              # edx = sign bit, at bit 8
        subq    %rax, %r9
        subq    %rax, %r9
        subq    %rax, %r9               # r9  = rem = e - 1 - 3 * q
        shlq    $7, %r9                 # r9  = 128 * rem, byte offset of the 32-entry group
        addl    $85, %eax               # eax = q + 85, exponent field of the scale
        orl     %edx, %eax              # eax = sign:(q + 85)
        movd    %eax, %xmm7
        addq    %r9, %rcx               # rcx = byte offset into cbrtf_table and D_table
        psllq   $23, %xmm7              # xmm7 = scale as a float bit pattern

        /*
         * Shared evaluation.  On entry:
         *   xmm1, xmm3 = operands of the reduced-argument subtraction
         *   xmm4 = rcp_table entry, xmm5 = c0, xmm6 = c1
         *   xmm7 = scale, rcx = byte offset into cbrtf_table and D_table
         */
.Lcbrtf_evaluate:
        movss   8+coeff_table(%rip), %xmm2      # c2
        movss   12+coeff_table(%rip), %xmm0     # c3
        subss   %xmm3, %xmm1            # xmm1 = xmm1 - xmm3
        movaps  %xmm7, %xmm3            # xmm3 = scale
        lea     cbrtf_table(%rip), %r8
        mulss   (%rcx,%r8), %xmm7       # xmm7 = T = scale * cbrtf_table entry
        mulss   %xmm4, %xmm1            # xmm1 = r
        lea     D_table(%rip), %r8
        mulss   (%rcx,%r8), %xmm3       # xmm3 = D = scale * D_table entry
        movss   %xmm1, %xmm4            # xmm4 = r
        mulss   %xmm1, %xmm5            # xmm5 = c0 * r
        mulss   %xmm1, %xmm6            # xmm6 = c1 * r
        mulss   %xmm1, %xmm1            # xmm1 = r * r
        addss   %xmm5, %xmm2            # xmm2 = c2 + c0 * r
        addss   %xmm6, %xmm0            # xmm0 = c3 + c1 * r
        mulss   %xmm1, %xmm2            # xmm2 = r * r * (c2 + c0 * r)
        mulss   %xmm7, %xmm4            # xmm4 = T * r
        addss   %xmm2, %xmm0            # xmm0 = polynomial value
        mulss   %xmm4, %xmm0            # xmm0 = T * r * polynomial
        addss   %xmm3, %xmm0            # + D
        addss   %xmm7, %xmm0            # + T
        jmp     .Lcbrtf_return

        /* Exponent field is zero: x is +/-0 or subnormal.  edx is 0 here. */
.Lcbrtf_zero_or_subnormal:
        movd    %xmm0, %eax             # eax = raw bits of x
        movq    %rax, %r9
        shll    $9, %eax                # mantissa field moved up to bits 31..9
        movl    $23, %ecx               # at most 23 normalisation steps
        shrq    $23, %r9                # r9 = sign bit, at bit 8
.Lcbrtf_normalize_loop:
        cmpl    $-2147483648, %eax      # unsigned compare against 0x80000000
        jae     .Lcbrtf_subnormal       # leading one has reached bit 31
        shll    $1, %eax
        addl    $1, %edx                # edx = number of shifts so far
        decl    %ecx
        jnz     .Lcbrtf_normalize_loop
        testl   %eax, %eax
        je      .Lcbrtf_zero            # every mantissa bit was zero

.Lcbrtf_subnormal:
        movl    %edx, %ecx
        incl    %ecx
        movd    %ecx, %xmm7             # shift count = edx + 1
        orq     %rdx, %r9               # r9 = sign bit | shift counter
        psllq   %xmm7, %xmm0            # leading one moves to bit 23 of x
        shrl    $24, %eax
        andl    $124, %eax              # eax = 4 * (five bits below the leading one)
        lea     rcp_table(%rip), %r8
        /* movss, not movsd: entries are 4 bytes wide and an 8-byte load at
           offset 124 would read past the end of rcp_table. */
        movss   (%r8,%rax), %xmm4
        movl    %eax, %ecx              # ecx = mantissa part of the table offset
        andps   %xmm0, %xmm2            # same operand construction as the normal path
        andps   %xmm5, %xmm0
        orps    %xmm2, %xmm3
        orps    %xmm0, %xmm1
        movss   coeff_table(%rip), %xmm5        # c0
        movl    $1366, %eax
        movss   4+coeff_table(%rip), %xmm6      # c1
        mull    %edx                    # edx:eax = 1366 * shift counter
        movq    %r9, %rdx               # rdx = sign bit | shift counter
        andq    $255, %r9               # r9  = shift counter
        addq    $1, %r9
        shrl    $12, %eax               # eax = q = (1366 * counter) >> 12
        andl    $256, %edx              # edx = sign bit, at bit 8
        subq    %rax, %r9
        subq    %rax, %r9
        subq    %rax, %r9               # r9  = counter + 1 - 3 * q
        testq   %r9, %r9
        je      .Lcbrtf_subnormal_index
        incl    %eax                    # q += 1
        subq    $3, %r9
        negq    %r9                     # r9 = 3 - r9
.Lcbrtf_subnormal_index:
        shlq    $7, %r9                 # r9 = 128 * group index
        addq    %r9, %rcx               # rcx = byte offset into cbrtf_table and D_table
        movq    $85, %r9
        subq    %rax, %r9               # r9 = 85 - q, exponent field of the scale
        orq     %r9, %rdx               # rdx = sign:(85 - q)
        movd    %edx, %xmm7
        psllq   $23, %xmm7              # xmm7 = scale as a float bit pattern
        jmp     .Lcbrtf_evaluate

        /* x is +0 or -0; r9 holds only the sign bit here. */
.Lcbrtf_zero:
        testq   %r9, %r9
        jne     .Lcbrtf_negative_zero
        xorps   %xmm0, %xmm0            # +0.0f
        jmp     .Lcbrtf_return
.Lcbrtf_negative_zero:
        movss   4+ZERON(%rip), %xmm0    # second ZERON word, 0x80000000 = -0.0f
        jmp     .Lcbrtf_return

        /* Exponent field is 255: x is +/-Inf or NaN. */
.Lcbrtf_inf_or_nan:
        movl    8(%rsp), %eax           # eax = raw bits of x
        movl    %eax, %ecx
        andl    $2147483647, %ecx       # ecx = bits of x with the sign cleared
        cmpl    $2139095040, %ecx       # 0x7f800000
        ja      .Lcbrtf_nan             # nonzero mantissa
        cmpl    $2139095040, %eax
        jne     .Lcbrtf_negative_inf
        movss   INFS(%rip), %xmm0
        jmp     .Lcbrtf_return
.Lcbrtf_negative_inf:
        movss   NEG_INFS(%rip), %xmm0
        jmp     .Lcbrtf_return
.Lcbrtf_nan:
        movss   8(%rsp), %xmm0
        addss   %xmm0, %xmm0            # result is x + x

.Lcbrtf_return:
        addq    $24, %rsp
        .cfi_def_cfa_offset 8
        ret
        .align  16,0x90
        .cfi_endproc
        .size   cbrtf,.-cbrtf
        .data
  /* -- End cbrtf */
  /*
   * rcp_table: 32 single-precision words (128 bytes), indexed by cbrtf with
   * the byte offset 4 * (mantissa bits 22..18).  All entries have the sign
   * bit set.  Decoding the first and last words gives about -1/(1 + 1/64)
   * and -1/(1 + 63/64), so entry i looks like -1/(1 + (2*i + 1)/64);
   * NOTE(review): confirm against the table generator.
   */
        .section .rodata, "a"
        .align  4
rcp_table:
        .long   3212578753, 3212085645, 3211621124, 3211182772  # entries  0..3
        .long   3210768440, 3210376206, 3210004347, 3209651317  # entries  4..7
        .long   3209315720, 3208996296, 3208691905, 3208401508  # entries  8..11
        .long   3208124163, 3207859009, 3207605259, 3207362194  # entries 12..15
        .long   3207129151, 3206905525, 3206690755, 3206484326  # entries 16..19
        .long   3206285761, 3206094618, 3205910490, 3205732998  # entries 20..23
        .long   3205561788, 3205396533, 3205236929, 3205082689  # entries 24..27
        .long   3204933547, 3204789256, 3204649583, 3204514308  # entries 28..31
        .type   rcp_table,@object
        .size   rcp_table,128
  /*
   * coeff_table: four single-precision polynomial coefficients, read by
   * cbrtf at byte offsets 0, 4, 8 and 12 (c0..c3).  cbrtf evaluates
   *     c3 + c1 * r + r * r * (c2 + c0 * r)
   * The decoded values are close to -10/243, -1/9, 5/81 and 1/3, which
   * presumably come from the series of (1 + r)^(1/3) -- verify against the
   * generator before relying on that.
   */
        .align  4
coeff_table:
        .long   3173551943              # c0, about -0.0412
        .long   3185806905              # c1, about -0.1111
        .long   1031591658              # c2, about  0.0617
        .long   1051372203              # c3, about  0.3333
        .type   coeff_table,@object
        .size   coeff_table,16
  /*
   * cbrtf_table: 96 single-precision words (384 bytes) in three groups of
   * 32.  cbrtf indexes it with byte offset 128 * group + 4 * i, where i is
   * taken from mantissa bits 22..18 and group is in 0..2.  Decoding the first
   * word of each group gives about cbrt(1.015625), cbrt(2.03125) and
   * cbrt(4.0625), so the entries look like cbrt(2^group * (1 + (2*i + 1)/64));
   * NOTE(review): confirm against the table generator.
   */
        .align  4
cbrtf_table:
        /* group 0 */
        .long   1065396681, 1065482291, 1065566215, 1065648532
        .long   1065729317, 1065808640, 1065886565, 1065963152
        .long   1066038457, 1066112533, 1066185428, 1066257188
        .long   1066327857, 1066397474, 1066466079, 1066533708
        .long   1066600394, 1066666169, 1066731064, 1066795108
        .long   1066858329, 1066920751, 1066982401, 1067043301
        .long   1067103474, 1067162941, 1067221722, 1067279837
        .long   1067337305, 1067394143, 1067450368, 1067505996
        /* group 1 */
        .long   1067588354, 1067696217, 1067801953, 1067905666
        .long   1068007450, 1068107390, 1068205570, 1068302063
        .long   1068396942, 1068490271, 1068582113, 1068672525
        .long   1068761562, 1068849275, 1068935712, 1069020919
        .long   1069104937, 1069187809, 1069269572, 1069350263
        .long   1069429915, 1069508563, 1069586236, 1069662966
        .long   1069738778, 1069813702, 1069887762, 1069960982
        .long   1070033387, 1070104998, 1070175837, 1070245925
        /* group 2 */
        .long   1070349689, 1070485588, 1070618808, 1070749478
        .long   1070877717, 1071003634, 1071127332, 1071248907
        .long   1071368446, 1071486034, 1071601747, 1071715659
        .long   1071827839, 1071938350, 1072047254, 1072154608
        .long   1072260465, 1072364876, 1072467891, 1072569555
        .long   1072669911, 1072769001, 1072866863, 1072963536
        .long   1073059054, 1073153452, 1073246762, 1073339014
        .long   1073430238, 1073520462, 1073609714, 1073698019
        .type   cbrtf_table,@object
        .size   cbrtf_table,384
  /*
   * D_table: 96 single-precision words (384 bytes), laid out and indexed
   * exactly like cbrtf_table (three groups of 32, same byte offset).  cbrtf
   * multiplies the selected word by the scale factor and adds it to the
   * result just before the cbrtf_table term.  The magnitudes are tiny, so
   * these are presumably low-order correction terms for the matching
   * cbrtf_table entries -- TODO confirm against the generator.
   */
        .align  4
D_table:
        /* group 0 */
        .long   839340838, 867750258, 851786446, 853949398
        .long   864938789, 864102364, 864209792, 865422805
        .long   867593594, 854482593, 848298042, 860064854
        .long   844792593, 870701309, 872023170, 860255342
        .long   849966899, 863561479, 869115319, 871961375
        .long   859537336, 871954398, 863817578, 861687921
        .long   849594757, 816486846, 858183533, 864500406
        .long   850523240, 808125243, 514020693, 861173761
        /* group 1 */
        .long   859000219, 823158129, 871826232, 871183196
        .long   839030530, 867690638, 840440923, 868033274
        .long   855856030, 865094453, 860418487, 866225006
        .long   866458226, 865124659, 864837702, 811742505
        .long   869432099, 864584201, 864183978, 844810573
        .long   869245699, 859556409, 870675446, 814190139
        .long   870686941, 861800510, 855649163, 869347119
        .long   864252033, 867276215, 868189817, 849541095
        /* group 2 */
        .long   866633177, 843967686, 857522493, 862339487
        .long   850054662, 864048556, 868027089, 848093931
        .long   865355299, 848111485, 865557362, 870297525
        .long   863416216, 869675693, 865888071, 825332584
        .long   843309506, 870885636, 869119784, 865466648
        .long   867459244, 861192764, 871247716, 864927982
        .long   869195129, 864849564, 840005936, 852579258
        .long   860852782, 869711141, 862506141, 837959274
        .type   D_table,@object
        .size   D_table,384
  /*
   * ZERON: two 32-bit words, the bit patterns of +0.0f and -0.0f.  cbrtf
   * loads the second word (offset 4) as its result for a negative zero.
   */
        .align  4
ZERON:
        .long   0x00000000              # +0.0f
        .long   0x80000000              # -0.0f
        .type   ZERON,@object
        .size   ZERON,8
  /* INFS: bit pattern of +Inf in single precision, returned by cbrtf for +Inf. */
        .align  4
INFS:
        .long   0x7f800000
        .type   INFS,@object
        .size   INFS,4
  /* NEG_INFS: bit pattern of -Inf in single precision, returned by cbrtf for -Inf. */
        .align  4
NEG_INFS:
        .long   0xff800000
        .type   NEG_INFS,@object
        .size   NEG_INFS,4
  /*
   * File epilogue: section bookkeeping only; none of these directives emits
   * code or data.
   */
        .data
        /* Empty note section; by GNU toolchain convention this requests a
           non-executable stack for the object. */
        .section .note.GNU-stack, ""
  /* -- Begin DWARF2 SEGMENT .eh_frame */
        .section .eh_frame,"a",@progbits
.eh_frame_seg:
        .align  1
  /* End */