# File: dclxvi-20130329/fp2e_mulxi.s
# Author: Ruben Niederhagen, Peter Schwabe
# Public Domain
# qhasm: enter fp2e_mulxi_qhasm
.text
.p2align 5
.globl _fp2e_mulxi_qhasm
.globl fp2e_mulxi_qhasm
_fp2e_mulxi_qhasm:
fp2e_mulxi_qhasm:
mov %rsp,%r11
and $31,%r11
add $0,%r11
sub %r11,%rsp
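# The four instructions above are qhasm's stack-alignment idiom: %r11
# receives %rsp mod 32 (the "add $0" is the zero-byte local-frame
# adjustment for this function), and the sub aligns %rsp down to a
# 32-byte boundary; the epilogue adds %r11 back to restore %rsp.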
# qhasm: int64 0rop
# qhasm: int64 0op
# qhasm: input 0rop
# qhasm: input 0op
# qhasm: int6464 0r0
# qhasm: int6464 0r1
# qhasm: int6464 0r2
# qhasm: int6464 0r3
# qhasm: int6464 0r4
# qhasm: int6464 0r5
# qhasm: int6464 0r6
# qhasm: int6464 0r7
# qhasm: int6464 0r8
# qhasm: int6464 0r9
# qhasm: int6464 0r10
# qhasm: int6464 0r11
# qhasm: int6464 0t0
# qhasm: int6464 0t1
# qhasm: int6464 0t2
# qhasm: int6464 0t3
# qhasm: 0r0 = *(int128 *)(0op + 0)
# asm 1: movdqa 0(<0op=int64#2),>0r0=int6464#1
# asm 2: movdqa 0(<0op=%rsi),>0r0=%xmm0
movdqa 0(%rsi),%xmm0
# qhasm: 0r1 = *(int128 *)(0op + 16)
# asm 1: movdqa 16(<0op=int64#2),>0r1=int6464#2
# asm 2: movdqa 16(<0op=%rsi),>0r1=%xmm1
movdqa 16(%rsi),%xmm1
# qhasm: 0r2 = *(int128 *)(0op + 32)
# asm 1: movdqa 32(<0op=int64#2),>0r2=int6464#3
# asm 2: movdqa 32(<0op=%rsi),>0r2=%xmm2
movdqa 32(%rsi),%xmm2
# qhasm: 0r3 = *(int128 *)(0op + 48)
# asm 1: movdqa 48(<0op=int64#2),>0r3=int6464#4
# asm 2: movdqa 48(<0op=%rsi),>0r3=%xmm3
movdqa 48(%rsi),%xmm3
# qhasm: 0r4 = *(int128 *)(0op + 64)
# asm 1: movdqa 64(<0op=int64#2),>0r4=int6464#5
# asm 2: movdqa 64(<0op=%rsi),>0r4=%xmm4
movdqa 64(%rsi),%xmm4
# qhasm: 0r5 = *(int128 *)(0op + 80)
# asm 1: movdqa 80(<0op=int64#2),>0r5=int6464#6
# asm 2: movdqa 80(<0op=%rsi),>0r5=%xmm5
movdqa 80(%rsi),%xmm5
# qhasm: 0r6 = *(int128 *)(0op + 96)
# asm 1: movdqa 96(<0op=int64#2),>0r6=int6464#7
# asm 2: movdqa 96(<0op=%rsi),>0r6=%xmm6
movdqa 96(%rsi),%xmm6
# qhasm: 0r7 = *(int128 *)(0op + 112)
# asm 1: movdqa 112(<0op=int64#2),>0r7=int6464#8
# asm 2: movdqa 112(<0op=%rsi),>0r7=%xmm7
movdqa 112(%rsi),%xmm7
# qhasm: 0r8 = *(int128 *)(0op + 128)
# asm 1: movdqa 128(<0op=int64#2),>0r8=int6464#9
# asm 2: movdqa 128(<0op=%rsi),>0r8=%xmm8
movdqa 128(%rsi),%xmm8
# qhasm: 0r9 = *(int128 *)(0op + 144)
# asm 1: movdqa 144(<0op=int64#2),>0r9=int6464#10
# asm 2: movdqa 144(<0op=%rsi),>0r9=%xmm9
movdqa 144(%rsi),%xmm9
# qhasm: 0r10 = *(int128 *)(0op + 160)
# asm 1: movdqa 160(<0op=int64#2),>0r10=int6464#11
# asm 2: movdqa 160(<0op=%rsi),>0r10=%xmm10
movdqa 160(%rsi),%xmm10
# qhasm: 0r11 = *(int128 *)(0op + 176)
# asm 1: movdqa 176(<0op=int64#2),>0r11=int6464#12
# asm 2: movdqa 176(<0op=%rsi),>0r11=%xmm11
movdqa 176(%rsi),%xmm11
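# All twelve 128-bit limbs of the fp2e operand (second argument, %rsi)
# are now in %xmm0-%xmm11.  In dclxvi an fp2e element a + b*i is stored
# as twelve packed pairs of doubles, one coefficient pair per 16-byte
# vector, 192 bytes in total.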
# qhasm: int6464 1t0
# qhasm: int6464 1t1
# qhasm: int6464 1t2
# qhasm: int6464 1t3
# qhasm: int6464 0t4
# qhasm: int6464 0t5
# qhasm: int6464 0t6
# qhasm: int6464 0t7
# qhasm: int6464 0t8
# qhasm: int6464 0t9
# qhasm: int6464 0t10
# qhasm: int6464 0t11
# qhasm: 1t0 = 0r0
# asm 1: movdqa <0r0=int6464#1,>1t0=int6464#13
# asm 2: movdqa <0r0=%xmm0,>1t0=%xmm12
movdqa %xmm0,%xmm12
# qhasm: float6464 0r0 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r0=int6464#1
# asm 2: mulpd THREE_MINUSONE,<0r0=%xmm0
mulpd THREE_MINUSONE,%xmm0
# qhasm: float6464 1t0 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<1t0=int6464#13
# asm 2: mulpd ONE_THREE,<1t0=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r0[0] += 0r0[1];0r0[1] = 1t0[0] + 1t0[1]
# asm 1: haddpd <1t0=int6464#13,<0r0=int6464#1
# asm 2: haddpd <1t0=%xmm12,<0r0=%xmm0
haddpd %xmm12,%xmm0
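# The four-instruction pattern above (copy, two mulpd by packed
# constants, haddpd) repeats once per limb below, always reusing %xmm12
# as the temporary.  With a limb holding the pair (a_j, b_j), mulpd by
# THREE_MINUSONE yields (3*a_j, -b_j) and mulpd by ONE_THREE yields
# (a_j, 3*b_j); haddpd then packs (3*a_j - b_j, a_j + 3*b_j), the
# coefficient pair of (a_j + b_j*i) * (3 + i).  This is the per-limb
# multiplication by xi = i + 3 that gives fp2e_mulxi its name, assuming
# THREE_MINUSONE = {3.0, -1.0} and ONE_THREE = {1.0, 3.0} (packed
# constants defined elsewhere in the package, as their names suggest).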
# qhasm: 1t1 = 0r1
# asm 1: movdqa <0r1=int6464#2,>1t1=int6464#13
# asm 2: movdqa <0r1=%xmm1,>1t1=%xmm12
movdqa %xmm1,%xmm12
# qhasm: float6464 0r1 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r1=int6464#2
# asm 2: mulpd THREE_MINUSONE,<0r1=%xmm1
mulpd THREE_MINUSONE,%xmm1
# qhasm: float6464 1t1 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<1t1=int6464#13
# asm 2: mulpd ONE_THREE,<1t1=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r1[0] += 0r1[1];0r1[1] = 1t1[0] + 1t1[1]
# asm 1: haddpd <1t1=int6464#13,<0r1=int6464#2
# asm 2: haddpd <1t1=%xmm12,<0r1=%xmm1
haddpd %xmm12,%xmm1
# qhasm: 1t2 = 0r2
# asm 1: movdqa <0r2=int6464#3,>1t2=int6464#13
# asm 2: movdqa <0r2=%xmm2,>1t2=%xmm12
movdqa %xmm2,%xmm12
# qhasm: float6464 0r2 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r2=int6464#3
# asm 2: mulpd THREE_MINUSONE,<0r2=%xmm2
mulpd THREE_MINUSONE,%xmm2
# qhasm: float6464 1t2 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<1t2=int6464#13
# asm 2: mulpd ONE_THREE,<1t2=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r2[0] += 0r2[1];0r2[1] = 1t2[0] + 1t2[1]
# asm 1: haddpd <1t2=int6464#13,<0r2=int6464#3
# asm 2: haddpd <1t2=%xmm12,<0r2=%xmm2
haddpd %xmm12,%xmm2
# qhasm: 1t3 = 0r3
# asm 1: movdqa <0r3=int6464#4,>1t3=int6464#13
# asm 2: movdqa <0r3=%xmm3,>1t3=%xmm12
movdqa %xmm3,%xmm12
# qhasm: float6464 0r3 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r3=int6464#4
# asm 2: mulpd THREE_MINUSONE,<0r3=%xmm3
mulpd THREE_MINUSONE,%xmm3
# qhasm: float6464 1t3 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<1t3=int6464#13
# asm 2: mulpd ONE_THREE,<1t3=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r3[0] += 0r3[1];0r3[1] = 1t3[0] + 1t3[1]
# asm 1: haddpd <1t3=int6464#13,<0r3=int6464#4
# asm 2: haddpd <1t3=%xmm12,<0r3=%xmm3
haddpd %xmm12,%xmm3
# qhasm: 0t4 = 0r4
# asm 1: movdqa <0r4=int6464#5,>0t4=int6464#13
# asm 2: movdqa <0r4=%xmm4,>0t4=%xmm12
movdqa %xmm4,%xmm12
# qhasm: float6464 0r4 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r4=int6464#5
# asm 2: mulpd THREE_MINUSONE,<0r4=%xmm4
mulpd THREE_MINUSONE,%xmm4
# qhasm: float6464 0t4 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t4=int6464#13
# asm 2: mulpd ONE_THREE,<0t4=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r4[0] += 0r4[1];0r4[1] = 0t4[0] + 0t4[1]
# asm 1: haddpd <0t4=int6464#13,<0r4=int6464#5
# asm 2: haddpd <0t4=%xmm12,<0r4=%xmm4
haddpd %xmm12,%xmm4
# qhasm: 0t5 = 0r5
# asm 1: movdqa <0r5=int6464#6,>0t5=int6464#13
# asm 2: movdqa <0r5=%xmm5,>0t5=%xmm12
movdqa %xmm5,%xmm12
# qhasm: float6464 0r5 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r5=int6464#6
# asm 2: mulpd THREE_MINUSONE,<0r5=%xmm5
mulpd THREE_MINUSONE,%xmm5
# qhasm: float6464 0t5 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t5=int6464#13
# asm 2: mulpd ONE_THREE,<0t5=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r5[0] += 0r5[1];0r5[1] = 0t5[0] + 0t5[1]
# asm 1: haddpd <0t5=int6464#13,<0r5=int6464#6
# asm 2: haddpd <0t5=%xmm12,<0r5=%xmm5
haddpd %xmm12,%xmm5
# qhasm: 0t6 = 0r6
# asm 1: movdqa <0r6=int6464#7,>0t6=int6464#13
# asm 2: movdqa <0r6=%xmm6,>0t6=%xmm12
movdqa %xmm6,%xmm12
# qhasm: float6464 0r6 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r6=int6464#7
# asm 2: mulpd THREE_MINUSONE,<0r6=%xmm6
mulpd THREE_MINUSONE,%xmm6
# qhasm: float6464 0t6 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t6=int6464#13
# asm 2: mulpd ONE_THREE,<0t6=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r6[0] += 0r6[1];0r6[1] = 0t6[0] + 0t6[1]
# asm 1: haddpd <0t6=int6464#13,<0r6=int6464#7
# asm 2: haddpd <0t6=%xmm12,<0r6=%xmm6
haddpd %xmm12,%xmm6
# qhasm: 0t7 = 0r7
# asm 1: movdqa <0r7=int6464#8,>0t7=int6464#13
# asm 2: movdqa <0r7=%xmm7,>0t7=%xmm12
movdqa %xmm7,%xmm12
# qhasm: float6464 0r7 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r7=int6464#8
# asm 2: mulpd THREE_MINUSONE,<0r7=%xmm7
mulpd THREE_MINUSONE,%xmm7
# qhasm: float6464 0t7 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t7=int6464#13
# asm 2: mulpd ONE_THREE,<0t7=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r7[0] += 0r7[1];0r7[1] = 0t7[0] + 0t7[1]
# asm 1: haddpd <0t7=int6464#13,<0r7=int6464#8
# asm 2: haddpd <0t7=%xmm12,<0r7=%xmm7
haddpd %xmm12,%xmm7
# qhasm: 0t8 = 0r8
# asm 1: movdqa <0r8=int6464#9,>0t8=int6464#13
# asm 2: movdqa <0r8=%xmm8,>0t8=%xmm12
movdqa %xmm8,%xmm12
# qhasm: float6464 0r8 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r8=int6464#9
# asm 2: mulpd THREE_MINUSONE,<0r8=%xmm8
mulpd THREE_MINUSONE,%xmm8
# qhasm: float6464 0t8 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t8=int6464#13
# asm 2: mulpd ONE_THREE,<0t8=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r8[0] += 0r8[1];0r8[1] = 0t8[0] + 0t8[1]
# asm 1: haddpd <0t8=int6464#13,<0r8=int6464#9
# asm 2: haddpd <0t8=%xmm12,<0r8=%xmm8
haddpd %xmm12,%xmm8
# qhasm: 0t9 = 0r9
# asm 1: movdqa <0r9=int6464#10,>0t9=int6464#13
# asm 2: movdqa <0r9=%xmm9,>0t9=%xmm12
movdqa %xmm9,%xmm12
# qhasm: float6464 0r9 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r9=int6464#10
# asm 2: mulpd THREE_MINUSONE,<0r9=%xmm9
mulpd THREE_MINUSONE,%xmm9
# qhasm: float6464 0t9 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t9=int6464#13
# asm 2: mulpd ONE_THREE,<0t9=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r9[0] += 0r9[1];0r9[1] = 0t9[0] + 0t9[1]
# asm 1: haddpd <0t9=int6464#13,<0r9=int6464#10
# asm 2: haddpd <0t9=%xmm12,<0r9=%xmm9
haddpd %xmm12,%xmm9
# qhasm: 0t10 = 0r10
# asm 1: movdqa <0r10=int6464#11,>0t10=int6464#13
# asm 2: movdqa <0r10=%xmm10,>0t10=%xmm12
movdqa %xmm10,%xmm12
# qhasm: float6464 0r10 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r10=int6464#11
# asm 2: mulpd THREE_MINUSONE,<0r10=%xmm10
mulpd THREE_MINUSONE,%xmm10
# qhasm: float6464 0t10 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t10=int6464#13
# asm 2: mulpd ONE_THREE,<0t10=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r10[0] += 0r10[1];0r10[1] = 0t10[0] + 0t10[1]
# asm 1: haddpd <0t10=int6464#13,<0r10=int6464#11
# asm 2: haddpd <0t10=%xmm12,<0r10=%xmm10
haddpd %xmm12,%xmm10
# qhasm: 0t11 = 0r11
# asm 1: movdqa <0r11=int6464#12,>0t11=int6464#13
# asm 2: movdqa <0r11=%xmm11,>0t11=%xmm12
movdqa %xmm11,%xmm12
# qhasm: float6464 0r11 *= THREE_MINUSONE
# asm 1: mulpd THREE_MINUSONE,<0r11=int6464#12
# asm 2: mulpd THREE_MINUSONE,<0r11=%xmm11
mulpd THREE_MINUSONE,%xmm11
# qhasm: float6464 0t11 *= ONE_THREE
# asm 1: mulpd ONE_THREE,<0t11=int6464#13
# asm 2: mulpd ONE_THREE,<0t11=%xmm12
mulpd ONE_THREE,%xmm12
# qhasm: float6464 0r11[0] += 0r11[1];0r11[1] = 0t11[0] + 0t11[1]
# asm 1: haddpd <0t11=int6464#13,<0r11=int6464#12
# asm 2: haddpd <0t11=%xmm12,<0r11=%xmm11
haddpd %xmm12,%xmm11
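# Every limb pair in %xmm0-%xmm11 has now been multiplied by xi; the
# twelve results are written back to the result operand (%rdi) below.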
# qhasm: *(int128 *)(0rop + 0) = 0r0
# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
movdqa %xmm0,0(%rdi)
# qhasm: *(int128 *)(0rop + 16) = 0r1
# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
movdqa %xmm1,16(%rdi)
# qhasm: *(int128 *)(0rop + 32) = 0r2
# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
movdqa %xmm2,32(%rdi)
# qhasm: *(int128 *)(0rop + 48) = 0r3
# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
movdqa %xmm3,48(%rdi)
# qhasm: *(int128 *)(0rop + 64) = 0r4
# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
movdqa %xmm4,64(%rdi)
# qhasm: *(int128 *)(0rop + 80) = 0r5
# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
movdqa %xmm5,80(%rdi)
# qhasm: *(int128 *)(0rop + 96) = 0r6
# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
movdqa %xmm6,96(%rdi)
# qhasm: *(int128 *)(0rop + 112) = 0r7
# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
movdqa %xmm7,112(%rdi)
# qhasm: *(int128 *)(0rop + 128) = 0r8
# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
movdqa %xmm8,128(%rdi)
# qhasm: *(int128 *)(0rop + 144) = 0r9
# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
movdqa %xmm9,144(%rdi)
# qhasm: *(int128 *)(0rop + 160) = 0r10
# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
movdqa %xmm10,160(%rdi)
# qhasm: *(int128 *)(0rop + 176) = 0r11
# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
movdqa %xmm11,176(%rdi)
# qhasm: leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret
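# For reference, a minimal C sketch of what this routine computes
# (hypothetical helper, not part of dclxvi; it assumes the interleaved
# (a_j, b_j) coefficient layout and xi = i + 3 described above):
#
#   /* rop and op each point at 24 doubles: 12 pairs (a_j, b_j). */
#   void fp2e_mulxi_ref(double *rop, const double *op)
#   {
#       for (int j = 0; j < 12; j++) {
#           double a = op[2*j], b = op[2*j+1];
#           rop[2*j]   = 3.0*a - b;  /* real part of (a + b*i)*(3 + i) */
#           rop[2*j+1] = a + 3.0*b;  /* imaginary part */
#       }
#   }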