# fp2e_short_coeffred.s (dclxvi-20130329) — qhasm-generated x86-64 assembly
  1. # File: dclxvi-20130329/fp2e_short_coeffred.s
  2. # Author: Ruben Niederhagen, Peter Schwabe
  3. # Public Domain
  4. # qhasm: enter fp2e_short_coeffred_qhasm
  5. .text
  6. .p2align 5
  7. .globl _fp2e_short_coeffred_qhasm
  8. .globl fp2e_short_coeffred_qhasm
  9. _fp2e_short_coeffred_qhasm:
  10. fp2e_short_coeffred_qhasm:
  11. mov %rsp,%r11
  12. and $31,%r11
  13. add $0,%r11
  14. sub %r11,%rsp
  15. # qhasm: int64 rop
  16. # qhasm: input rop
  17. # qhasm: int6464 0r0
  18. # qhasm: int6464 0r1
  19. # qhasm: int6464 0r2
  20. # qhasm: int6464 0r3
  21. # qhasm: int6464 0r4
  22. # qhasm: int6464 0r5
  23. # qhasm: int6464 0r6
  24. # qhasm: int6464 0r7
  25. # qhasm: int6464 0r8
  26. # qhasm: int6464 0r9
  27. # qhasm: int6464 0r10
  28. # qhasm: int6464 0r11
  29. # qhasm: 0r0 = *(int128 *)(rop + 0)
  30. # asm 1: movdqa 0(<rop=int64#1),>0r0=int6464#1
  31. # asm 2: movdqa 0(<rop=%rdi),>0r0=%xmm0
  32. movdqa 0(%rdi),%xmm0
  33. # qhasm: 0r1 = *(int128 *)(rop + 16)
  34. # asm 1: movdqa 16(<rop=int64#1),>0r1=int6464#2
  35. # asm 2: movdqa 16(<rop=%rdi),>0r1=%xmm1
  36. movdqa 16(%rdi),%xmm1
  37. # qhasm: 0r2 = *(int128 *)(rop + 32)
  38. # asm 1: movdqa 32(<rop=int64#1),>0r2=int6464#3
  39. # asm 2: movdqa 32(<rop=%rdi),>0r2=%xmm2
  40. movdqa 32(%rdi),%xmm2
  41. # qhasm: 0r3 = *(int128 *)(rop + 48)
  42. # asm 1: movdqa 48(<rop=int64#1),>0r3=int6464#4
  43. # asm 2: movdqa 48(<rop=%rdi),>0r3=%xmm3
  44. movdqa 48(%rdi),%xmm3
  45. # qhasm: 0r4 = *(int128 *)(rop + 64)
  46. # asm 1: movdqa 64(<rop=int64#1),>0r4=int6464#5
  47. # asm 2: movdqa 64(<rop=%rdi),>0r4=%xmm4
  48. movdqa 64(%rdi),%xmm4
  49. # qhasm: 0r5 = *(int128 *)(rop + 80)
  50. # asm 1: movdqa 80(<rop=int64#1),>0r5=int6464#6
  51. # asm 2: movdqa 80(<rop=%rdi),>0r5=%xmm5
  52. movdqa 80(%rdi),%xmm5
  53. # qhasm: 0r6 = *(int128 *)(rop + 96)
  54. # asm 1: movdqa 96(<rop=int64#1),>0r6=int6464#7
  55. # asm 2: movdqa 96(<rop=%rdi),>0r6=%xmm6
  56. movdqa 96(%rdi),%xmm6
  57. # qhasm: 0r7 = *(int128 *)(rop + 112)
  58. # asm 1: movdqa 112(<rop=int64#1),>0r7=int6464#8
  59. # asm 2: movdqa 112(<rop=%rdi),>0r7=%xmm7
  60. movdqa 112(%rdi),%xmm7
  61. # qhasm: 0r8 = *(int128 *)(rop + 128)
  62. # asm 1: movdqa 128(<rop=int64#1),>0r8=int6464#9
  63. # asm 2: movdqa 128(<rop=%rdi),>0r8=%xmm8
  64. movdqa 128(%rdi),%xmm8
  65. # qhasm: 0r9 = *(int128 *)(rop + 144)
  66. # asm 1: movdqa 144(<rop=int64#1),>0r9=int6464#10
  67. # asm 2: movdqa 144(<rop=%rdi),>0r9=%xmm9
  68. movdqa 144(%rdi),%xmm9
  69. # qhasm: 0r10 = *(int128 *)(rop + 160)
  70. # asm 1: movdqa 160(<rop=int64#1),>0r10=int6464#11
  71. # asm 2: movdqa 160(<rop=%rdi),>0r10=%xmm10
  72. movdqa 160(%rdi),%xmm10
  73. # qhasm: 0r11 = *(int128 *)(rop + 176)
  74. # asm 1: movdqa 176(<rop=int64#1),>0r11=int6464#12
  75. # asm 2: movdqa 176(<rop=%rdi),>0r11=%xmm11
  76. movdqa 176(%rdi),%xmm11
  77. # qhasm: int6464 0round
  78. # qhasm: int6464 0carry
  79. # qhasm: int6464 0t6
  80. # qhasm: 0round = ROUND_ROUND
  81. # asm 1: movdqa ROUND_ROUND,<0round=int6464#13
  82. # asm 2: movdqa ROUND_ROUND,<0round=%xmm12
  83. movdqa ROUND_ROUND,%xmm12
  84. # qhasm: 0carry = 0r11
  85. # asm 1: movdqa <0r11=int6464#12,>0carry=int6464#14
  86. # asm 2: movdqa <0r11=%xmm11,>0carry=%xmm13
  87. movdqa %xmm11,%xmm13
  88. # qhasm: float6464 0carry *= VINV_VINV
  89. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  90. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  91. mulpd VINV_VINV,%xmm13
  92. # qhasm: float6464 0carry += 0round
  93. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  94. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  95. addpd %xmm12,%xmm13
  96. # qhasm: float6464 0carry -= 0round
  97. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  98. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  99. subpd %xmm12,%xmm13
  100. # qhasm: float6464 0r0 -= 0carry
  101. # asm 1: subpd <0carry=int6464#14,<0r0=int6464#1
  102. # asm 2: subpd <0carry=%xmm13,<0r0=%xmm0
  103. subpd %xmm13,%xmm0
  104. # qhasm: float6464 0r3 -= 0carry
  105. # asm 1: subpd <0carry=int6464#14,<0r3=int6464#4
  106. # asm 2: subpd <0carry=%xmm13,<0r3=%xmm3
  107. subpd %xmm13,%xmm3
  108. # qhasm: 0t6 = 0carry
  109. # asm 1: movdqa <0carry=int6464#14,>0t6=int6464#15
  110. # asm 2: movdqa <0carry=%xmm13,>0t6=%xmm14
  111. movdqa %xmm13,%xmm14
  112. # qhasm: float6464 0t6 *= FOUR_FOUR
  113. # asm 1: mulpd FOUR_FOUR,<0t6=int6464#15
  114. # asm 2: mulpd FOUR_FOUR,<0t6=%xmm14
  115. mulpd FOUR_FOUR,%xmm14
  116. # qhasm: float6464 0r6 -= 0t6
  117. # asm 1: subpd <0t6=int6464#15,<0r6=int6464#7
  118. # asm 2: subpd <0t6=%xmm14,<0r6=%xmm6
  119. subpd %xmm14,%xmm6
  120. # qhasm: float6464 0r9 -= 0carry
  121. # asm 1: subpd <0carry=int6464#14,<0r9=int6464#10
  122. # asm 2: subpd <0carry=%xmm13,<0r9=%xmm9
  123. subpd %xmm13,%xmm9
  124. # qhasm: float6464 0carry *= V_V
  125. # asm 1: mulpd V_V,<0carry=int6464#14
  126. # asm 2: mulpd V_V,<0carry=%xmm13
  127. mulpd V_V,%xmm13
  128. # qhasm: float6464 0r11 -= 0carry
  129. # asm 1: subpd <0carry=int6464#14,<0r11=int6464#12
  130. # asm 2: subpd <0carry=%xmm13,<0r11=%xmm11
  131. subpd %xmm13,%xmm11
  132. # qhasm: 0carry = 0r1
  133. # asm 1: movdqa <0r1=int6464#2,>0carry=int6464#14
  134. # asm 2: movdqa <0r1=%xmm1,>0carry=%xmm13
  135. movdqa %xmm1,%xmm13
  136. # qhasm: float6464 0carry *= VINV_VINV
  137. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  138. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  139. mulpd VINV_VINV,%xmm13
  140. # qhasm: float6464 0carry += 0round
  141. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  142. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  143. addpd %xmm12,%xmm13
  144. # qhasm: float6464 0carry -= 0round
  145. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  146. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  147. subpd %xmm12,%xmm13
  148. # qhasm: float6464 0r2 += 0carry
  149. # asm 1: addpd <0carry=int6464#14,<0r2=int6464#3
  150. # asm 2: addpd <0carry=%xmm13,<0r2=%xmm2
  151. addpd %xmm13,%xmm2
  152. # qhasm: float6464 0carry *= V_V
  153. # asm 1: mulpd V_V,<0carry=int6464#14
  154. # asm 2: mulpd V_V,<0carry=%xmm13
  155. mulpd V_V,%xmm13
  156. # qhasm: float6464 0r1 -= 0carry
  157. # asm 1: subpd <0carry=int6464#14,<0r1=int6464#2
  158. # asm 2: subpd <0carry=%xmm13,<0r1=%xmm1
  159. subpd %xmm13,%xmm1
  160. # qhasm: 0carry = 0r3
  161. # asm 1: movdqa <0r3=int6464#4,>0carry=int6464#14
  162. # asm 2: movdqa <0r3=%xmm3,>0carry=%xmm13
  163. movdqa %xmm3,%xmm13
  164. # qhasm: float6464 0carry *= VINV_VINV
  165. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  166. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  167. mulpd VINV_VINV,%xmm13
  168. # qhasm: float6464 0carry += 0round
  169. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  170. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  171. addpd %xmm12,%xmm13
  172. # qhasm: float6464 0carry -= 0round
  173. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  174. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  175. subpd %xmm12,%xmm13
  176. # qhasm: float6464 0r4 += 0carry
  177. # asm 1: addpd <0carry=int6464#14,<0r4=int6464#5
  178. # asm 2: addpd <0carry=%xmm13,<0r4=%xmm4
  179. addpd %xmm13,%xmm4
  180. # qhasm: float6464 0carry *= V_V
  181. # asm 1: mulpd V_V,<0carry=int6464#14
  182. # asm 2: mulpd V_V,<0carry=%xmm13
  183. mulpd V_V,%xmm13
  184. # qhasm: float6464 0r3 -= 0carry
  185. # asm 1: subpd <0carry=int6464#14,<0r3=int6464#4
  186. # asm 2: subpd <0carry=%xmm13,<0r3=%xmm3
  187. subpd %xmm13,%xmm3
  188. # qhasm: 0carry = 0r5
  189. # asm 1: movdqa <0r5=int6464#6,>0carry=int6464#14
  190. # asm 2: movdqa <0r5=%xmm5,>0carry=%xmm13
  191. movdqa %xmm5,%xmm13
  192. # qhasm: float6464 0carry *= VINV_VINV
  193. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  194. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  195. mulpd VINV_VINV,%xmm13
  196. # qhasm: float6464 0carry += 0round
  197. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  198. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  199. addpd %xmm12,%xmm13
  200. # qhasm: float6464 0carry -= 0round
  201. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  202. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  203. subpd %xmm12,%xmm13
  204. # qhasm: float6464 0r6 += 0carry
  205. # asm 1: addpd <0carry=int6464#14,<0r6=int6464#7
  206. # asm 2: addpd <0carry=%xmm13,<0r6=%xmm6
  207. addpd %xmm13,%xmm6
  208. # qhasm: float6464 0carry *= V_V
  209. # asm 1: mulpd V_V,<0carry=int6464#14
  210. # asm 2: mulpd V_V,<0carry=%xmm13
  211. mulpd V_V,%xmm13
  212. # qhasm: float6464 0r5 -= 0carry
  213. # asm 1: subpd <0carry=int6464#14,<0r5=int6464#6
  214. # asm 2: subpd <0carry=%xmm13,<0r5=%xmm5
  215. subpd %xmm13,%xmm5
  216. # qhasm: 0carry = 0r7
  217. # asm 1: movdqa <0r7=int6464#8,>0carry=int6464#14
  218. # asm 2: movdqa <0r7=%xmm7,>0carry=%xmm13
  219. movdqa %xmm7,%xmm13
  220. # qhasm: float6464 0carry *= VINV_VINV
  221. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  222. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  223. mulpd VINV_VINV,%xmm13
  224. # qhasm: float6464 0carry += 0round
  225. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  226. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  227. addpd %xmm12,%xmm13
  228. # qhasm: float6464 0carry -= 0round
  229. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  230. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  231. subpd %xmm12,%xmm13
  232. # qhasm: float6464 0r8 += 0carry
  233. # asm 1: addpd <0carry=int6464#14,<0r8=int6464#9
  234. # asm 2: addpd <0carry=%xmm13,<0r8=%xmm8
  235. addpd %xmm13,%xmm8
  236. # qhasm: float6464 0carry *= V_V
  237. # asm 1: mulpd V_V,<0carry=int6464#14
  238. # asm 2: mulpd V_V,<0carry=%xmm13
  239. mulpd V_V,%xmm13
  240. # qhasm: float6464 0r7 -= 0carry
  241. # asm 1: subpd <0carry=int6464#14,<0r7=int6464#8
  242. # asm 2: subpd <0carry=%xmm13,<0r7=%xmm7
  243. subpd %xmm13,%xmm7
  244. # qhasm: 0carry = 0r9
  245. # asm 1: movdqa <0r9=int6464#10,>0carry=int6464#14
  246. # asm 2: movdqa <0r9=%xmm9,>0carry=%xmm13
  247. movdqa %xmm9,%xmm13
  248. # qhasm: float6464 0carry *= VINV_VINV
  249. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  250. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  251. mulpd VINV_VINV,%xmm13
  252. # qhasm: float6464 0carry += 0round
  253. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  254. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  255. addpd %xmm12,%xmm13
  256. # qhasm: float6464 0carry -= 0round
  257. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  258. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  259. subpd %xmm12,%xmm13
  260. # qhasm: float6464 0r10 += 0carry
  261. # asm 1: addpd <0carry=int6464#14,<0r10=int6464#11
  262. # asm 2: addpd <0carry=%xmm13,<0r10=%xmm10
  263. addpd %xmm13,%xmm10
  264. # qhasm: float6464 0carry *= V_V
  265. # asm 1: mulpd V_V,<0carry=int6464#14
  266. # asm 2: mulpd V_V,<0carry=%xmm13
  267. mulpd V_V,%xmm13
  268. # qhasm: float6464 0r9 -= 0carry
  269. # asm 1: subpd <0carry=int6464#14,<0r9=int6464#10
  270. # asm 2: subpd <0carry=%xmm13,<0r9=%xmm9
  271. subpd %xmm13,%xmm9
  272. # qhasm: 0carry = 0r0
  273. # asm 1: movdqa <0r0=int6464#1,>0carry=int6464#14
  274. # asm 2: movdqa <0r0=%xmm0,>0carry=%xmm13
  275. movdqa %xmm0,%xmm13
  276. # qhasm: float6464 0carry *= V6INV_V6INV
  277. # asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
  278. # asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
  279. mulpd V6INV_V6INV,%xmm13
  280. # qhasm: float6464 0carry += 0round
  281. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  282. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  283. addpd %xmm12,%xmm13
  284. # qhasm: float6464 0carry -= 0round
  285. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  286. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  287. subpd %xmm12,%xmm13
  288. # qhasm: float6464 0r1 += 0carry
  289. # asm 1: addpd <0carry=int6464#14,<0r1=int6464#2
  290. # asm 2: addpd <0carry=%xmm13,<0r1=%xmm1
  291. addpd %xmm13,%xmm1
  292. # qhasm: float6464 0carry *= V6_V6
  293. # asm 1: mulpd V6_V6,<0carry=int6464#14
  294. # asm 2: mulpd V6_V6,<0carry=%xmm13
  295. mulpd V6_V6,%xmm13
  296. # qhasm: float6464 0r0 -= 0carry
  297. # asm 1: subpd <0carry=int6464#14,<0r0=int6464#1
  298. # asm 2: subpd <0carry=%xmm13,<0r0=%xmm0
  299. subpd %xmm13,%xmm0
  300. # qhasm: 0carry = 0r2
  301. # asm 1: movdqa <0r2=int6464#3,>0carry=int6464#14
  302. # asm 2: movdqa <0r2=%xmm2,>0carry=%xmm13
  303. movdqa %xmm2,%xmm13
  304. # qhasm: float6464 0carry *= VINV_VINV
  305. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  306. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  307. mulpd VINV_VINV,%xmm13
  308. # qhasm: float6464 0carry += 0round
  309. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  310. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  311. addpd %xmm12,%xmm13
  312. # qhasm: float6464 0carry -= 0round
  313. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  314. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  315. subpd %xmm12,%xmm13
  316. # qhasm: float6464 0r3 += 0carry
  317. # asm 1: addpd <0carry=int6464#14,<0r3=int6464#4
  318. # asm 2: addpd <0carry=%xmm13,<0r3=%xmm3
  319. addpd %xmm13,%xmm3
  320. # qhasm: float6464 0carry *= V_V
  321. # asm 1: mulpd V_V,<0carry=int6464#14
  322. # asm 2: mulpd V_V,<0carry=%xmm13
  323. mulpd V_V,%xmm13
  324. # qhasm: float6464 0r2 -= 0carry
  325. # asm 1: subpd <0carry=int6464#14,<0r2=int6464#3
  326. # asm 2: subpd <0carry=%xmm13,<0r2=%xmm2
  327. subpd %xmm13,%xmm2
  328. # qhasm: 0carry = 0r4
  329. # asm 1: movdqa <0r4=int6464#5,>0carry=int6464#14
  330. # asm 2: movdqa <0r4=%xmm4,>0carry=%xmm13
  331. movdqa %xmm4,%xmm13
  332. # qhasm: float6464 0carry *= VINV_VINV
  333. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  334. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  335. mulpd VINV_VINV,%xmm13
  336. # qhasm: float6464 0carry += 0round
  337. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  338. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  339. addpd %xmm12,%xmm13
  340. # qhasm: float6464 0carry -= 0round
  341. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  342. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  343. subpd %xmm12,%xmm13
  344. # qhasm: float6464 0r5 += 0carry
  345. # asm 1: addpd <0carry=int6464#14,<0r5=int6464#6
  346. # asm 2: addpd <0carry=%xmm13,<0r5=%xmm5
  347. addpd %xmm13,%xmm5
  348. # qhasm: float6464 0carry *= V_V
  349. # asm 1: mulpd V_V,<0carry=int6464#14
  350. # asm 2: mulpd V_V,<0carry=%xmm13
  351. mulpd V_V,%xmm13
  352. # qhasm: float6464 0r4 -= 0carry
  353. # asm 1: subpd <0carry=int6464#14,<0r4=int6464#5
  354. # asm 2: subpd <0carry=%xmm13,<0r4=%xmm4
  355. subpd %xmm13,%xmm4
  356. # qhasm: 0carry = 0r6
  357. # asm 1: movdqa <0r6=int6464#7,>0carry=int6464#14
  358. # asm 2: movdqa <0r6=%xmm6,>0carry=%xmm13
  359. movdqa %xmm6,%xmm13
  360. # qhasm: float6464 0carry *= V6INV_V6INV
  361. # asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
  362. # asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
  363. mulpd V6INV_V6INV,%xmm13
  364. # qhasm: float6464 0carry += 0round
  365. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  366. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  367. addpd %xmm12,%xmm13
  368. # qhasm: float6464 0carry -= 0round
  369. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  370. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  371. subpd %xmm12,%xmm13
  372. # qhasm: float6464 0r7 += 0carry
  373. # asm 1: addpd <0carry=int6464#14,<0r7=int6464#8
  374. # asm 2: addpd <0carry=%xmm13,<0r7=%xmm7
  375. addpd %xmm13,%xmm7
  376. # qhasm: float6464 0carry *= V6_V6
  377. # asm 1: mulpd V6_V6,<0carry=int6464#14
  378. # asm 2: mulpd V6_V6,<0carry=%xmm13
  379. mulpd V6_V6,%xmm13
  380. # qhasm: float6464 0r6 -= 0carry
  381. # asm 1: subpd <0carry=int6464#14,<0r6=int6464#7
  382. # asm 2: subpd <0carry=%xmm13,<0r6=%xmm6
  383. subpd %xmm13,%xmm6
  384. # qhasm: 0carry = 0r8
  385. # asm 1: movdqa <0r8=int6464#9,>0carry=int6464#14
  386. # asm 2: movdqa <0r8=%xmm8,>0carry=%xmm13
  387. movdqa %xmm8,%xmm13
  388. # qhasm: float6464 0carry *= VINV_VINV
  389. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  390. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  391. mulpd VINV_VINV,%xmm13
  392. # qhasm: float6464 0carry += 0round
  393. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  394. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  395. addpd %xmm12,%xmm13
  396. # qhasm: float6464 0carry -= 0round
  397. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  398. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  399. subpd %xmm12,%xmm13
  400. # qhasm: float6464 0r9 += 0carry
  401. # asm 1: addpd <0carry=int6464#14,<0r9=int6464#10
  402. # asm 2: addpd <0carry=%xmm13,<0r9=%xmm9
  403. addpd %xmm13,%xmm9
  404. # qhasm: float6464 0carry *= V_V
  405. # asm 1: mulpd V_V,<0carry=int6464#14
  406. # asm 2: mulpd V_V,<0carry=%xmm13
  407. mulpd V_V,%xmm13
  408. # qhasm: float6464 0r8 -= 0carry
  409. # asm 1: subpd <0carry=int6464#14,<0r8=int6464#9
  410. # asm 2: subpd <0carry=%xmm13,<0r8=%xmm8
  411. subpd %xmm13,%xmm8
  412. # qhasm: 0carry = 0r10
  413. # asm 1: movdqa <0r10=int6464#11,>0carry=int6464#14
  414. # asm 2: movdqa <0r10=%xmm10,>0carry=%xmm13
  415. movdqa %xmm10,%xmm13
  416. # qhasm: float6464 0carry *= VINV_VINV
  417. # asm 1: mulpd VINV_VINV,<0carry=int6464#14
  418. # asm 2: mulpd VINV_VINV,<0carry=%xmm13
  419. mulpd VINV_VINV,%xmm13
  420. # qhasm: float6464 0carry += 0round
  421. # asm 1: addpd <0round=int6464#13,<0carry=int6464#14
  422. # asm 2: addpd <0round=%xmm12,<0carry=%xmm13
  423. addpd %xmm12,%xmm13
  424. # qhasm: float6464 0carry -= 0round
  425. # asm 1: subpd <0round=int6464#13,<0carry=int6464#14
  426. # asm 2: subpd <0round=%xmm12,<0carry=%xmm13
  427. subpd %xmm12,%xmm13
  428. # qhasm: float6464 0r11 += 0carry
  429. # asm 1: addpd <0carry=int6464#14,<0r11=int6464#12
  430. # asm 2: addpd <0carry=%xmm13,<0r11=%xmm11
  431. addpd %xmm13,%xmm11
  432. # qhasm: float6464 0carry *= V_V
  433. # asm 1: mulpd V_V,<0carry=int6464#14
  434. # asm 2: mulpd V_V,<0carry=%xmm13
  435. mulpd V_V,%xmm13
  436. # qhasm: float6464 0r10 -= 0carry
  437. # asm 1: subpd <0carry=int6464#14,<0r10=int6464#11
  438. # asm 2: subpd <0carry=%xmm13,<0r10=%xmm10
  439. subpd %xmm13,%xmm10
  440. # qhasm: *(int128 *)(rop + 0) = 0r0
  441. # asm 1: movdqa <0r0=int6464#1,0(<rop=int64#1)
  442. # asm 2: movdqa <0r0=%xmm0,0(<rop=%rdi)
  443. movdqa %xmm0,0(%rdi)
  444. # qhasm: *(int128 *)(rop + 16) = 0r1
  445. # asm 1: movdqa <0r1=int6464#2,16(<rop=int64#1)
  446. # asm 2: movdqa <0r1=%xmm1,16(<rop=%rdi)
  447. movdqa %xmm1,16(%rdi)
  448. # qhasm: *(int128 *)(rop + 32) = 0r2
  449. # asm 1: movdqa <0r2=int6464#3,32(<rop=int64#1)
  450. # asm 2: movdqa <0r2=%xmm2,32(<rop=%rdi)
  451. movdqa %xmm2,32(%rdi)
  452. # qhasm: *(int128 *)(rop + 48) = 0r3
  453. # asm 1: movdqa <0r3=int6464#4,48(<rop=int64#1)
  454. # asm 2: movdqa <0r3=%xmm3,48(<rop=%rdi)
  455. movdqa %xmm3,48(%rdi)
  456. # qhasm: *(int128 *)(rop + 64) = 0r4
  457. # asm 1: movdqa <0r4=int6464#5,64(<rop=int64#1)
  458. # asm 2: movdqa <0r4=%xmm4,64(<rop=%rdi)
  459. movdqa %xmm4,64(%rdi)
  460. # qhasm: *(int128 *)(rop + 80) = 0r5
  461. # asm 1: movdqa <0r5=int6464#6,80(<rop=int64#1)
  462. # asm 2: movdqa <0r5=%xmm5,80(<rop=%rdi)
  463. movdqa %xmm5,80(%rdi)
  464. # qhasm: *(int128 *)(rop + 96) = 0r6
  465. # asm 1: movdqa <0r6=int6464#7,96(<rop=int64#1)
  466. # asm 2: movdqa <0r6=%xmm6,96(<rop=%rdi)
  467. movdqa %xmm6,96(%rdi)
  468. # qhasm: *(int128 *)(rop + 112) = 0r7
  469. # asm 1: movdqa <0r7=int6464#8,112(<rop=int64#1)
  470. # asm 2: movdqa <0r7=%xmm7,112(<rop=%rdi)
  471. movdqa %xmm7,112(%rdi)
  472. # qhasm: *(int128 *)(rop + 128) = 0r8
  473. # asm 1: movdqa <0r8=int6464#9,128(<rop=int64#1)
  474. # asm 2: movdqa <0r8=%xmm8,128(<rop=%rdi)
  475. movdqa %xmm8,128(%rdi)
  476. # qhasm: *(int128 *)(rop + 144) = 0r9
  477. # asm 1: movdqa <0r9=int6464#10,144(<rop=int64#1)
  478. # asm 2: movdqa <0r9=%xmm9,144(<rop=%rdi)
  479. movdqa %xmm9,144(%rdi)
  480. # qhasm: *(int128 *)(rop + 160) = 0r10
  481. # asm 1: movdqa <0r10=int6464#11,160(<rop=int64#1)
  482. # asm 2: movdqa <0r10=%xmm10,160(<rop=%rdi)
  483. movdqa %xmm10,160(%rdi)
  484. # qhasm: *(int128 *)(rop + 176) = 0r11
  485. # asm 1: movdqa <0r11=int6464#12,176(<rop=int64#1)
  486. # asm 2: movdqa <0r11=%xmm11,176(<rop=%rdi)
  487. movdqa %xmm11,176(%rdi)
  488. # qhasm: leave
  489. add %r11,%rsp
  490. mov %rdi,%rax
  491. mov %rsi,%rdx
  492. ret