# File: dclxvi-20130329/fp2e_add.s
# Author: Ruben Niederhagen, Peter Schwabe
# Public Domain
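#
# fp2e_add_qhasm(rop, op1, op2): componentwise addition of two fp2e
# elements (SysV AMD64 ABI: rop in %rdi, op1 in %rsi, op2 in %rdx).
# Each operand is 192 bytes = twelve 16-byte-aligned 128-bit vectors;
# in dclxvi's fp2e representation each vector holds one double-precision
# coefficient of each of the two F_p components, so a single addpd adds
# the corresponding coefficients of both components at once.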
# qhasm: enter fp2e_add_qhasm
.text
.p2align 5
.globl _fp2e_add_qhasm
.globl fp2e_add_qhasm
_fp2e_add_qhasm:
fp2e_add_qhasm:
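# qhasm prologue: align the stack to 32 bytes and reserve 0 bytes of local
# space; %r11 records the adjustment so it can be undone at "leave" below.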
mov %rsp,%r11
and $31,%r11
add $0,%r11
sub %r11,%rsp
# qhasm: int64 0rop
# qhasm: int64 0op1
# qhasm: int64 0op2
# qhasm: input 0rop
# qhasm: input 0op1
# qhasm: input 0op2
# qhasm: int6464 0r0
# qhasm: int6464 0r1
# qhasm: int6464 0r2
# qhasm: int6464 0r3
# qhasm: int6464 0r4
# qhasm: int6464 0r5
# qhasm: int6464 0r6
# qhasm: int6464 0r7
# qhasm: int6464 0r8
# qhasm: int6464 0r9
# qhasm: int6464 0r10
# qhasm: int6464 0r11
# qhasm: int6464 0t0
# qhasm: int6464 0t1
# qhasm: int6464 0t2
# qhasm: int6464 0t3
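# The declarations above are qhasm register-allocation hints and emit no
# code. Below, the twelve vectors of op1 are loaded from 0(%rsi)..176(%rsi)
# into %xmm0-%xmm11.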
# qhasm: 0r0 = *(int128 *)(0op1 + 0)
# asm 1: movdqa 0(<0op1=int64#2),>0r0=int6464#1
# asm 2: movdqa 0(<0op1=%rsi),>0r0=%xmm0
movdqa 0(%rsi),%xmm0
# qhasm: 0r1 = *(int128 *)(0op1 + 16)
# asm 1: movdqa 16(<0op1=int64#2),>0r1=int6464#2
# asm 2: movdqa 16(<0op1=%rsi),>0r1=%xmm1
movdqa 16(%rsi),%xmm1
# qhasm: 0r2 = *(int128 *)(0op1 + 32)
# asm 1: movdqa 32(<0op1=int64#2),>0r2=int6464#3
# asm 2: movdqa 32(<0op1=%rsi),>0r2=%xmm2
movdqa 32(%rsi),%xmm2
# qhasm: 0r3 = *(int128 *)(0op1 + 48)
# asm 1: movdqa 48(<0op1=int64#2),>0r3=int6464#4
# asm 2: movdqa 48(<0op1=%rsi),>0r3=%xmm3
movdqa 48(%rsi),%xmm3
# qhasm: 0r4 = *(int128 *)(0op1 + 64)
# asm 1: movdqa 64(<0op1=int64#2),>0r4=int6464#5
# asm 2: movdqa 64(<0op1=%rsi),>0r4=%xmm4
movdqa 64(%rsi),%xmm4
# qhasm: 0r5 = *(int128 *)(0op1 + 80)
# asm 1: movdqa 80(<0op1=int64#2),>0r5=int6464#6
# asm 2: movdqa 80(<0op1=%rsi),>0r5=%xmm5
movdqa 80(%rsi),%xmm5
# qhasm: 0r6 = *(int128 *)(0op1 + 96)
# asm 1: movdqa 96(<0op1=int64#2),>0r6=int6464#7
# asm 2: movdqa 96(<0op1=%rsi),>0r6=%xmm6
movdqa 96(%rsi),%xmm6
# qhasm: 0r7 = *(int128 *)(0op1 + 112)
# asm 1: movdqa 112(<0op1=int64#2),>0r7=int6464#8
# asm 2: movdqa 112(<0op1=%rsi),>0r7=%xmm7
movdqa 112(%rsi),%xmm7
# qhasm: 0r8 = *(int128 *)(0op1 + 128)
# asm 1: movdqa 128(<0op1=int64#2),>0r8=int6464#9
# asm 2: movdqa 128(<0op1=%rsi),>0r8=%xmm8
movdqa 128(%rsi),%xmm8
# qhasm: 0r9 = *(int128 *)(0op1 + 144)
# asm 1: movdqa 144(<0op1=int64#2),>0r9=int6464#10
# asm 2: movdqa 144(<0op1=%rsi),>0r9=%xmm9
movdqa 144(%rsi),%xmm9
# qhasm: 0r10 = *(int128 *)(0op1 + 160)
# asm 1: movdqa 160(<0op1=int64#2),>0r10=int6464#11
# asm 2: movdqa 160(<0op1=%rsi),>0r10=%xmm10
movdqa 160(%rsi),%xmm10
# qhasm: 0r11 = *(int128 *)(0op1 + 176)
# asm 1: movdqa 176(<0op1=int64#2),>0r11=int6464#12
# asm 2: movdqa 176(<0op1=%rsi),>0r11=%xmm11
movdqa 176(%rsi),%xmm11
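# op1 now occupies %xmm0-%xmm11. op2 is processed in three batches of four
# vectors each, reusing %xmm12-%xmm15 as temporaries: load four vectors,
# then accumulate them into the result registers with addpd.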
# qhasm: int6464 1t0
# qhasm: int6464 1t1
# qhasm: int6464 1t2
# qhasm: int6464 1t3
# qhasm: 1t0 = *(int128 *)(0op2 + 0)
# asm 1: movdqa 0(<0op2=int64#3),>1t0=int6464#13
# asm 2: movdqa 0(<0op2=%rdx),>1t0=%xmm12
movdqa 0(%rdx),%xmm12
# qhasm: 1t1 = *(int128 *)(0op2 + 16)
# asm 1: movdqa 16(<0op2=int64#3),>1t1=int6464#14
# asm 2: movdqa 16(<0op2=%rdx),>1t1=%xmm13
movdqa 16(%rdx),%xmm13
# qhasm: 1t2 = *(int128 *)(0op2 + 32)
# asm 1: movdqa 32(<0op2=int64#3),>1t2=int6464#15
# asm 2: movdqa 32(<0op2=%rdx),>1t2=%xmm14
movdqa 32(%rdx),%xmm14
# qhasm: 1t3 = *(int128 *)(0op2 + 48)
# asm 1: movdqa 48(<0op2=int64#3),>1t3=int6464#16
# asm 2: movdqa 48(<0op2=%rdx),>1t3=%xmm15
movdqa 48(%rdx),%xmm15
# qhasm: float6464 0r0 += 1t0
# asm 1: addpd <1t0=int6464#13,<0r0=int6464#1
# asm 2: addpd <1t0=%xmm12,<0r0=%xmm0
addpd %xmm12,%xmm0
# qhasm: float6464 0r1 += 1t1
# asm 1: addpd <1t1=int6464#14,<0r1=int6464#2
# asm 2: addpd <1t1=%xmm13,<0r1=%xmm1
addpd %xmm13,%xmm1
# qhasm: float6464 0r2 += 1t2
# asm 1: addpd <1t2=int6464#15,<0r2=int6464#3
# asm 2: addpd <1t2=%xmm14,<0r2=%xmm2
addpd %xmm14,%xmm2
# qhasm: float6464 0r3 += 1t3
# asm 1: addpd <1t3=int6464#16,<0r3=int6464#4
# asm 2: addpd <1t3=%xmm15,<0r3=%xmm3
addpd %xmm15,%xmm3
# qhasm: 1t0 = *(int128 *)(0op2 + 64)
# asm 1: movdqa 64(<0op2=int64#3),>1t0=int6464#13
# asm 2: movdqa 64(<0op2=%rdx),>1t0=%xmm12
movdqa 64(%rdx),%xmm12
# qhasm: 1t1 = *(int128 *)(0op2 + 80)
# asm 1: movdqa 80(<0op2=int64#3),>1t1=int6464#14
# asm 2: movdqa 80(<0op2=%rdx),>1t1=%xmm13
movdqa 80(%rdx),%xmm13
# qhasm: 1t2 = *(int128 *)(0op2 + 96)
# asm 1: movdqa 96(<0op2=int64#3),>1t2=int6464#15
# asm 2: movdqa 96(<0op2=%rdx),>1t2=%xmm14
movdqa 96(%rdx),%xmm14
# qhasm: 1t3 = *(int128 *)(0op2 + 112)
# asm 1: movdqa 112(<0op2=int64#3),>1t3=int6464#16
# asm 2: movdqa 112(<0op2=%rdx),>1t3=%xmm15
movdqa 112(%rdx),%xmm15
# qhasm: float6464 0r4 += 1t0
# asm 1: addpd <1t0=int6464#13,<0r4=int6464#5
# asm 2: addpd <1t0=%xmm12,<0r4=%xmm4
addpd %xmm12,%xmm4
# qhasm: float6464 0r5 += 1t1
# asm 1: addpd <1t1=int6464#14,<0r5=int6464#6
# asm 2: addpd <1t1=%xmm13,<0r5=%xmm5
addpd %xmm13,%xmm5
# qhasm: float6464 0r6 += 1t2
# asm 1: addpd <1t2=int6464#15,<0r6=int6464#7
# asm 2: addpd <1t2=%xmm14,<0r6=%xmm6
addpd %xmm14,%xmm6
# qhasm: float6464 0r7 += 1t3
# asm 1: addpd <1t3=int6464#16,<0r7=int6464#8
# asm 2: addpd <1t3=%xmm15,<0r7=%xmm7
addpd %xmm15,%xmm7
# qhasm: 1t0 = *(int128 *)(0op2 + 128)
# asm 1: movdqa 128(<0op2=int64#3),>1t0=int6464#13
# asm 2: movdqa 128(<0op2=%rdx),>1t0=%xmm12
movdqa 128(%rdx),%xmm12
# qhasm: 1t1 = *(int128 *)(0op2 + 144)
# asm 1: movdqa 144(<0op2=int64#3),>1t1=int6464#14
# asm 2: movdqa 144(<0op2=%rdx),>1t1=%xmm13
movdqa 144(%rdx),%xmm13
# qhasm: 1t2 = *(int128 *)(0op2 + 160)
# asm 1: movdqa 160(<0op2=int64#3),>1t2=int6464#15
# asm 2: movdqa 160(<0op2=%rdx),>1t2=%xmm14
movdqa 160(%rdx),%xmm14
# qhasm: 1t3 = *(int128 *)(0op2 + 176)
# asm 1: movdqa 176(<0op2=int64#3),>1t3=int6464#16
# asm 2: movdqa 176(<0op2=%rdx),>1t3=%xmm15
movdqa 176(%rdx),%xmm15
# qhasm: float6464 0r8 += 1t0
# asm 1: addpd <1t0=int6464#13,<0r8=int6464#9
# asm 2: addpd <1t0=%xmm12,<0r8=%xmm8
addpd %xmm12,%xmm8
# qhasm: float6464 0r9 += 1t1
# asm 1: addpd <1t1=int6464#14,<0r9=int6464#10
# asm 2: addpd <1t1=%xmm13,<0r9=%xmm9
addpd %xmm13,%xmm9
# qhasm: float6464 0r10 += 1t2
# asm 1: addpd <1t2=int6464#15,<0r10=int6464#11
# asm 2: addpd <1t2=%xmm14,<0r10=%xmm10
addpd %xmm14,%xmm10
# qhasm: float6464 0r11 += 1t3
# asm 1: addpd <1t3=int6464#16,<0r11=int6464#12
# asm 2: addpd <1t3=%xmm15,<0r11=%xmm11
addpd %xmm15,%xmm11
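# All twelve sums are now in %xmm0-%xmm11; write them back to rop.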
# qhasm: *(int128 *)(0rop + 0) = 0r0
# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
movdqa %xmm0,0(%rdi)
# qhasm: *(int128 *)(0rop + 16) = 0r1
# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
movdqa %xmm1,16(%rdi)
# qhasm: *(int128 *)(0rop + 32) = 0r2
# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
movdqa %xmm2,32(%rdi)
# qhasm: *(int128 *)(0rop + 48) = 0r3
# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
movdqa %xmm3,48(%rdi)
# qhasm: *(int128 *)(0rop + 64) = 0r4
# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
movdqa %xmm4,64(%rdi)
# qhasm: *(int128 *)(0rop + 80) = 0r5
# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
movdqa %xmm5,80(%rdi)
# qhasm: *(int128 *)(0rop + 96) = 0r6
# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
movdqa %xmm6,96(%rdi)
# qhasm: *(int128 *)(0rop + 112) = 0r7
# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
movdqa %xmm7,112(%rdi)
# qhasm: *(int128 *)(0rop + 128) = 0r8
# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
movdqa %xmm8,128(%rdi)
# qhasm: *(int128 *)(0rop + 144) = 0r9
# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
movdqa %xmm9,144(%rdi)
# qhasm: *(int128 *)(0rop + 160) = 0r10
# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
movdqa %xmm10,160(%rdi)
# qhasm: *(int128 *)(0rop + 176) = 0r11
# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
movdqa %xmm11,176(%rdi)
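# qhasm epilogue: undo the stack adjustment recorded in %r11 and return.
# The moves of %rdi/%rsi into %rax/%rdx are part of qhasm's standard
# "leave" sequence and are not meaningful return values for C callers.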
# qhasm: leave
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret