b17biswa 6 years ago
parent
commit
3d635811ab
100 changed files with 34567 additions and 0 deletions
  1. BIN
      bin/bgn
  2. BIN
      bin/bgn_as
  3. BIN
      bin/bgn_check
  4. 180 0
      dclxvi-20130329/bilintest.c
  5. 247 0
      dclxvi-20130329/checkdouble.h
  6. 64 0
      dclxvi-20130329/consts.s
  7. 22 0
      dclxvi-20130329/cpucycles.c
  8. 12 0
      dclxvi-20130329/cpucycles.h
  9. 439 0
      dclxvi-20130329/curvepoint_fp.c
  10. 62 0
      dclxvi-20130329/curvepoint_fp.h
  11. 64 0
      dclxvi-20130329/curvepoint_fp_multiscalar.c
  12. 12 0
      dclxvi-20130329/curvepoint_fp_multiscalar.h
  13. 132 0
      dclxvi-20130329/final_expo.c
  14. 24 0
      dclxvi-20130329/final_expo.h
  15. 346 0
      dclxvi-20130329/fp12e.c
  16. 108 0
      dclxvi-20130329/fp12e.h
  17. 604 0
      dclxvi-20130329/fp2e.c
  18. 211 0
      dclxvi-20130329/fp2e.h
  19. 314 0
      dclxvi-20130329/fp2e_add.s
  20. 310 0
      dclxvi-20130329/fp2e_add2.s
  21. 249 0
      dclxvi-20130329/fp2e_conjugate.s
  22. 249 0
      dclxvi-20130329/fp2e_double.s
  23. 245 0
      dclxvi-20130329/fp2e_double2.s
  24. 6792 0
      dclxvi-20130329/fp2e_mul.s
  25. 4233 0
      dclxvi-20130329/fp2e_mul_fpe.s
  26. 446 0
      dclxvi-20130329/fp2e_mulxi.s
  27. 249 0
      dclxvi-20130329/fp2e_neg.s
  28. 245 0
      dclxvi-20130329/fp2e_neg2.s
  29. 3846 0
      dclxvi-20130329/fp2e_parallel_coeffmul.s
  30. 626 0
      dclxvi-20130329/fp2e_short_coeffred.s
  31. 4362 0
      dclxvi-20130329/fp2e_square.s
  32. 314 0
      dclxvi-20130329/fp2e_sub.s
  33. 310 0
      dclxvi-20130329/fp2e_sub2.s
  34. 249 0
      dclxvi-20130329/fp2e_triple.s
  35. 245 0
      dclxvi-20130329/fp2e_triple2.s
  36. 335 0
      dclxvi-20130329/fp6e.c
  37. 85 0
      dclxvi-20130329/fp6e.h
  38. 263 0
      dclxvi-20130329/fpe.c
  39. 105 0
      dclxvi-20130329/fpe.h
  40. BIN
      dclxvi-20130329/fpe.h.gch
  41. 4027 0
      dclxvi-20130329/fpe_mul.s
  42. 313 0
      dclxvi-20130329/gmp_convert.c
  43. 35 0
      dclxvi-20130329/gmp_convert.h
  44. 480 0
      dclxvi-20130329/heap_rootreplaced.s
  45. 56 0
      dclxvi-20130329/index_heap.c
  46. 30 0
      dclxvi-20130329/index_heap.h
  47. 184 0
      dclxvi-20130329/linefunction.c
  48. 36 0
      dclxvi-20130329/linefunction.h
  49. 132 0
      dclxvi-20130329/mul.c
  50. 16 0
      dclxvi-20130329/mul.h
  51. 18 0
      dclxvi-20130329/mydouble.c
  52. 20 0
      dclxvi-20130329/mydouble.h
  53. 137 0
      dclxvi-20130329/optate.c
  54. 17 0
      dclxvi-20130329/optate.h
  55. 83 0
      dclxvi-20130329/parameters.c
  56. 40 0
      dclxvi-20130329/parameters.h
  57. 111 0
      dclxvi-20130329/scalar.c
  58. 45 0
      dclxvi-20130329/scalar.h
  59. 146 0
      dclxvi-20130329/scalar_sub_nored.s
  60. 233 0
      dclxvi-20130329/speedtest.c
  61. 55 0
      dclxvi-20130329/test_curvepoint_multiscalar.c
  62. 55 0
      dclxvi-20130329/test_twistpoint_multiscalar.c
  63. 401 0
      dclxvi-20130329/twistpoint_fp2.c
  64. 50 0
      dclxvi-20130329/twistpoint_fp2.h
  65. 64 0
      dclxvi-20130329/twistpoint_fp2_multiscalar.c
  66. 14 0
      dclxvi-20130329/twistpoint_fp2_multiscalar.h
  67. BIN
      doc/bgnfcf.pdf
  68. 2 0
      doc/more_doc
  69. 27 0
      gengetopt/option.c
  70. 36 0
      gengetopt/option.ggo
  71. 187 0
      gengetopt/option.h
  72. 93 0
      include/Bipoint.hpp
  73. 38 0
      include/BitChiffre.hpp
  74. 34 0
      include/BitEvalL1.hpp
  75. 54 0
      include/BitEvalL1.tpp
  76. 28 0
      include/BitEvalL2.hpp
  77. 43 0
      include/BitEvalL3.hpp
  78. 32 0
      include/BitEvalL4.hpp
  79. 43 0
      include/Fp.hpp
  80. 34 0
      include/PrivateKey.hpp
  81. 30 0
      include/PublicKey.hpp
  82. 42 0
      include/Quadripoint.hpp
  83. 16 0
      include/additionL1.hpp
  84. 12 0
      include/additionL2.hpp
  85. 12 0
      include/additionL3.hpp
  86. 12 0
      include/additionL4.hpp
  87. 95 0
      include/bgn.hpp
  88. 27 0
      include/chiffrement.hpp
  89. 22 0
      include/circuit_additionL1.hpp
  90. 24 0
      include/circuit_additionL2.hpp
  91. 25 0
      include/circuit_additionL3.hpp
  92. 25 0
      include/circuit_additionL4.hpp
  93. 21 0
      include/circuit_chiffrement.hpp
  94. 24 0
      include/circuit_demo.hpp
  95. 11 0
      include/circuit_ip.hpp
  96. 24 0
      include/circuit_minmaj.hpp
  97. 24 0
      include/circuit_minmaj2.hpp
  98. 24 0
      include/circuit_minmaj3.hpp
  99. 24 0
      include/circuit_multiplicationL1.hpp
  100. 25 0
      include/circuit_multiplicationL1L2.hpp

BIN
bin/bgn


BIN
bin/bgn_as


BIN
bin/bgn_check


+ 180 - 0
dclxvi-20130329/bilintest.c

@@ -0,0 +1,180 @@
+// g++  -lgmp -lgmpxx -I ../dclxvi-20130329 -I ../update -o ../bin/bilintest ../dclxvi-20130329/bilintest.c  ../dclxvi-20130329/fpe.c ../dclxvi-20130329/fp2e.c ../dclxvi-20130329/curvepoint_fp.c ../dclxvi-20130329/twistpoint_fp2.c  ../dclxvi-20130329/scalar.c ../dclxvi-20130329/parameters.c ../dclxvi-20130329/mul.c ../dclxvi-20130329/mydouble.c ../update/parameters_512.c ../update/fp2e_512.c ../update/scalar_512.c ../update/complete_addition.c ../update/test_functions.cpp ../dclxvi-20130329/optate.c     ../dclxvi-20130329/fp6e.c  ../dclxvi-20130329/linefunction.c ../dclxvi-20130329/fp12e.c ../dclxvi-20130329/final_expo.c ../update/final_expo_512.c   -DNTESTS=10 && ../bin/bilintest
+
+#include <stdio.h>
+
+#include "test_functions.hpp" // added for the update to a 512-bit key
+
+
+#include "mydouble.h" 
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+#include "fp12e.h"
+} 
+#include "curvepoint_fp.h"
+#include "twistpoint_fp2.h"
+#include "optate.h"
+#include <unistd.h> // sleep() function
+
+extern const curvepoint_fp_t bn_curvegen;
+extern const twistpoint_fp2_t bn_twistgen;
+extern const scalar_t bn_n;
+
+int main(int argc, char* argv[])
+{
+	titre(Bilintest);
+	//zout(!0,!1,!456465);
+	//return 0;
+	fp12e_t e1, e2, e3;
+
+	curvepoint_fp_t p1;
+
+	twistpoint_fp2_t p2;
+
+	scalar_t s1, s2;
+
+	int i;
+	// Test with neutral element as argument
+	scalar_setrandom(s1, bn_n);
+	//scalar_print(stdout, s1); 
+	//exit(0);
+	curvepoint_fp_set(p1, bn_curvegen);
+
+	//curvepoint_fp_print(stdout,p1);
+	//exit(0);
+	twistpoint_fp2_setneutral(p2);
+	fpe_isreduced(p1->m_x);
+	fpe_isreduced(p1->m_y);
+	//abc;
+	//curvepoint_fp_print(stdout,p1);
+	//scalar_print(stdout,s1);
+	//where;
+	curvepoint_fp_scalarmult_vartime(p1, p1, s1); //64 additions
+	
+	//xyz;
+	curvepoint_fp_makeaffine(p1);
+	optate(e1, p2, p1);
+	if(!fp12e_isone(e1))
+	printf("Error in optimal ate: e(infty,P) != 1\n");
+	scalar_setrandom(s2, bn_n);
+	curvepoint_fp_setneutral(p1);
+	twistpoint_fp2_set(p2, bn_twistgen);
+	fp2e_isreduced(p2->m_x);
+	fp2e_isreduced(p2->m_y);
+
+	twistpoint_fp2_scalarmult_vartime(p2, p2, s2);
+	twistpoint_fp2_makeaffine(p2);
+	optate(e1, p2, p1);
+	if(!fp12e_isone(e1))
+	printf("Error in optimal ate: e(Q,infty) != 1\n");
+	
+	// Bilinearity test of optimal ate Pairing:
+	for(i=0;i<NTESTS;i++)
+	{
+		#if (NTESTS > 100)
+			if(!(i%(NTESTS/100)) && i!=0) printf("Number of tests: %d\n",i);
+		#else
+			if(i>=0) printf("Number of tests: %d\n",i);
+		#endif
+		//ecris(bn_n = );
+		//scalar_print(stdout,bn_n);
+		scalar_setrandom(s1, bn_n);
+
+		scalar_setrandom(s2, bn_n);
+		
+		
+		
+		
+		
+		//unsigned long long tab[4]={0x8ae69c5bcb039b07,	0x58549b8019d00d7b,	0x1102df67a4a9ace2,	0xc36d19fe776fac9f};
+		//scalar_set_lluarray(s1,tab);
+		
+		//unsigned long long tab2[4]={0xad89b6fb63df1643,	0x354658972be46274,	0x4df1d15eaa4d2322,	0x10bf308f77d3d38d};
+		//scalar_set_lluarray(s2,tab2);		
+		
+		
+		//unsigned long long tab[4]={8,0,0,0};
+		//unsigned long long tab2[4]={1,0,0,0};
+		//scalar_set_lluarray(s1,tab);
+		//scalar_set_lluarray(s2,tab2);
+
+		curvepoint_fp_set(p1, bn_curvegen);
+		twistpoint_fp2_set(p2, bn_twistgen);
+		//twistpoint_fp2_print(stdout,bn_twistgen);
+		fpe_isreduced(p1->m_x);
+		fpe_isreduced(p1->m_y);
+		fp2e_isreduced(p2->m_x);
+		fp2e_isreduced(p2->m_y);
+		curvepoint_fp_scalarmult_vartime(p1, p1, s1);
+		curvepoint_fp_makeaffine(p1);
+		twistpoint_fp2_scalarmult_vartime(p2, p2, s2);
+		twistpoint_fp2_makeaffine(p2);
+		//printf("s1p1 affine: ");
+		//curvepoint_fp_print(stdout,p1);
+		//printf("\n");
+		//printf("s2p2 affine: ");
+		//twistpoint_fp2_print(stdout,p2);
+		//printf("\n");
+		optate(e1, p2, p1);
+		curvepoint_fp_set(p1, bn_curvegen);
+		twistpoint_fp2_set(p2, bn_twistgen);
+		fpe_isreduced(p1->m_x);
+		fpe_isreduced(p1->m_y);
+		fp2e_isreduced(p2->m_x);
+		fp2e_isreduced(p2->m_y);
+		curvepoint_fp_scalarmult_vartime(p1, p1, s2);
+		curvepoint_fp_makeaffine(p1);
+		twistpoint_fp2_scalarmult_vartime(p2, p2, s1);
+		twistpoint_fp2_makeaffine(p2);
+		//printf("s2p1 affine: ");
+		//curvepoint_fp_print(stdout,p1);
+		//printf("\n");
+		//printf("s1p2 affine: ");
+		//twistpoint_fp2_print(stdout,p2);
+		//printf("\n");
+		optate(e2, p2, p1);
+
+		curvepoint_fp_set(p1, bn_curvegen);
+		twistpoint_fp2_set(p2, bn_twistgen);
+
+		optate(e3, p2, p1);
+
+		fp12e_pow_vartime(e3, e3, s1);
+		fp12e_pow_vartime(e3, e3, s2);
+
+		if(!fp12e_iseq(e1,e2))
+		{
+		  printf("Error in optimal ate: e1 != e2\n");
+		  //printf("e1: ");
+		  //fp12e_print(stdout, e1);
+		  //printf("\ne2: ");
+		  //fp12e_print(stdout, e2);
+		  //printf("\nScalars:\n");
+		  //printf("s1= ");
+		  //scalar_print(stdout, s1); 
+		  //printf("\ns2= ");
+		  //scalar_print(stdout, s2); 
+		  //printf("\n");
+		  }
+		else if(!fp12e_iseq(e2,e3))
+		{
+		  printf("Error in optimal ate: e2 != e3\n");
+		  printf("e2: ");
+		  fp12e_print(stdout, e2);
+		  printf("\ne3: ");
+		  fp12e_print(stdout, e3);
+		  printf("\nScalars:\n");
+		  printf("s1= ");
+		  scalar_print(stdout, s1); 
+		  printf("\ns2= ");
+		  scalar_print(stdout, s2); 
+		  printf("\n");
+		}
+		else if(fp12e_iszero(e2))
+		  printf("Error: Pairing value is zero\n");
+		else if(fp12e_isone(e2))
+		  printf("Warning: Pairing value is one\n");
+		}
+		return 0;
+}
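
The loop above is the bilinearity check for the optimal ate pairing e: for random scalars s1, s2 drawn below bn_n it verifies

    e(s_2 Q, s_1 P) = e(s_1 Q, s_2 P) = e(Q, P)^{s_1 s_2}

together with the degenerate cases e(\infty, P) = e(Q, \infty) = 1 and the sanity checks that the pairing value is neither zero nor one.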

+ 247 - 0
dclxvi-20130329/checkdouble.h

@@ -0,0 +1,247 @@
+/*
+ * File:   dclxvi-20130329/checkdouble.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef CHECKDOUBLE_H
+#define CHECKDOUBLE_H
+
+#include <execinfo.h>
+#include <inttypes.h>
+#include <memory.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "zout.hpp" 
+
+#define MANTISSA_MAX ((1ULL << 53) - 1)
+
+class CheckDouble{
+  public: double v;
+          unsigned long long mmax;
+
+          CheckDouble()
+          {
+            v = NAN;
+            mmax = MANTISSA_MAX;
+          }
+
+          CheckDouble(const double a)
+          {
+            v = a;
+            mmax = (unsigned long long)fabs(a);
+          }
+
+          CheckDouble(const CheckDouble &a)
+          {
+            v = a.v;
+            mmax = a.mmax;
+          }
+
+          CheckDouble(const double a, const unsigned long long int mmax)
+          {
+            v = a;
+            this->mmax = mmax;
+          }
+
+          CheckDouble operator=(const CheckDouble &a)
+          {
+            v = a.v;
+            mmax = a.mmax;
+            return *this;
+          }
+
+          int operator==(const CheckDouble &a)const
+          {
+            return v == a.v;
+          }
+
+          int operator!=(const CheckDouble &a)const
+          {
+            return v != a.v;
+          }
+
+          CheckDouble operator+(const CheckDouble &a)const
+          {
+            if((mmax+a.mmax) > MANTISSA_MAX)
+            {
+              fprintf(stderr, "Overflow in %lf + %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            return CheckDouble(a.v+v, mmax+a.mmax);
+          }
+
+          CheckDouble operator+=(const CheckDouble &a)
+          {
+            if((mmax+a.mmax) > MANTISSA_MAX)
+            {
+              fprintf(stderr, "Overflow in %lf += %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            v += a.v;
+            mmax += a.mmax;
+            return *this;
+          }
+
+          CheckDouble operator-(const CheckDouble &a)const
+          {
+            if((mmax+a.mmax) > MANTISSA_MAX)
+            {
+              fprintf(stderr, "Overflow in %lf - %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            return CheckDouble(v-a.v, mmax+a.mmax);
+          }
+
+          CheckDouble operator-=(const CheckDouble &a)
+          {
+            if((mmax+a.mmax) > MANTISSA_MAX)
+            {
+              fprintf(stderr, "Overflow in %lf += %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            v -= a.v;
+            mmax += a.mmax;
+            return *this;
+          }
+
+          CheckDouble operator-()const
+          {
+            return CheckDouble(-v, mmax);
+          }
+
+          CheckDouble operator*(const CheckDouble &a)const
+          {
+            uint64_t l1 = mmax & 0xffffffff; // the 32 low-order bits
+            uint64_t l2 = a.mmax & 0xffffffff;
+            uint64_t u1 = mmax >> 32;
+            uint64_t u2 = a.mmax >> 32;
+            unsigned long long upper = u1 * u2;
+            if(upper != 0)
+            {
+				ecris(upper);
+              fprintf(stderr, "Overflow in %lf * %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            unsigned long long mid = l1 * u2 + u1 * l2;
+            unsigned long long lower = l1 * l2;
+            if(lower >= MANTISSA_MAX)
+            {
+				ecris(lower);
+              fprintf(stderr, "Overflow in %lf * %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            if(mid > (MANTISSA_MAX>>32))
+            {
+				ecris(mid);
+				zout(l1,l2,u1,u2);
+				zout(mid,(MANTISSA_MAX>>32));
+              fprintf(stderr, "Overflow in %lf * %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            lower += (mid <<32);
+            if(lower > MANTISSA_MAX)
+            {
+				ecris(lower);
+              fprintf(stderr, "Overflow in %lf * %lf\n", v,a.v);
+              fprintf(stderr, "Maximal values: %llu, %llu\n", mmax,a.mmax);
+              abort();
+            }
+            return CheckDouble(v*a.v, mmax*a.mmax);
+          }
+
+          CheckDouble operator/(const double &a)const
+          {
+            if(mmax/fabs(a) > MANTISSA_MAX)
+            {
+              fprintf(stderr, "Overflow in %lf / %lf\n", v,a);
+              fprintf(stderr, "Maximal values: %llu, %lf\n", mmax,a);
+              abort();
+            }
+            return CheckDouble(v/a, mmax/(unsigned long long)fabs(a)+1);
+          }
+
+          CheckDouble operator*=(const int b) 
+          {
+            CheckDouble op((double) b, abs(b));
+            *this = *this * op;
+            return *this;
+          }
+
+          /*
+          friend CheckDouble operator*(const CheckDouble &a,const int b) 
+          {
+            CheckDouble op((double) b, abs(b));
+            return op * a;
+          }
+          */
+          
+          friend CheckDouble operator*(const int32_t b, const CheckDouble &a) 
+          {
+            CheckDouble op((double) b, abs(b));
+            return op * a;
+          }
+
+          friend int operator<(const CheckDouble &op1, const CheckDouble &op2)
+          {
+            return op1.v < op2.v;
+          }
+          
+          friend int operator<=(const CheckDouble &op1, const CheckDouble &op2)
+          {
+            return op1.v <= op2.v;
+          }
+
+          friend int operator>(const CheckDouble &op1, const CheckDouble &op2)
+          {
+            return op1.v > op2.v;
+          }
+
+          friend int operator>=(const CheckDouble &op1, const CheckDouble &op2)
+          {
+            return op1.v >= op2.v;
+          }
+
+          friend CheckDouble round(const CheckDouble &a)
+          {
+            return CheckDouble(round(a.v),a.mmax);
+          }
+          
+          friend CheckDouble trunc(const CheckDouble &a)
+          {
+            return CheckDouble(trunc(a.v),a.mmax);
+          }
+
+          friend CheckDouble remround(const CheckDouble &a, const double d)
+          {
+            double carry = round(a.v/d);
+            return CheckDouble(a.v - carry*d, (unsigned long long)((d+1)/2));
+          }
+
+          friend long long ftoll(const CheckDouble &arg)
+          {
+            return (long long)arg.v;
+          }
+
+          friend void setmax(CheckDouble &arg, unsigned long long max)
+          {
+            arg.mmax = max;
+          }
+
+          friend double todouble(const CheckDouble &arg)
+          {
+            return arg.v;
+          }
+};
+
+int printfoff(...);
+
+#endif // #ifndef CHECKDOUBLE_H
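
checkdouble.h wraps a double together with mmax, a bound on the largest absolute value the number may reach, and every operator re-derives that bound and aborts as soon as a result could no longer be held exactly in the 53-bit double mantissa (MANTISSA_MAX). A minimal stand-alone sketch of how the wrapper behaves; the file name, build line and main() are illustrative, and printfoff is stubbed out here only because checkdouble.h declares it:

    // Illustrative only: exercising CheckDouble's overflow tracking
    // (assumes zout.hpp from this tree is on the include path).
    // Possible build: g++ -I dclxvi-20130329 checkdouble_demo.cpp
    #include "checkdouble.h"

    int printfoff(...) { return 0; }     // stub for the declaration at the end of checkdouble.h

    int main(void)
    {
      CheckDouble a(3000000.0);          // mmax = 3000000
      CheckDouble b(2000000.0);          // mmax = 2000000
      CheckDouble c = a * b;             // fine: bound 6e12 stays below 2^53 - 1
      CheckDouble d = c * c;             // aborts: CheckDouble flags the product bound as too large for 53 bits
      printf("%lf\n", todouble(d));      // never reached
      return 0;
    }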

+ 64 - 0
dclxvi-20130329/consts.s

@@ -0,0 +1,64 @@
+# File:   dclxvi-20130329/consts.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+.globl TWO_TWO
+.globl THREE_THREE
+.globl FOUR_FOUR
+.globl FIVE_FIVE
+.globl SIX_SIX
+.globl EIGHT_EIGHT
+.globl NINE_NINE
+.globl ONE_MINUSONE
+.globl MONE_MONE
+.globl EIGHTEEN_EIGHTEEN
+.globl THIRTY_THIRTY
+.globl ONE_SIX
+.globl ONE_FOUR
+.globl ONE_THREE
+.globl ONE_TWO
+.globl ONE_MINUSSEVEN
+.globl SIX_ONE
+.globl ZERO_ONE 
+.globl MINUSSEVEN_ONE
+.globl MINUSONE_ONE
+.globl FOUR_MINUSSEVEN
+.globl THREE_MINUSONE
+.globl TWO_MINUSONE
+.globl V_V
+.globl V6_V6
+.globl VINV_VINV
+.globl V6INV_V6INV
+.globl ROUND_ROUND
+
+.p2align 4
+
+TWO_TWO: .double 2.,2.
+THREE_THREE: .double 3.,3.
+FOUR_FOUR: .double 4.,4.
+FIVE_FIVE: .double 5.,5.
+SIX_SIX: .double 6.,6.
+EIGHT_EIGHT: .double 8.,8.
+NINE_NINE: .double 9.,9.
+EIGHTEEN_EIGHTEEN: .double 18.,18.
+THIRTY_THIRTY: .double 30.,30.
+ONE_SIX: .double 1.,6.
+ONE_FOUR: .double 1.,4.
+ONE_THREE: .double 1.,3.
+ONE_TWO: .double 1.,2.
+ONE_MINUSSEVEN: .double 1.,-7.
+MINUSSEVEN_ONE: .double -7.,1.
+MINUSONE_ONE: .double -1.,1.
+SIX_ONE: .double 6.,1.
+ZERO_ONE: .double 0.,1.
+FOUR_MINUSSEVEN: .double 4.,-7.
+TWO_MINUSONE: .double 2.,-1.
+THREE_MINUSONE: .double 3.,-1.
+ONE_MINUSONE: .double 1.,-1.
+MONE_MONE: .double -1.,-1.
+V_V: .double 1868033.,1868033.
+V6_V6: .double 11208198.,11208198.
+VINV_VINV: .double 0.00000053532244880042272725429071400515823597743292339146137237548828125,0.00000053532244880042272725429071400515823597743292339146137237548828125
+V6INV_V6INV: .double 0.0000000892204081334037834640851853847121066820591295254416763782501220703125,0.0000000892204081334037834640851853847121066820591295254416763782501220703125
+ROUND_ROUND: .double 6755399441055744.,6755399441055744.
+
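
Two groups of constants above are worth calling out. V_V holds the radix bn_v = 1868033 that the coefficient reductions divide by, V6_V6 is 6*bn_v = 11208198, and VINV_VINV/V6INV_V6INV approximate their reciprocals. ROUND_ROUND is 6755399441055744 = 1.5 * 2^52, the usual magic constant for rounding a double to the nearest integer with one addition and one subtraction. A small stand-alone illustration of that rounding trick (requires the default round-to-nearest FPU mode and no -ffast-math; the numbers are arbitrary):

    // Illustrative only: the add/subtract rounding trick behind ROUND_ROUND.
    #include <cstdio>

    static const double ROUND = 6755399441055744.0;   // 1.5 * 2^52

    int main(void)
    {
      volatile double x = 1868032.6;                   // volatile keeps the compiler from folding the trick away
      double r = (x + ROUND) - ROUND;                  // rounds to nearest: r == 1868033.0
      std::printf("%.1f -> %.1f\n", (double)x, r);
      return 0;
    }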

+ 22 - 0
dclxvi-20130329/cpucycles.c

@@ -0,0 +1,22 @@
+/*
+ * File:   dclxvi-20130329/cpucycles.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+
+long long int cpucycles(void)
+{
+#ifdef __x86_64__
+  unsigned long long result;
+  __asm__ volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
+      : "=a" (result) ::  "%rdx");
+  return result;
+#else
+  long long result;
+  __asm__ volatile(".byte 15;.byte 49" : "=A" (result));
+  return result;
+#endif
+}
+
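
The .byte 15; .byte 49 sequence is the RDTSC opcode (0F 31), so cpucycles() returns the processor's time-stamp counter. A minimal sketch of how a caller times a region with it; work_under_test is a placeholder, not a function from this tree:

    // Illustrative only: cycle-counting a region with cpucycles().
    #include <stdio.h>
    #include "cpucycles.h"

    extern void work_under_test(void);                 // placeholder for the code being measured

    void report_cycles(void)
    {
      unsigned long long before = cpucycles();
      work_under_test();
      unsigned long long after = cpucycles();
      printf("cycles: %llu\n", after - before);
    }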

+ 12 - 0
dclxvi-20130329/cpucycles.h

@@ -0,0 +1,12 @@
+/*
+ * File:   dclxvi-20130329/cpucycles.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef CPUCYCLES_H
+#define CPUCYCLES_H
+
+unsigned long long int cpucycles();
+
+#endif

+ 439 - 0
dclxvi-20130329/curvepoint_fp.c

@@ -0,0 +1,439 @@
+/*
+ * File:   dclxvi-20130329/curvepoint_fp.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+#include <stdio.h>
+#include <stdlib.h>
+
+
+#include "mydouble.h" 
+extern "C" {	
+#include "fpe.h"
+} 
+
+#include "curvepoint_fp.h"
+//#include "scalar_512.h"
+
+#include "zout.hpp"
+#include <typeinfo>
+#include <cxxabi.h>
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//            Point initialization and deletion functions
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Global dummies usable by all curvepoints:
+
+// Set the coordinates of a curvepoint_fp_t by copying the coordinates from another curvepoint_fp
+void curvepoint_fp_set(curvepoint_fp_t rop, const curvepoint_fp_t op)
+{
+	fpe_set(rop->m_x, op->m_x);
+	fpe_set(rop->m_y, op->m_y);
+	fpe_set(rop->m_z, op->m_z);
+	fpe_setzero(rop->m_t);
+}
+
+void curvepoint_fp_setneutral(curvepoint_fp_t rop)
+{
+	fpe_setone(rop->m_x);
+	fpe_setone(rop->m_y);
+	fpe_setzero(rop->m_z);
+	fpe_setzero(rop->m_t);
+}
+
+// Addition of two points, op2 is assumed to be in affine coordinates 
+// For the algorithm see e.g. the Diploma thesis (DA) of Peter Schwabe
+/*
+void curvepoint_fp_mixadd(curvepoint_fp_t rop, const curvepoint_fp_t op1, const curvepoint_fp_t op2)
+{
+	fpe_t tfpe1, tfpe2, tfpe3, tfpe4, tfpe5, tfpe6, tfpe7, tfpe8, tfpe9; // Temporary variables needed for intermediary results
+	fpe_square(tfpe1, op1->m_z);
+	fpe_mul(tfpe2, op1->m_z, tfpe1);
+	fpe_mul(tfpe3, op2->m_x, tfpe1);
+	fpe_mul(tfpe4, op2->m_y, tfpe2);
+	fpe_sub(tfpe5, tfpe3, op1->m_x);
+  fpe_short_coeffred(tfpe5);
+	fpe_sub(tfpe6, tfpe4, op1->m_y);
+	fpe_square(tfpe7, tfpe5);
+	fpe_mul(tfpe8, tfpe7, tfpe5);
+	fpe_mul(tfpe9, op1->m_x, tfpe7);
+
+	fpe_double(tfpe1, tfpe9);
+	fpe_add(tfpe1, tfpe1, tfpe8);
+	fpe_square(rop->m_x, tfpe6);
+	fpe_sub(rop->m_x, rop->m_x, tfpe1);
+  fpe_short_coeffred(rop->m_x);
+	fpe_sub(tfpe1, tfpe9, rop->m_x);
+	fpe_mul(tfpe2, tfpe1, tfpe6);
+	fpe_mul(tfpe3, op1->m_y, tfpe8);
+	fpe_sub(rop->m_y, tfpe2, tfpe3);
+  fpe_short_coeffred(rop->m_y);
+	fpe_mul(rop->m_z, op1->m_z, tfpe5);
+}
+*/
+
+
+#ifndef COMPLETE_ADDITION //UPDATE 512
+int number_call=0;
+
+void curvepoint_fp_double(curvepoint_fp_t rop, const curvepoint_fp_t op)
+{
+	if (number_call == 0)
+	{	
+		signature;
+	}
+	number_call++;	
+	fpe_t tfpe1, tfpe2, tfpe3, tfpe4; // Temporary variables needed for intermediary results
+	//abc;
+	fpe_square(tfpe1, op->m_y);
+	//xyz;
+	//debug(301);
+	fpe_mul(tfpe2, tfpe1, op->m_x);
+	fpe_double(tfpe2, tfpe2);
+	//printf("\n\n\ntfpe2=");  fpe_print(stdout,tfpe2);
+	fpe_double(tfpe2, tfpe2);
+	fpe_square(tfpe3, tfpe1);
+	fpe_double(tfpe3, tfpe3);
+	fpe_double(tfpe3, tfpe3);
+	fpe_double(tfpe3, tfpe3);
+	fpe_square(tfpe4, op->m_x);
+	fpe_triple(tfpe4, tfpe4);
+  fpe_short_coeffred(tfpe4);
+	fpe_square(rop->m_x, tfpe4);
+	fpe_double(tfpe1, tfpe2);
+	fpe_sub(rop->m_x, rop->m_x, tfpe1);
+  fpe_short_coeffred(rop->m_x);
+	fpe_sub(tfpe1, tfpe2, rop->m_x);
+  fpe_short_coeffred(tfpe1);
+	fpe_mul(rop->m_z, op->m_y, op->m_z);
+	fpe_double(rop->m_z, rop->m_z);
+	fpe_mul(rop->m_y, tfpe4, tfpe1);
+	fpe_sub(rop->m_y, rop->m_y, tfpe3);
+  fpe_short_coeffred(rop->m_y);
+  //curvepoint_fp_makeaffine(rop);
+}
+
+
+
+
+
+// Transform Jacobian to Affine Coordinates (z=1)
+void curvepoint_fp_makeaffine(curvepoint_fp_t point)
+{
+	//signature;
+	if (!fpe_iszero(point->m_z))
+	{	
+		fpe_t tfpe1;
+		fpe_invert(tfpe1, point->m_z);
+		fpe_mul(point->m_x, point->m_x, tfpe1);
+		fpe_mul(point->m_x, point->m_x, tfpe1);
+
+		fpe_mul(point->m_y, point->m_y, tfpe1);
+		fpe_mul(point->m_y, point->m_y, tfpe1);
+		fpe_mul(point->m_y, point->m_y, tfpe1);
+
+		fpe_setone(point->m_z);
+	}
+}
+
+#endif
+
+#ifndef COMPLETE_ADDITION //UPDATE 512
+
+void curvepoint_fp_add_vartime(curvepoint_fp_t rop, const curvepoint_fp_t op1, const curvepoint_fp_t op2)
+{
+	//signature; 
+  if(fpe_iszero(op1->m_z))
+    curvepoint_fp_set(rop,op2);
+  else if(fpe_iszero(op2->m_z))
+    curvepoint_fp_set(rop,op1);
+  else
+  {
+    //See http://www.hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/addition/add-2007-bl.op3
+    fpe_t z1z1, z2z2, r, v, s1, s2, u1, u2, h, i, j, t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+    //Z1Z1 = Z1^2
+    fpe_square(z1z1, op1->m_z);
+    //Z2Z2 = Z2^2
+    fpe_square(z2z2, op2->m_z);
+    //U1 = X1*Z2Z2
+    fpe_mul(u1, op1->m_x, z2z2);
+    //U2 = X2*Z1Z1
+    fpe_mul(u2, op2->m_x, z1z1);
+    //t0 = Z2*Z2Z2
+    fpe_mul(t0, op2->m_z, z2z2);
+    //S1 = Y1*t0
+    fpe_mul(s1,op1->m_y,t0);
+    //t1 = Z1*Z1Z1
+    fpe_mul(t1,op1->m_z, z1z1);
+    //S2 = Y2*t1
+    fpe_mul(s2,op2->m_y,t1);
+    if(fpe_iseq(u1,u2))
+    {
+      if(fpe_iseq(s1,s2))
+        curvepoint_fp_double(rop,op1);
+      else
+        curvepoint_fp_setneutral(rop);
+    }
+    //H = U2-U1
+    fpe_sub(h,u2,u1);
+    //t2 = 2*H
+    fpe_add(t2, h, h);
+    //I = t2^2
+    fpe_short_coeffred(t2);
+    fpe_square(i,t2);
+    //J = H*I
+    fpe_mul(j,h,i);
+    //t3 = S2-S1
+    fpe_sub(t3,s2,s1);
+    //r = 2*t3
+    fpe_add(r,t3,t3);
+    //V = U1*I
+    fpe_mul(v,u1,i);
+    //t4 = r^2
+    fpe_short_coeffred(r);
+    fpe_square(t4,r);
+    //t5 = 2*V
+    fpe_add(t5,v,v);
+    //t6 = t4-J
+    fpe_sub(t6,t4,j);
+    //X3 = t6-t5
+    fpe_sub(rop->m_x,t6,t5);
+    fpe_short_coeffred(rop->m_x);
+    //t7 = V-X3
+    fpe_sub(t7,v,rop->m_x);
+    //t8 = S1*J
+    fpe_mul(t8,s1,j);
+    //t9 = 2*t8
+    fpe_add(t9,t8,t8);
+    //t10 = r*t7
+    fpe_mul(t10,r,t7);
+    //Y3 = t10-t9
+    fpe_sub(rop->m_y,t10,t9);
+    fpe_short_coeffred(rop->m_y);
+    //t11 = Z1+Z2
+    fpe_add(t11,op1->m_z,op2->m_z);
+    //t12 = t11^2
+    fpe_short_coeffred(t11);
+    fpe_square(t12,t11);
+    //t13 = t12-Z1Z1
+    fpe_sub(t13,t12,z1z1);
+    //t14 = t13-Z2Z2
+    fpe_sub(t14,t13,z2z2);
+    //Z3 = t14*H
+    fpe_mul(rop->m_z,t14,h);
+    fpe_short_coeffred(rop->m_z);
+  }
+  //curvepoint_fp_makeaffine(rop);
+}
+
+#endif
+
+
+
+static void curvepoint_fp_add_nocheck(curvepoint_fp_t rop, const curvepoint_fp_t op1, const curvepoint_fp_t op2)
+{
+  //See http://www.hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/addition/add-2007-bl.op3
+  fpe_t z1z1, z2z2, r, v, s1, s2, u1, u2, h, i, j, t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+  //Z1Z1 = Z1^2
+  fpe_square(z1z1, op1->m_z);
+  //Z2Z2 = Z2^2
+  fpe_square(z2z2, op2->m_z);
+  //U1 = X1*Z2Z2
+  fpe_mul(u1, op1->m_x, z2z2);
+  //U2 = X2*Z1Z1
+  fpe_mul(u2, op2->m_x, z1z1);
+  //t0 = Z2*Z2Z2
+  fpe_mul(t0, op2->m_z, z2z2);
+  //S1 = Y1*t0
+  fpe_mul(s1,op1->m_y,t0);
+  //t1 = Z1*Z1Z1
+  fpe_mul(t1,op1->m_z, z1z1);
+  //S2 = Y2*t1
+  fpe_mul(s2,op2->m_y,t1);
+  //H = U2-U1
+  fpe_sub(h,u2,u1);
+  //t2 = 2*H
+  fpe_add(t2, h, h);
+  //I = t2^2
+  fpe_short_coeffred(t2);
+  fpe_square(i,t2);
+  //J = H*I
+  fpe_mul(j,h,i);
+  //t3 = S2-S1
+  fpe_sub(t3,s2,s1);
+  //r = 2*t3
+  fpe_add(r,t3,t3);
+  //V = U1*I
+  fpe_mul(v,u1,i);
+  //t4 = r^2
+  fpe_short_coeffred(r);
+  fpe_square(t4,r);
+  //t5 = 2*V
+  fpe_add(t5,v,v);
+  //t6 = t4-J
+  fpe_sub(t6,t4,j);
+  //X3 = t6-t5
+  fpe_sub(rop->m_x,t6,t5);
+  fpe_short_coeffred(rop->m_x);
+  //t7 = V-X3
+  fpe_sub(t7,v,rop->m_x);
+  //t8 = S1*J
+  fpe_mul(t8,s1,j);
+  //t9 = 2*t8
+  fpe_add(t9,t8,t8);
+  //t10 = r*t7
+  fpe_mul(t10,r,t7);
+  //Y3 = t10-t9
+  fpe_sub(rop->m_y,t10,t9);
+  fpe_short_coeffred(rop->m_y);
+  //t11 = Z1+Z2
+  fpe_add(t11,op1->m_z,op2->m_z);
+  //t12 = t11^2
+  fpe_short_coeffred(t11);
+  fpe_square(t12,t11);
+  //t13 = t12-Z1Z1
+  fpe_sub(t13,t12,z1z1);
+  //t14 = t13-Z2Z2
+  fpe_sub(t14,t13,z2z2);
+  //Z3 = t14*H
+  fpe_mul(rop->m_z,t14,h);
+  fpe_short_coeffred(rop->m_z);
+}
+
+/*
+void curvepoint_fp_scalarmult_vartime_old(curvepoint_fp_t rop, const curvepoint_fp_t op, const scalar_t scalar, const unsigned int scalar_bitsize)
+{
+	size_t i;
+	curvepoint_fp_t r;
+	curvepoint_fp_set(r, op);
+	for(i = scalar_bitsize-1; i > 0; i--)
+	{
+		curvepoint_fp_double(r, r);
+		if(scalar_getbit(scalar, i - 1)) 
+			curvepoint_fp_mixadd(r, r, op);
+	}
+	curvepoint_fp_set(rop, r);
+}
+*/
+
+static void choose_t(curvepoint_fp_t t, struct curvepoint_fp_struct *pre, signed char b)
+{
+	//signature;
+			//zout((int)b);
+  if(b>0)
+    *t = pre[b-1];
+  else 
+  {
+    *t = pre[-b-1];
+    	    //printf("before t = ");
+	//curvepoint_fp_print(stdout,t);
+	//printf("\n\n\n");
+    curvepoint_fp_neg(t,t);
+    	    //printf("after t = ");
+	//curvepoint_fp_print(stdout,t);
+	//printf("\n\n\n");
+  }
+}
+
+void curvepoint_fp_scalarmult_vartime(curvepoint_fp_t rop, const curvepoint_fp_t op, const scalar_t scalar)
+{
+	signed char s[65]; 
+	int i; 
+	curvepoint_fp_t t;
+	struct curvepoint_fp_struct pre[8];
+	//zout(__builtin_return_address(0),__builtin_return_address(1),__func__,__PRETTY_FUNCTION__);
+	//cout << abi::__cxa_demangle(typeid(__func__).name(), 0, 0, 0);
+	scalar_window4(s,scalar);
+	//zout(__builtin_return_address(0),__func__);
+	//printf("\n");
+	//ecris(s = );
+	//for(i=0;i<64;i++)
+	//printf("%d ",s[i]);
+	//printf("\n");
+	pre[0] = *op;                                         //  P 
+	curvepoint_fp_double(&pre[1], &pre[0]);               // 2P
+	curvepoint_fp_add_vartime(&pre[2], &pre[0], &pre[1]); // 3P
+	curvepoint_fp_double(&pre[3], &pre[1]);               // 4P
+	curvepoint_fp_add_vartime(&pre[4], &pre[0], &pre[3]); // 5P
+	curvepoint_fp_double(&pre[5], &pre[2]);               // 6P
+	curvepoint_fp_add_vartime(&pre[6], &pre[0], &pre[5]); // 7P
+	curvepoint_fp_double(&pre[7], &pre[3]);               // 8P
+	//printf("\n\n\nP = ");  curvepoint_fp_print(stdout,&pre[0]);
+	//printf("\n\n\n2P = ");  curvepoint_fp_print(stdout,&pre[1]);
+	//printf("\n\n\n3P = ");  curvepoint_fp_print(stdout,&pre[2]);
+	//printf("\n\n\n4P = ");  curvepoint_fp_print(stdout,&pre[3]);
+	//printf("\n\n\n5P = ");  curvepoint_fp_print(stdout,&pre[4]);
+	//printf("\n\n\n6P = ");  curvepoint_fp_print(stdout,&pre[5]);
+	//printf("\n\n\n7P = ");  curvepoint_fp_print(stdout,&pre[6]);
+	//printf("\n\n\n8P = ");  curvepoint_fp_print(stdout,&pre[7]);
+	//printf("\n\n\n");
+	i = 64; 
+		
+	while(!s[i]&&i>0) i--;
+
+	if(!s[i]) 
+	{
+		curvepoint_fp_setneutral(rop);
+		//printf("\n\n\n rop = ");
+		//curvepoint_fp_print(stdout,rop);
+	}
+	else
+	{  
+		choose_t(rop,pre,s[i]);
+		i--;
+		for(;i>=0;i--)
+		{
+			//printf("i = %d\n",i);	
+			curvepoint_fp_double(rop, rop); 
+			curvepoint_fp_double(rop, rop);
+			curvepoint_fp_double(rop, rop);
+			curvepoint_fp_double(rop, rop);
+			if(s[i])
+			{
+				
+				//printf("i=%d \t s[i] = %d\n",i,s[i]);		
+				choose_t(t,pre,s[i]);
+				//printf("rop = ");
+				//curvepoint_fp_print(stdout,rop);
+				//printf("\n\n\n");
+				//printf("t = ");
+				//curvepoint_fp_print(stdout,t);
+				//printf("\n\n\n");
+				curvepoint_fp_add_vartime(rop,rop,t);
+			} 
+		}
+	}
+}
+
+// Negate a point, store in rop:
+void curvepoint_fp_neg(curvepoint_fp_t rop, const curvepoint_fp_t op)
+{
+	if (fpe_iszero(op->m_z))
+	{
+		curvepoint_fp_set(rop,op);
+	}
+    else
+    {
+		fpe_t tfpe1;
+		fpe_set(rop->m_x, op->m_x);
+		fpe_neg(rop->m_y, op->m_y);
+		fpe_set(rop->m_z, op->m_z);
+	}
+}
+
+
+
+// Print a point:
+void curvepoint_fp_print(FILE *outfile, const curvepoint_fp_t point)
+{
+	fprintf(outfile, "______________Curve______________\n\nX = ");
+	fpe_print(outfile, point->m_x);
+	fprintf(outfile, "\n\nY = ");
+	fpe_print(outfile, point->m_y);
+	fprintf(outfile, "\n\nZ = ");
+	fpe_print(outfile, point->m_z);
+	fprintf(outfile, "\n_________________________________\n");
+}
+
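
curvepoint_fp_scalarmult_vartime consumes a 65-digit signed window-4 expansion of the scalar (digits in [-7, 8], which is exactly what the P..8P table plus the negation in choose_t can serve) and performs four doublings per digit. scalar_window4 itself is not part of this file; the recoding below is a standard construction with that output shape, written as an illustrative sketch under the assumption that scalar_t is four little-endian 64-bit limbs, and is not claimed to be the library's implementation:

    // Illustrative only: one possible signed window-4 recoding with digits in [-7, 8].
    static void window4_sketch(signed char s[65], const unsigned long long scalar[4])
    {
      int carry = 0;
      for (int i = 0; i < 64; i++)
      {
        int d = (int)((scalar[i / 16] >> (4 * (i % 16))) & 0xf) + carry;  // next nibble plus carry, 0..16
        carry = (d > 8);                       // values above 8 become negative and borrow from the next nibble
        s[i] = (signed char)(d - 16 * carry);  // digit in [-7, 8]
      }
      s[64] = (signed char)carry;              // a final carry digit may remain, hence 65 digits
    }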

+ 62 - 0
dclxvi-20130329/curvepoint_fp.h

@@ -0,0 +1,62 @@
+/*
+ * File:   dclxvi-20130329/curvepoint_fp.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef CURVEPOINT_FP_H
+#define CURVEPOINT_FP_H
+
+#include <stdio.h>
+
+#include "fpe.h"
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+
+/// Structure describing a point on a BN-curve
+typedef struct curvepoint_fp_struct curvepoint_fp_struct_t;
+struct curvepoint_fp_struct
+{	
+	fpe_t m_x; // X-Coordinate (Jacobian Coordinate system)
+	fpe_t m_y; // Y-Coordinate (Jacobian Coordinate system)
+	fpe_t m_z; // Z-Coordinate (Jacobian Coordinate system)
+	fpe_t m_t; // T = Z^2, only used during pairing computation, set to zero if not set
+};
+
+typedef curvepoint_fp_struct_t curvepoint_fp_t[1];
+
+void curvepoint_fp_init(curvepoint_fp_t rop);
+
+void curvepoint_fp_init_set_str(curvepoint_fp_t rop, const char* x,const char* y,const char* z);
+
+void curvepoint_fp_init_set(curvepoint_fp_t rop, const curvepoint_fp_t op);
+
+void curvepoint_fp_init_set_fpe(curvepoint_fp_t rop, const fpe_t opx, const fpe_t opy);
+
+void curvepoint_fp_setneutral(curvepoint_fp_t rop);
+	
+// Generate a point on the curve
+void curvepoint_fp_set_str(curvepoint_fp_t point, const char* x,const char* y,const char* z);
+
+// Generate a curvepoint_fp_t by copying the coordinates from another curvepoint_fp
+void curvepoint_fp_set(curvepoint_fp_t point, const curvepoint_fp_t arg);
+
+void curvepoint_fp_add_vartime(curvepoint_fp_t rop, const curvepoint_fp_t op1, const curvepoint_fp_t op2);
+
+void curvepoint_fp_double(curvepoint_fp_t rop, const curvepoint_fp_t op);
+
+void curvepoint_fp_scalarmult_vartime(curvepoint_fp_t rop, const curvepoint_fp_t op, const scalar_t s);
+
+// Compute the Inverse of a Point op, store result in rop:
+void curvepoint_fp_neg(curvepoint_fp_t rop, const curvepoint_fp_t op);
+
+// Transform to Affine Coordinates (z=1)
+void curvepoint_fp_makeaffine(curvepoint_fp_t point);
+
+// Print the (Jacobian) coordinates of a point
+void curvepoint_fp_print(FILE *outfile, const curvepoint_fp_t point);
+
+#endif // ifndef CURVEPOINT_FP_H

+ 64 - 0
dclxvi-20130329/curvepoint_fp_multiscalar.c

@@ -0,0 +1,64 @@
+/*
+ * File:   dclxvi-20130329/curvepoint_fp_multiscalar.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include "curvepoint_fp.h"
+#include "scalar.h"
+#include "index_heap.h"
+
+#define MAX_HEAP_SIZE 63 // Can also try 127 or 255
+void curvepoint_fp_multiscalarmult_vartime(curvepoint_fp_t rop, struct curvepoint_fp_struct *p, scalar_t *s, const unsigned long long npoints)
+{
+  unsigned long long pos[MAX_HEAP_SIZE];
+  unsigned long long max1, max2,i;
+  curvepoint_fp_t t;
+  unsigned long long tctr, ctr = npoints;
+
+  curvepoint_fp_setneutral(rop);
+  while(ctr>=MAX_HEAP_SIZE)
+  {
+    heap_init(pos, MAX_HEAP_SIZE, s);
+    for(i=0;;i++)
+    {
+      heap_get2max(pos, &max1, &max2, s);
+      if(scalar_iszero_vartime(s[max2])) break;
+      scalar_sub_nored(s[max1],s[max1],s[max2]);
+      curvepoint_fp_add_vartime(&p[max2],&p[max2],&p[max1]);
+      heap_rootreplaced(pos, MAX_HEAP_SIZE, s);
+    }
+    curvepoint_fp_scalarmult_vartime(t, &p[max1], s[max1]);
+    curvepoint_fp_add_vartime(rop,rop,t);
+    p += MAX_HEAP_SIZE;
+    s += MAX_HEAP_SIZE;
+    ctr -= MAX_HEAP_SIZE;
+  }
+  if(ctr > 5)
+  {
+    tctr = (ctr-1)|1; // need an odd heap size 
+    heap_init(pos, tctr, s);
+    for(i=0;;i++)
+    {
+      heap_get2max(pos, &max1, &max2, s);
+      if(scalar_iszero_vartime(s[max2])) break;
+      scalar_sub_nored(s[max1],s[max1],s[max2]);
+      curvepoint_fp_add_vartime(&p[max2],&p[max2],&p[max1]);
+      heap_rootreplaced(pos, tctr, s);
+    }
+    curvepoint_fp_scalarmult_vartime(t, &p[max1], s[max1]);
+    curvepoint_fp_add_vartime(rop,rop,t);
+    p += tctr;
+    s += tctr;
+    ctr -= tctr;
+  }
+  while(ctr>0)
+  {
+    curvepoint_fp_scalarmult_vartime(t,p,*s);
+    curvepoint_fp_add_vartime(rop,rop,t);
+    p++;
+    s++;
+    ctr--;
+  }
+}
+
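
The loop above is the heap-based Bos-Coster strategy for multi-scalar multiplication: the largest scalar is repeatedly replaced by its difference with the second largest while the corresponding point is folded into the second entry, which leaves the sum invariant. A tiny worked instance with illustrative numbers:

    5P + 3Q = (5-3)P + 3(Q + P) = 2P + 3(P + Q)

so the scalars shrink quickly and only one small ordinary scalar multiplication remains per batch.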

+ 12 - 0
dclxvi-20130329/curvepoint_fp_multiscalar.h

@@ -0,0 +1,12 @@
+/*
+ * File:   dclxvi-20130329/curvepoint_fp_multiscalar.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef CURVEPOINT_FP_MULTISCALAR_H
+#define CURVEPOINT_FP_MULTISCALAR_H
+
+void curvepoint_fp_multiscalarmult_vartime(curvepoint_fp_t rop, struct curvepoint_fp_struct *p, scalar_t *s, const unsigned long long npoints);
+
+#endif

+ 132 - 0
dclxvi-20130329/final_expo.c

@@ -0,0 +1,132 @@
+#ifndef NEW_PARAMETERS
+
+
+/*
+ * File:   dclxvi-20130329/final_expo.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include "final_expo.h"
+#include "fpe.h"
+
+extern const scalar_t bn_u;
+extern const scalar_t bn_v_scalar;
+extern const unsigned long bn_u_bitsize;
+  
+static void fp12e_powv_special_square(fp12e_t rop, const fp12e_t op)
+{
+    fp12e_t tmp0, tmp1, tmp2;
+    //XXX Implement
+    fp12e_special_square_finexp(tmp0,op);
+    fp12e_special_square_finexp(tmp0,tmp0);
+    fp12e_special_square_finexp(tmp0,tmp0);   // t0 = op^8
+    fp12e_special_square_finexp(tmp1,tmp0);
+    fp12e_special_square_finexp(tmp1,tmp1);
+    fp12e_special_square_finexp(tmp1,tmp1);   // t1 = op^64
+    fp12e_conjugate(tmp2, tmp0);              // t2 = op^-8
+    fp12e_mul(tmp2,tmp2,op);                  // t2 = op^-7
+    fp12e_mul(tmp2,tmp2,tmp1);                // tmp2 = op^57
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);   // tmp2 = op^(2^7*57) = op^7296
+    fp12e_mul(tmp2,tmp2,op);                  // tmp2 = op^7297
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  
+    fp12e_special_square_finexp(tmp2,tmp2);  // tmp2 = op^(7297*256) = op^1868032
+    fp12e_mul(rop,tmp2,op);                  // rop  = op^v
+}
+
+static void fp12e_powu_special_square(fp12e_t rop, const fp12e_t op)
+{
+    fp12e_powv_special_square(rop, op);
+    fp12e_powv_special_square(rop, rop);
+    fp12e_powv_special_square(rop, rop);
+}
+
+void final_expo(fp12e_t rop)
+{
+  /* This all has to change to support scalar_t instead of mpz_t */
+	// First part: (p^6 - 1)
+	fp12e_t dummy1, dummy2, fp, fp2, fp3, fu, fu2, fu3, fu2p, fu3p, y0, y1, y2, y3, y4, y5, y6, t0, t1;
+	fp12e_set(dummy1, rop);
+	
+	// This is exactly the p^6-Frobenius action:
+	fp6e_neg(rop->m_a, rop->m_a);
+	
+	fp12e_invert(dummy2, dummy1);
+	fp12e_mul(rop, rop, dummy2);
+    // After this point, rop has norm 1, so we can use 
+    // special squaring and exponentiation.
+
+	// Second part: (p^2 + 1)
+	fp12e_set(dummy1, rop);
+	fp12e_frobenius_p2(rop, rop);
+	fp12e_mul(rop, rop, dummy1);
+
+  /* Hard part */
+  fp12e_frobenius_p(fp, rop);
+  fp12e_frobenius_p2(fp2, rop);
+  fp12e_frobenius_p(fp3, fp2);
+  
+  fp12e_powu_special_square(fu, rop);
+  fp12e_powu_special_square(fu2, fu);
+  fp12e_powu_special_square(fu3, fu2);
+  fp12e_frobenius_p(y3, fu);
+  fp12e_frobenius_p(fu2p, fu2);
+  fp12e_frobenius_p(fu3p, fu3);
+  fp12e_frobenius_p2(y2,fu2);
+  fp12e_mul(y0, fp, fp2);
+  fp12e_mul(y0, y0, fp3);
+  
+  fp12e_conjugate(y1, rop);
+  
+  fp12e_conjugate(y5, fu2);
+  fp12e_conjugate(y3, y3);
+  fp12e_mul(y4, fu, fu2p);
+  fp12e_conjugate(y4, y4);
+
+  fp12e_mul(y6, fu3, fu3p);
+  fp12e_conjugate(y6, y6);
+
+//t0 := fp12square(y6);
+  fp12e_special_square_finexp(t0, y6);
+//t0 := t0*y4;
+  fp12e_mul(t0, t0, y4);
+//t0 := t0*y5;
+  fp12e_mul(t0, t0, y5);
+//t1 := y3*y5;
+  fp12e_mul(t1, y3, y5);
+//t1 := t1*t0;
+  fp12e_mul(t1, t1, t0);
+//t0 := t0*y2;
+  fp12e_mul(t0, t0, y2);
+//t1 := t1^2;
+  fp12e_special_square_finexp(t1, t1);
+//t1 := t1*t0;
+  fp12e_mul(t1, t1, t0);
+//t1 := t1^2;
+  fp12e_special_square_finexp(t1, t1);
+//t0 := t1*y1;
+  fp12e_mul(t0, t1, y1);
+//t1 := t1*y0;
+  fp12e_mul(t1, t1, y0);
+//t0 := t0^2;
+  fp12e_special_square_finexp(t0, t0);
+//t0 := t0*t1;
+  fp12e_mul(rop, t0, t1);
+
+}
+
+
+#endif
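
The addition chain in fp12e_powv_special_square can be checked against the radix constant v = 1868033 (V_V in consts.s):

    op^{-8} \cdot op \cdot op^{64} = op^{57}, \qquad
    (op^{57})^{2^{7}} \cdot op = op^{7297}, \qquad
    (op^{7297})^{2^{8}} \cdot op = op^{1868033} = op^{v}

and fp12e_powu_special_square applies this three times, i.e. it raises to the power v^3.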

+ 24 - 0
dclxvi-20130329/final_expo.h

@@ -0,0 +1,24 @@
+/*
+ * File:   dclxvi-20130329/final_expo.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef FINAL_EXPO_H
+#define FINAL_EXPO_H
+
+// BEFORE
+//#include "fp12e.h"
+
+// AFTER
+#include "mul.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+#include "fp12e.h"
+} 
+
+void final_expo(fp12e_t rop);
+
+#endif // ifndef FINAL_EXPO_H

+ 346 - 0
dclxvi-20130329/fp12e.c

@@ -0,0 +1,346 @@
+/*
+ * File:   dclxvi-20130329/fp12e.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+
+#include <stdio.h>
+#include <assert.h>
+
+
+//BEFORE
+//#include "fp6e.h"
+//#include "fp12e.h"
+
+//AFTER
+#include "mul.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+#include "fp12e.h"
+} 
+
+extern const fp2e_t bn_zpminus1;
+extern const fpe_t bn_zeta;
+
+
+
+// Set fp12e_t rop to given value:
+void fp12e_set(fp12e_t rop, const fp12e_t op)
+{
+  fp6e_set(rop->m_a, op->m_a);
+  fp6e_set(rop->m_b, op->m_b);
+}
+
+// Initialize an fp12e, set to value given in two fp6es
+void fp12e_set_fp6e(fp12e_t rop, const fp6e_t a, const fp6e_t b)
+{
+  fp6e_set(rop->m_a, a);
+  fp6e_set(rop->m_b, b);
+}
+
+// Set rop to one:
+void fp12e_setone(fp12e_t rop)
+{
+  fp6e_setzero(rop->m_a);
+  fp6e_setone(rop->m_b);
+}
+
+// Set rop to zero:
+void fp12e_setzero(fp12e_t rop)
+{
+  fp6e_setzero(rop->m_a);
+  fp6e_setzero(rop->m_b);
+}
+
+// Compare for equality:
+int fp12e_iseq(const fp12e_t op1, const fp12e_t op2)
+{
+  int ret = fp6e_iseq(op1->m_a, op2->m_a);
+  ret = ret && fp6e_iseq(op1->m_b, op2->m_b);
+  return ret;
+}
+
+int fp12e_isone(const fp12e_t op)
+{
+  int ret = fp6e_iszero(op->m_a);
+  ret = ret && fp6e_isone(op->m_b);
+  return ret;
+}
+
+int fp12e_iszero(const fp12e_t op)
+{
+  int ret = fp6e_iszero(op->m_a);
+  ret = ret && fp6e_iszero(op->m_b);
+  return ret;
+}
+
+void fp12e_cmov(fp12e_t rop, const fp12e_t op, int c)
+{
+  fp6e_cmov(rop->m_a, op->m_a, c);
+  fp6e_cmov(rop->m_b, op->m_b, c);
+}
+
+// Compute conjugate over Fp6:
+void fp12e_conjugate(fp12e_t rop, const fp12e_t op2)
+{
+  fp6e_neg(rop->m_a, op2->m_a);
+  fp6e_set(rop->m_b, op2->m_b);
+}
+
+// Add two fp12e, store result in rop:
+void fp12e_add(fp12e_t rop, const fp12e_t op1, const fp12e_t op2)
+{
+  fp6e_add(rop->m_a, op1->m_a, op2->m_a);
+  fp6e_add(rop->m_b, op1->m_b, op2->m_b);
+}
+
+// Subtract op2 from op1, store result in rop:
+void fp12e_sub(fp12e_t rop, const fp12e_t op1, const fp12e_t op2)
+{
+  fp6e_sub(rop->m_a, op1->m_a, op2->m_a);
+  fp6e_sub(rop->m_b, op1->m_b, op2->m_b);
+}
+
+// Multiply two fp12e, store result in rop:
+void fp12e_mul(fp12e_t rop, const fp12e_t op1, const fp12e_t op2)
+{
+#ifdef BENCH
+  nummultp12 ++;
+  multp12cycles -= cpucycles();
+#endif
+
+  fp6e_t tmp1, tmp2, tmp3; // Needed to store intermediary results
+
+  fp6e_mul(tmp1, op1->m_a, op2->m_a);
+  fp6e_mul(tmp3, op1->m_b, op2->m_b);
+
+  fp6e_add(tmp2, op2->m_a, op2->m_b);
+  fp6e_short_coeffred(tmp2);
+
+  fp6e_add(rop->m_a, op1->m_a, op1->m_b);
+  fp6e_short_coeffred(rop->m_a);
+  fp6e_set(rop->m_b, tmp3);
+
+  fp6e_mul(rop->m_a, rop->m_a, tmp2);
+  fp6e_sub(rop->m_a, rop->m_a, tmp1);
+  fp6e_sub(rop->m_a, rop->m_a, rop->m_b);
+  fp6e_short_coeffred(rop->m_a);
+  fp6e_multau(tmp1, tmp1);
+  fp6e_add(rop->m_b, rop->m_b, tmp1);
+  fp6e_short_coeffred(rop->m_b);
+#ifdef BENCH
+  multp12cycles += cpucycles();
+#endif
+}
+
+void fp12e_mul_fp6e(fp12e_t rop, const fp12e_t op1, const fp6e_t op2)
+{
+  fp6e_mul(rop->m_a, op1->m_a, op2);
+  fp6e_mul(rop->m_b, op1->m_b, op2);
+}
+
+// Square an fp12e, store result in rop:
+void fp12e_square(fp12e_t rop, const fp12e_t op)
+{
+#ifdef BENCH
+  numsqp12 ++;
+  sqp12cycles -= cpucycles();
+#endif
+  fp6e_t tmp1, tmp2, tmp3; // Needed to store intermediary results
+
+  fp6e_mul(tmp1, op->m_a, op->m_b);
+
+  fp6e_add(tmp2, op->m_a, op->m_b);
+  fp6e_short_coeffred(tmp2);
+  fp6e_multau(tmp3, op->m_a);
+  fp6e_add(rop->m_b, tmp3, op->m_b);
+  fp6e_short_coeffred(rop->m_b);
+  fp6e_mul(rop->m_b, rop->m_b, tmp2);
+
+  fp6e_sub(rop->m_b, rop->m_b, tmp1);
+  fp6e_multau(tmp2, tmp1);
+  fp6e_sub(rop->m_b, rop->m_b, tmp2);
+  fp6e_short_coeffred(rop->m_b);
+
+  fp6e_add(rop->m_a, tmp1, tmp1);
+  fp6e_short_coeffred(rop->m_a);
+#ifdef BENCH
+  sqp12cycles += cpucycles();
+#endif
+}
+
+// Multiply an fp12e by a line function value, store result in rop:
+// The line function is given by 3 fp2e elements op2, op3, op4 as
+// line = (op2*tau + op3)*z + op4 = a2*z + b2.
+void fp12e_mul_line(fp12e_t rop, const fp12e_t op1, const fp2e_t op2, const fp2e_t op3, const fp2e_t op4)
+{
+#ifdef BENCH
+  nummultp12 ++;
+  multp12cycles -= cpucycles();
+#endif
+
+  fp2e_t fp2_0, tmp;
+  fp6e_t tmp1, tmp2, tmp3; // Needed to store intermediary results
+
+  fp2e_setzero(fp2_0);                        // fp2_0 = 0
+  fp6e_set_fp2e(tmp1, fp2_0, op2, op3);       // tmp1 = a2 = op2*tau + op3
+  fp6e_mul_shortfp6e(tmp1, op1->m_a, tmp1);   // tmp1 = a1*a2 
+  fp6e_mul_fp2e(tmp3, op1->m_b, op4);         // tmp3 = b1*op4 = b1*b2
+
+  fp2e_add(tmp, op3, op4);
+  fp2e_short_coeffred(tmp);
+  fp6e_set_fp2e(tmp2, fp2_0, op2, tmp);       // tmp2 = a2 + b2
+  fp6e_add(rop->m_a, op1->m_a, op1->m_b);     // a3 = a1 + b1
+  fp6e_short_coeffred(rop->m_a);
+
+  fp6e_set(rop->m_b, tmp3);                   // b3 = b1*b2
+
+  fp6e_mul_shortfp6e(rop->m_a, rop->m_a, tmp2);// a3 = (a1+b1)*(a2+b2)
+  fp6e_sub(rop->m_a, rop->m_a, tmp1);
+  fp6e_sub(rop->m_a, rop->m_a, rop->m_b);     // a3 = a1*b2 + a2*b1
+  fp6e_short_coeffred(rop->m_a);
+  fp6e_multau(tmp1, tmp1);                    // tmp1 = a1*a2*tau
+  fp6e_add(rop->m_b, rop->m_b, tmp1);         // b3 = b1*b2 + a1*a2*tau
+  fp6e_short_coeffred(rop->m_b);
+#ifdef BENCH
+  multp12cycles += cpucycles();
+#endif
+}
+
+void fp12e_pow_vartime(fp12e_t rop, const fp12e_t op, const scalar_t exp)
+{
+  fp12e_t dummy;
+  unsigned int startbit;
+
+  startbit = scalar_scanb(exp);
+  fp12e_set(dummy, op);
+  fp12e_set(rop,op);
+  int i;
+  for(i = startbit; i > 0; i--)
+  {
+    fp12e_square(rop, rop);
+    if(scalar_getbit(exp, i - 1)) 
+      fp12e_mul(rop, rop, dummy);
+  }
+}
+
+// Implicit fp4 squaring for Granger/Scott special squaring in final expo
+// fp4e_square takes two fp2e op1, op2 representing the fp4 element 
+// op1*z^3 + op2, writes the square to rop1, rop2 representing rop1*z^3 + rop2.
+// (op1*z^3 + op2)^2 = (2*op1*op2)*z^3 + (op1^2*xi + op2^2).
+void fp4e_square(fp2e_t rop1, fp2e_t rop2, const fp2e_t op1, const fp2e_t op2)
+{
+  fp2e_t t1, t2; 
+
+  fp2e_square(t1, op1);        // t1 = op1^2
+  fp2e_square(t2, op2);        // t2 = op2^2
+
+  //fp2e_mul(rop1, op1, op2);    // rop1 = op1*op2
+  //fp2e_add(rop1, rop1, rop1);  // rop1 = 2*op1*op2
+  fp2e_add(rop1, op1, op2);
+  fp2e_short_coeffred(rop1);
+  fp2e_square(rop1, rop1);
+  fp2e_sub2(rop1, t1);
+  fp2e_sub2(rop1, t2);    // rop1 = 2*op1*op2
+
+  fp2e_mulxi(rop2, t1);        // rop2 = op1^2*xi
+  fp2e_add2(rop2, t2);    // rop2 = op1^2*xi + op2^2
+}
+
+// Special squaring for use on elements in T_6(fp2) (after the
+// easy part of the final exponentiation). Used in the hard part
+// of the final exponentiation. Function uses formulas in
+// Granger/Scott (PKC2010).
+void fp12e_special_square_finexp(fp12e_t rop, const fp12e_t op)
+{
+  fp2e_t f00, f01, f02, f10, f11, f12;
+  fp2e_t t00, t01, t02, t10, t11, t12, t;
+  fp6e_t f0, f1;
+
+  fp4e_square(t11, t00, op->m_a->m_b, op->m_b->m_c);
+  fp4e_square(t12, t01, op->m_b->m_a, op->m_a->m_c);
+  fp4e_square(t02, t10, op->m_a->m_a, op->m_b->m_b);
+
+  fp2e_mulxi(t, t02);
+  fp2e_set(t02, t10);
+  fp2e_set(t10, t);
+
+  fp2e_mul_scalar(f00, op->m_b->m_c, -2);
+  fp2e_mul_scalar(f01, op->m_b->m_b, -2);
+  fp2e_mul_scalar(f02, op->m_b->m_a, -2);
+  fp2e_double(f10, op->m_a->m_c);
+  fp2e_double(f11, op->m_a->m_b);
+  fp2e_double(f12, op->m_a->m_a);
+
+  fp2e_triple2(t00);
+  fp2e_triple2(t01);
+  fp2e_triple2(t02);
+  fp2e_triple2(t10);
+  fp2e_triple2(t11);
+  fp2e_triple2(t12);
+
+  fp2e_add2(f00, t00);
+  fp2e_add2(f01, t01);
+  fp2e_add2(f02, t02);
+  fp2e_add2(f10, t10);
+  fp2e_add2(f11, t11);
+  fp2e_add2(f12, t12);
+
+  fp6e_set_fp2e(f0, f02, f01, f00);
+  fp6e_short_coeffred(f0);
+  fp6e_set_fp2e(f1, f12, f11, f10);
+  fp6e_short_coeffred(f1);
+  fp12e_set_fp6e(rop,f1,f0);
+}
+
+void fp12e_invert(fp12e_t rop, const fp12e_t op)
+{
+#ifdef BENCH
+  numinvp12 ++;
+  invp12cycles -= cpucycles();
+#endif
+  fp6e_t tmp1, tmp2; // Needed to store intermediary results
+
+  fp6e_squaredouble(tmp1, op->m_a);
+  fp6e_squaredouble(tmp2, op->m_b);
+  fp6e_multau(tmp1, tmp1);
+  fp6e_sub(tmp1, tmp2, tmp1);
+  fp6e_short_coeffred(tmp1);
+  fp6e_invert(tmp1, tmp1);
+  fp6e_add(tmp1,tmp1,tmp1);
+  fp6e_short_coeffred(tmp1);
+  fp12e_set(rop, op);
+  fp6e_neg(rop->m_a, rop->m_a);
+  fp12e_mul_fp6e(rop, rop, tmp1);
+#ifdef BENCH
+  invp12cycles += cpucycles();
+#endif
+}
+
+void fp12e_frobenius_p(fp12e_t rop, const fp12e_t op)
+{
+  fp6e_frobenius_p(rop->m_a, op->m_a);
+  fp6e_frobenius_p(rop->m_b, op->m_b);
+  fp6e_mul_fp2e(rop->m_a, rop->m_a, bn_zpminus1);
+}
+
+void fp12e_frobenius_p2(fp12e_t rop, const fp12e_t op)
+{
+  fp6e_t t;
+  fp6e_frobenius_p2(rop->m_a, op->m_a);
+  fp6e_frobenius_p2(rop->m_b, op->m_b);
+  fp6e_mul_fpe(t, rop->m_a, bn_zeta);
+  fp6e_neg(rop->m_a, t);
+}
+
+// Print the element to stdout:
+void fp12e_print(FILE *outfile, const fp12e_t op)
+{
+  fp6e_print(outfile, op->m_a);
+  fprintf(outfile, " * Z\n\n + ");
+  fp6e_print(outfile, op->m_b);
+}
+
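
For reference, fp12e_mul multiplies two elements a1*Z + b1 and a2*Z + b2 of F_{p^12} = F_{p^6}[Z]/(Z^2 - tau) with only three F_{p^6} multiplications, following the Karatsuba identity step by step:

    (a_1 Z + b_1)(a_2 Z + b_2) = \big((a_1+b_1)(a_2+b_2) - a_1 a_2 - b_1 b_2\big) Z + (b_1 b_2 + \tau\, a_1 a_2)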

+ 108 - 0
dclxvi-20130329/fp12e.h

@@ -0,0 +1,108 @@
+/*
+ * File:   dclxvi-20130329/fp12e.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef FP12E_H
+#define FP12E_H
+
+#include "fp6e.h"
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+
+#ifdef BENCH
+unsigned long long multp12cycles; unsigned long long nummultp12;
+unsigned long long sqp12cycles; unsigned long long numsqp12;
+unsigned long long sqp12norm1cycles; unsigned long long numsqp12norm1;
+unsigned long long invp12cycles; unsigned long long numinvp12;
+#endif
+
+// Elements from F_{p^{12}}= F_{p^6}[Z] / (Z^2 - tau)F_{p^6}[Z] are represented as aZ + b
+typedef struct fp12e_struct fp12e_struct_t;
+
+struct fp12e_struct
+{
+	fp6e_t m_a;
+	fp6e_t m_b;
+};
+
+typedef fp12e_struct_t fp12e_t[1];
+
+// Set fp12e_t rop to given value:
+void fp12e_set(fp12e_t rop, const fp12e_t op);
+
+// Initialize an fp12e, set to value given in two fp6es
+void fp12e_set_fp6e(fp12e_t rop, const fp6e_t a, const fp6e_t b);
+
+// Set rop to one:
+void fp12e_setone(fp12e_t rop);
+
+// Set rop to zero:
+void fp12e_setzero(fp12e_t rop);
+
+// Compare for equality:
+int fp12e_iseq(const fp12e_t op1, const fp12e_t op2);
+
+int fp12e_isone(const fp12e_t op);
+
+int fp12e_iszero(const fp12e_t op);
+
+void fp12e_cmov(fp12e_t rop, const fp12e_t op, int c);
+
+// Compute conjugate over Fp6:
+void fp12e_conjugate(fp12e_t rop, const fp12e_t op2);
+
+// Add two fp12e, store result in rop:
+void fp12e_add(fp12e_t rop, const fp12e_t op1, const fp12e_t op2);
+
+// Subtract op2 from op1, store result in rop:
+void fp12e_sub(fp12e_t rop, const fp12e_t op1, const fp12e_t op2);
+
+// Multiply two fp12e, store result in rop:
+void fp12e_mul(fp12e_t rop, const fp12e_t op1, const fp12e_t op2);
+
+void fp12e_mul_fp6e(fp12e_t rop, const fp12e_t op1, const fp6e_t op2);
+
+// Square an fp12e, store result in rop:
+void fp12e_square(fp12e_t rop, const fp12e_t op);
+
+// Multiply an fp12e by a line function value, store result in rop:
+// The line function is given by 3 fp2e elements op2, op3, op4 as
+// line = (op2*tau + op3)*z + op4 = a2*z + b2.
+void fp12e_mul_line(fp12e_t rop, const fp12e_t op1, const fp2e_t op2, const fp2e_t op3, const fp2e_t op4);
+
+void fp12e_pow_vartime(fp12e_t rop, const fp12e_t op, const scalar_t exp);
+
+//void fp12e_pow_norm1(fp12e_t rop, const fp12e_t op, const scalar_t exp, const unsigned int exp_bitsize);
+
+// Implicit fp4 squaring for Granger/Scott special squaring in final expo
+// fp4e_square takes two fp2e op1, op2 representing the fp4 element 
+// op1*z^3 + op2, writes the square to rop1, rop2 representing rop1*z^3 + rop2.
+// (op1*z^3 + op2)^2 = (2*op1*op2)*z^3 + (op1^2*xi + op2^2).
+void fp4e_square(fp2e_t rop1, fp2e_t rop2, const fp2e_t op1, const fp2e_t op2);
+
+// Special squaring for use on elements in T_6(fp2) (after the
+// easy part of the final exponentiation). Used in the hard part
+// of the final exponentiation. Function uses formulas in
+// Granger/Scott (PKC2010).
+void fp12e_special_square_finexp(fp12e_t rop, const fp12e_t op);
+
+void fp12e_invert(fp12e_t rop, const fp12e_t op);
+
+void fp12e_frobenius_p(fp12e_t rop, const fp12e_t op);
+
+void fp12e_frobenius_p2(fp12e_t rop, const fp12e_t op);
+
+// Scalar multiple of an fp12e, store result in rop:
+void fp12e_mul_scalar(fp12e_t rop, const fp12e_t op1, const scalar_t op2);
+
+// Print the element to stdout:
+void fp12e_print(FILE *outfile, const fp12e_t op);
+
+
+
+#endif // ifndef FP12E_H

+ 604 - 0
dclxvi-20130329/fp2e.c

@@ -0,0 +1,604 @@
+/*
+ * File:   dclxvi-20130329/fp2e.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+#include "cpucycles.h"
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+
+#include "mul.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+} 
+
+
+extern const double bn_v;
+extern const double bn_v6;
+extern const scalar_t bn_pminus2;
+  
+#ifdef N_OPS
+unsigned long long mulfp2ctr;
+unsigned long long mulfp2fpctr;
+unsigned long long sqfp2ctr;
+unsigned long long invfp2ctr;
+unsigned long long double2fp2ctr;
+unsigned long long doublefp2ctr;
+unsigned long long triple2fp2ctr;
+unsigned long long triplefp2ctr;
+unsigned long long mul_scalarfp2ctr;
+unsigned long long add2fp2ctr;
+unsigned long long addfp2ctr;
+unsigned long long sub2fp2ctr;
+unsigned long long subfp2ctr;
+unsigned long long neg2fp2ctr;
+unsigned long long negfp2ctr;
+unsigned long long mulxifp2ctr;
+unsigned long long conjugatefp2ctr;
+unsigned long long short_coeffredfp2ctr;
+#endif
+
+#ifndef QHASM
+void fp2e_short_coeffred_c(fp2e_t rop)
+{
+#ifdef N_OPS
+  short_coeffredfp2ctr++;
+#endif
+    mydouble carry11 = round(rop->v[22]/bn_v);
+    mydouble carry11b = round(rop->v[23]/bn_v);
+    rop->v[22] = remround(rop->v[22],bn_v);
+    rop->v[23] = remround(rop->v[23],bn_v);
+    rop->v[0] = rop->v[0] - carry11;
+    rop->v[1] = rop->v[1] - carry11b;
+    rop->v[6] = rop->v[6] - carry11;
+    rop->v[7] = rop->v[7] - carry11b;
+    rop->v[12] = rop->v[12] - 4*carry11;
+    rop->v[13] = rop->v[13] - 4*carry11b;
+    rop->v[18] = rop->v[18] - carry11;
+    rop->v[19] = rop->v[19] - carry11b;
+
+    mydouble carry1 = round(rop->v[2]/bn_v);
+    mydouble carry1b = round(rop->v[3]/bn_v);
+    rop->v[2] = remround(rop->v[2],bn_v);
+    rop->v[3] = remround(rop->v[3],bn_v);
+    rop->v[4] += carry1;
+    rop->v[5] += carry1b;
+    
+    mydouble carry3 = round(rop->v[6]/bn_v);
+    mydouble carry3b = round(rop->v[7]/bn_v);
+    rop->v[6] = remround(rop->v[6],bn_v);
+    rop->v[7] = remround(rop->v[7],bn_v);
+    rop->v[8] += carry3;
+    rop->v[9] += carry3b;
+
+    mydouble carry5 = round(rop->v[10]/bn_v);
+    mydouble carry5b = round(rop->v[11]/bn_v);
+    rop->v[10] = remround(rop->v[10],bn_v);
+    rop->v[11] = remround(rop->v[11],bn_v);
+    rop->v[12] += carry5;
+    rop->v[13] += carry5b;
+
+    mydouble carry7 = round(rop->v[14]/bn_v);
+    mydouble carry7b = round(rop->v[15]/bn_v);
+    rop->v[14] = remround(rop->v[14],bn_v);
+    rop->v[15] = remround(rop->v[15],bn_v);
+    rop->v[16] += carry7;
+    rop->v[17] += carry7b;
+
+    mydouble carry9 = round(rop->v[18]/bn_v);
+    mydouble carry9b = round(rop->v[19]/bn_v);
+    rop->v[18] = remround(rop->v[18],bn_v);
+    rop->v[19] = remround(rop->v[19],bn_v);
+    rop->v[20] += carry9;
+    rop->v[21] += carry9b;
+
+    mydouble carry0 = round(rop->v[0]/bn_v6);
+    mydouble carry0b = round(rop->v[1]/bn_v6);
+    rop->v[0] = remround(rop->v[0],bn_v6);
+    rop->v[1] = remround(rop->v[1],bn_v6);
+    rop->v[2] += carry0;
+    rop->v[3] += carry0b;
+
+    mydouble carry2 = round(rop->v[4]/bn_v);
+    mydouble carry2b = round(rop->v[5]/bn_v);
+    rop->v[4] = remround(rop->v[4],bn_v);
+    rop->v[5] = remround(rop->v[5],bn_v);
+    rop->v[6] += carry2;
+    rop->v[7] += carry2b;
+
+    mydouble carry4 = round(rop->v[8]/bn_v);
+    mydouble carry4b = round(rop->v[9]/bn_v);
+    rop->v[8] = remround(rop->v[8],bn_v);
+    rop->v[9] = remround(rop->v[9],bn_v);
+    rop->v[10] += carry4;
+    rop->v[11] += carry4b;
+
+    mydouble carry6 = round(rop->v[12]/bn_v6);
+    mydouble carry6b = round(rop->v[13]/bn_v6);
+    rop->v[12] = remround(rop->v[12],bn_v6);
+    rop->v[13] = remround(rop->v[13],bn_v6);
+    rop->v[14] += carry6;
+    rop->v[15] += carry6b;
+
+    mydouble carry8 = round(rop->v[16]/bn_v);
+    mydouble carry8b = round(rop->v[17]/bn_v);
+    rop->v[16] = remround(rop->v[16],bn_v);
+    rop->v[17] = remround(rop->v[17],bn_v);
+    rop->v[18] += carry8;
+    rop->v[19] += carry8b;
+
+    mydouble carry10 = round(rop->v[20]/bn_v);
+    mydouble carry10b = round(rop->v[21]/bn_v);
+    rop->v[20] = remround(rop->v[20],bn_v);
+    rop->v[21] = remround(rop->v[21],bn_v);
+    rop->v[22] += carry10;
+    rop->v[23] += carry10b;
+}
+#endif
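
The routine above implements lazy coefficient reduction on the 24 interleaved double coefficients: each pair is rounded to its signed remainder modulo bn_v (positions 0/1 and 12/13 use bn_v6, presumably 6*bn_v) and the rounded quotient is carried into the next pair, while the carry out of positions 22/23 is subtracted back into positions 0/1, 6/7, 18/19 and, with weight 4, into 12/13, mirroring the reduction polynomial of the degree-12 representation of p. A minimal stand-alone sketch of one carry step, assuming remround(x, v) returns x - v*round(x/v) (its real definition lives in mydouble.h / checkdouble.h):

#include <math.h>

/* Stand-alone illustration of the carry step used above (sketch only). */
static double remround_sketch(double x, double v)
{
  return x - v * round(x / v);      /* signed remainder, |result| <= v/2 */
}

static void carry_step_sketch(double *lo, double *hi, double v)
{
  double carry = round(*lo / v);    /* quotient moves one position up */
  *lo = remround_sketch(*lo, v);
  *hi += carry;
}
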
+
+void fp2e_to_2fpe(fpe_t ropa, fpe_t ropb, const fp2e_t op) // op = a*X + b: ropa = a, ropb = b
+{
+  int i;
+  for(i=0;i<12;i++)
+  {
+    ropb->v[i] = op->v[2*i];
+    ropa->v[i] = op->v[2*i+1];
+  }
+}
+
+void _2fpe_to_fp2e(fp2e_t rop, const fpe_t opa, const fpe_t opb) // rop = opa*X + opb
+{
+  int i;
+  for(i=0;i<12;i++)
+  {
+    rop->v[2*i] = opb->v[i];
+    rop->v[2*i+1] = opa->v[i];
+  }
+}
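
Both conversions follow the interleaved layout documented in fp2e.h: for op = a*X + b, coefficient b_i sits at v[2*i] and a_i at v[2*i+1], so each aligned 16-byte pair carries matching coefficients of b and a. A tiny round-trip check, assuming a plain (non-CHECK) build where mydouble is an ordinary double:

#include <assert.h>

/* Sketch: splitting and re-packing must reproduce the layout
 * (b0, a0, b1, a1, ..., b11, a11) exactly. */
static void fp2e_layout_check_sketch(const fp2e_t op)
{
  fpe_t a, b;
  fp2e_t back;
  int i;
  fp2e_to_2fpe(a, b, op);
  _2fpe_to_fp2e(back, a, b);
  for (i = 0; i < 24; i++)
    assert(back->v[i] == op->v[i]);
}
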
+
+// Set fp2e_t rop to given value:
+void fp2e_set(fp2e_t rop, const fp2e_t op)
+{
+  int i;
+  for(i=0;i<24;i++) 
+    rop->v[i] = op->v[i];
+}
+
+/* Communicate the fact that the fp2e is reduced (and that we don't know anything more about it) */
+void fp2e_isreduced(fp2e_t rop)
+{
+  setmax(rop->v[0],(long)bn_v6/2);
+  setmax(rop->v[1],(long)bn_v6/2);
+  setmax(rop->v[12],(long)bn_v6/2);
+  setmax(rop->v[13],(long)bn_v6/2);
+
+  setmax(rop->v[2],(long)bn_v/2);
+  setmax(rop->v[3],(long)bn_v/2);
+  setmax(rop->v[6],(long)bn_v/2);
+  setmax(rop->v[7],(long)bn_v/2);
+  setmax(rop->v[8],(long)bn_v/2);
+  setmax(rop->v[9],(long)bn_v/2);
+  setmax(rop->v[14],(long)bn_v/2);
+  setmax(rop->v[15],(long)bn_v/2);
+  setmax(rop->v[18],(long)bn_v/2);
+  setmax(rop->v[19],(long)bn_v/2);
+  setmax(rop->v[20],(long)bn_v/2);
+  setmax(rop->v[21],(long)bn_v/2);
+
+  //XXX: Change additive constant
+  setmax(rop->v[4],(long)bn_v/2+2331); /* TODO: Think about value */
+  setmax(rop->v[5],(long)bn_v/2+2331); /* TODO: Think about value */
+  setmax(rop->v[10],(long)bn_v/2+2331); /* TODO: Think about value */
+  setmax(rop->v[11],(long)bn_v/2+2331); /* TODO: Think about value */
+  setmax(rop->v[16],(long)bn_v/2+2331); /* TODO: Think about value */
+  setmax(rop->v[17],(long)bn_v/2+2331); /* TODO: Think about value */
+  setmax(rop->v[22],(long)bn_v/2+2331); /* TODO: Think about value */
+  setmax(rop->v[23],(long)bn_v/2+2331); /* TODO: Think about value */
+}
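
fp2e_isreduced performs no arithmetic: setmax only matters in CHECK builds, where mydouble tracks a worst-case magnitude for every value, and the calls simply declare the coefficient bounds that hold after a full reduction (bn_v6/2 at positions 0/1 and 12/13, bn_v/2 plus a small slack elsewhere). In ordinary builds it presumably compiles to nothing. A hypothetical sketch of such a helper, for illustration only (checkdouble_set_bound is an invented name; the real interface is in mydouble.h / checkdouble.h):

#ifdef CHECK
  /* checked build: record that |x| is bounded by m from here on */
  #define SETMAX_SKETCH(x, m) checkdouble_set_bound(&(x), (m))
#else
  /* release build: no tracking, the call disappears */
  #define SETMAX_SKETCH(x, m) ((void)0)
#endif
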
+
+// Set fp2e_t rop to the given value from the subfield F_p:
+void fp2e_set_fpe(fp2e_t rop, const fpe_t op)
+{
+  int i;
+  for(i=0;i<12;i++) 
+  {
+    rop->v[2*i] = op->v[i];
+    rop->v[2*i+1] = 0;
+  }
+}
+
+// Set rop to one
+void fp2e_setone(fp2e_t rop)
+{
+  int i;
+  for(i=1;i<24;i++) 
+    rop->v[i] = 0;
+  rop->v[0] = 1.;
+}
+
+// Set rop to zero
+void fp2e_setzero(fp2e_t rop)
+{
+  int i;
+  for(i=0;i<24;i++) 
+    rop->v[i] = 0;
+}
+
+// Compare for equality:
+int fp2e_iseq(const fp2e_t op1, const fp2e_t op2)
+{
+  fpe_t a1,b1,a2,b2;
+  fp2e_to_2fpe(a1,b1,op1);
+  fp2e_to_2fpe(a2,b2,op2);
+  return fpe_iseq(a1,a2) && fpe_iseq(b1,b2);
+}
+
+int fp2e_isone(const fp2e_t op)
+{
+  fpe_t ta, tb;
+  fp2e_to_2fpe(ta, tb, op);
+  int ret = fpe_iszero(ta);
+  ret = ret && fpe_isone(tb);
+  return ret;
+}
+
+int fp2e_iszero(const fp2e_t op)
+{
+  fpe_t ta, tb;
+  fp2e_to_2fpe(ta, tb, op);
+  int ret = fpe_iszero(ta);
+  ret = ret && fpe_iszero(tb);
+  return ret;
+}
+
+void fp2e_cmov(fp2e_t rop, const fp2e_t op, int c)
+{
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = (1-c)*rop->v[i] + c*op->v[i];
+}
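
fp2e_cmov is a branch-free conditional move: for c in {0, 1}, the blend (1-c)*rop[i] + c*op[i] keeps rop when c = 0 and copies op when c = 1, without a data-dependent branch. This is the usual building block for constant-time table lookups, as in the following sketch (fp2e_select is a hypothetical helper, not part of the library):

/* Sketch: scan the whole table and keep exactly the entry at index idx,
 * touching every element so the access pattern does not depend on idx. */
static void fp2e_select(fp2e_t rop, const fp2e_t table[], int len, int idx)
{
  int i;
  fp2e_setzero(rop);
  for (i = 0; i < len; i++)
    fp2e_cmov(rop, table[i], i == idx);
}
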
+
+#ifndef QHASM
+// Double an fp2e:
+void fp2e_double2_c(fp2e_t rop)
+{
+#ifdef N_OPS
+  double2fp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = 2*rop->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Double an fp2e:
+void fp2e_double_c(fp2e_t rop, const fp2e_t op)
+{
+#ifdef N_OPS
+  doublefp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = 2*op->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Triple an fp2e:
+void fp2e_triple2_c(fp2e_t rop)
+{
+#ifdef N_OPS
+  triple2fp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = 3*rop->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Triple an fp2e:
+void fp2e_triple_c(fp2e_t rop, const fp2e_t op)
+{
+#ifdef N_OPS
+  triplefp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = 3*op->v[i];
+}
+#endif
+
+void fp2e_mul_scalar(fp2e_t rop, const fp2e_t op, const int s)
+{
+#ifdef N_OPS
+  mul_scalarfp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = s*op->v[i];
+}
+
+// Add two fp2e, store result in op1:
+#ifndef QHASM
+void fp2e_add2_c(fp2e_t op1, const fp2e_t op2)
+{
+#ifdef N_OPS
+  add2fp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    op1->v[i] += op2->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Add two fp2e, store result in rop:
+void fp2e_add_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
+{
+#ifdef N_OPS
+  addfp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = op1->v[i] + op2->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Subtract op2 from op1, store result in op1:
+void fp2e_sub2_c(fp2e_t op1, const fp2e_t op2)
+{
+#ifdef N_OPS
+  sub2fp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    op1->v[i] -= op2->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Subtract op2 from op1, store result in rop:
+void fp2e_sub_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
+{
+#ifdef N_OPS
+  subfp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = op1->v[i] - op2->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Negate op
+void fp2e_neg2_c(fp2e_t op)
+{
+#ifdef N_OPS
+  neg2fp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    op->v[i] = -op->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Negate op
+void fp2e_neg_c(fp2e_t rop, const fp2e_t op)
+{
+#ifdef N_OPS
+  negfp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i++)
+    rop->v[i] = -op->v[i];
+}
+#endif
+
+#ifndef QHASM
+// Conjugates: aX+b to -aX+b
+void fp2e_conjugate_c(fp2e_t rop, const fp2e_t op)
+{
+#ifdef N_OPS
+  conjugatefp2ctr++;
+#endif
+  int i;
+  for(i=0;i<24;i+=2)
+  {
+    rop->v[i] = op->v[i];
+    rop->v[i+1] = op->v[i+1] * (-1);
+  }
+}
+#endif
+
+#ifndef QHASM
+// Multiply two fp2e, store result in rop:
+void fp2e_mul_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
+{
+#ifdef N_OPS
+  mulfp2ctr += 1;
+#endif
+  fpe_t a1, b1, a2, b2, r1, r2;
+  mydouble a3[24], b3[24];
+  int i;
+  mydouble t0[24], t1[24], t2[24], t3[24];
+
+  fp2e_to_2fpe(a1, b1, op1);
+  fp2e_to_2fpe(a2, b2, op2);
+
+  polymul(t1, a1->v, b2->v);          // t1 = a1*b2
+  polymul(t2, b1->v, a2->v);          // t2 = b1*a2
+
+  for(i=0; i<12; i++)                 // t3 = 1*a1
+  {
+    t3[i] = 1*a1->v[i];
+  }
+  polymul(t3, t3, a2->v);             // t3 = 1*a1*a2
+  polymul(t0, b1->v, b2->v);             // t0 = b1*b2
+
+  for(i=0; i<23; i++)
+  {
+    a3[i] = t1[i] + t2[i];            // a3 = a1*b2 + b1*a2
+    b3[i] = t0[i] - t3[i];            // b3 = b1*b2 - 1*a1*a2
+  }
+  degred(a3);
+  degred(b3);
+  coeffred_round_par(a3);
+  coeffred_round_par(b3);
+
+  fpe_set_doublearray(r1, a3);
+  fpe_set_doublearray(r2, b3);
+  _2fpe_to_fp2e(rop, r1, r2);
+}
+#endif
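
fp2e_mul_c is schoolbook multiplication in F_{p^2} read as complex arithmetic: with op1 = a1*X + b1, op2 = a2*X + b2 and X^2 = -1, the product is (b1*b2 - a1*a2) + (a1*b2 + b1*a2)*X. The four polymul calls compute the coefficient convolutions of these products, degred folds the degree-22 results back to 12 coefficients, and coeffred_round_par shortens the coefficients. The same identity with single doubles standing in for the 12-coefficient polynomials (a sketch of the algebra, not of the modular arithmetic):

/* (a1*X + b1) * (a2*X + b2) with X^2 = -1, doubles standing in for fpe. */
typedef struct { double a, b; } fp2_sketch;   /* value a*X + b */

static fp2_sketch fp2_mul_sketch(fp2_sketch x, fp2_sketch y)
{
  fp2_sketch r;
  r.a = x.a * y.b + x.b * y.a;   /* cross terms             */
  r.b = x.b * y.b - x.a * y.a;   /* constant term, X^2 = -1 */
  return r;
}
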
+
+#ifndef QHASM
+// Square an fp2e, store result in rop:
+void fp2e_square_c(fp2e_t rop, const fp2e_t op)
+{
+#ifdef N_OPS
+  sqfp2ctr += 1;
+#endif
+  fpe_t a1, b1, r1, r2;
+  mydouble ropa[24], ropb[24];
+  fp2e_to_2fpe(a1, b1, op);
+  int i;
+
+/* CheckDoubles are not smart enough to recognize the
+ * binomial formula (b+a)*(b-a) used to compute b^2-a^2 */
+#ifdef CHECK
+  mydouble d1[24];
+  polymul(d1, a1->v, a1->v);
+  polymul(ropb, b1->v, b1->v);
+  polymul(ropa, b1->v, a1->v);
+  for(i=0;i<23;i++)
+  {
+    ropb[i] -= d1[i];
+    ropa[i] *= 2;
+  }
+#else
+  fpe_t t1, t2, t3;
+  for(i=0;i<12;i++)
+  {
+    t1->v[i] = a1->v[i] + b1->v[i];
+    t2->v[i] = b1->v[i] - a1->v[i];
+    t3->v[i] = 2*b1->v[i];
+  }
+  polymul(ropa, a1->v, t3->v);
+  polymul(ropb, t1->v, t2->v);
+#endif
+
+  degred(ropa);
+  degred(ropb);
+  coeffred_round_par(ropa);
+  coeffred_round_par(ropb);
+
+  fpe_set_doublearray(r1, ropa);
+  fpe_set_doublearray(r2, ropb);
+  _2fpe_to_fp2e(rop, r1, r2);
+}
+#endif
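
The non-CHECK branch of fp2e_square_c uses the binomial identity b^2 - a^2 = (b + a)*(b - a) together with 2*a*b = a*(2*b), so a squaring needs two polymul calls instead of three; the CHECK branch spells out all three products because, as the comment says, the CheckDouble bound tracking cannot see through the factored form. The identity with plain doubles (sketch, X^2 = -1):

/* (a*X + b)^2 = (2*a*b)*X + (b^2 - a^2), computed as in the fast branch. */
static void fp2_square_sketch(double a, double b, double *ra, double *rb)
{
  *rb = (b + a) * (b - a);   /* b^2 - a^2 via the binomial identity */
  *ra = a * (2.0 * b);       /* 2*a*b                               */
}
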
+
+#ifndef  NEW_PARAMETERS
+#ifndef QHASM
+// Multiply by xi=i+3 which is used to construct F_p^6
+// (a*i + b)*(i + 3) = (3*b - 1*a) + (3*a + b)*i
+void fp2e_mulxi_c(fp2e_t rop, const fp2e_t op)
+{
+#ifdef N_OPS
+  mulxifp2ctr++;
+#endif
+  fpe_t a, b, t1, t2, t3, t4, t5;
+  fp2e_to_2fpe(a, b, op);
+  int i;
+  for(i=0; i<12; i++)
+  {
+    t1->v[i] = 3*a->v[i];       // t1 = 3*a
+    t2->v[i] = 3*b->v[i];       // t2 = 3*b
+    t3->v[i] = 1*a->v[i];       // t3 = 1*a
+  }
+  fpe_add(t4, t1, b);           // t4 = 3*a + b
+  fpe_sub(t5, t2, t3);          // t5 = 3*b - 1*a
+  _2fpe_to_fp2e(rop, t4, t5);
+}
+#endif
+#endif
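
The comment above is ordinary complex multiplication by xi = X + 3 with X^2 = -1; the explicit 1*a copy presumably just keeps the CheckDouble accounting uniform. Scalar sketch of the formula:

/* (a*X + b) * (X + 3) = (3*a + b)*X + (3*b - a), assuming X^2 = -1. */
static void fp2_mulxi_sketch(double a, double b, double *ra, double *rb)
{
  *ra = 3.0 * a + b;
  *rb = 3.0 * b - a;
}
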
+
+
+
+// Multiply an fp2e by an fpe (an element of F_p), store result in rop:
+#ifndef QHASM
+void fp2e_mul_fpe_c(fp2e_t rop, const fp2e_t op1, const fpe_t op2)
+{
+#ifdef N_OPS
+  mulfp2fpctr += 1;
+#endif
+  fpe_t a1,b1;
+  fp2e_to_2fpe(a1,b1,op1);
+  fpe_mul(a1,a1,op2);
+  fpe_mul(b1,b1,op2);
+  _2fpe_to_fp2e(rop,a1,b1);
+}
+#endif
+
+#ifndef QHASM
+/* computes (a1*a2, b1*b2) for op1 = a1*X + b1, op2 = a2*X + b2 (both products in F_p) */
+void fp2e_parallel_coeffmul_c(fp2e_t rop, const fp2e_t op1, const fp2e_t op2)
+{
+  fpe_t a1, b1, a2, b2;  // Needed for intermediary results
+  fp2e_to_2fpe(a1,b1,op1);
+  fp2e_to_2fpe(a2,b2,op2);
+  fpe_mul(a1, a1, a2);
+  fpe_mul(b1, b1, b2);
+  _2fpe_to_fp2e(rop, a1, b1);
+}
+#endif
+
+// Invert an fp2e, store result in rop:
+void fp2e_invert(fp2e_t rop, const fp2e_t op)
+{
+#ifdef N_OPS
+  invfp2ctr += 1;
+#endif
+  /* New version */
+  fp2e_t d1, d2;
+  int i;
+  fp2e_parallel_coeffmul(d1, op, op); 
+  for(i=0;i<24;i+=2)
+    d1->v[i] = d1->v[i+1] = d1->v[i] + d1->v[i+1];
+  fp2e_short_coeffred(d1);
+  for(i=0;i<24;i+=2)
+  {
+    d2->v[i] = op->v[i];
+    d2->v[i+1] = -op->v[i+1];
+  }
+  fp2e_set(rop,d1);
+  for(i = 254; i >= 0; i--)
+  {
+    fp2e_parallel_coeffmul(rop,rop,rop);
+    if(scalar_getbit(bn_pminus2, i))
+      fp2e_parallel_coeffmul(rop,rop,d1);
+  }
+  fp2e_parallel_coeffmul(rop,rop,d2);
+}
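
fp2e_invert works through the norm: for op = a*X + b with X^2 = -1, op^(-1) = (b - a*X) / (a^2 + b^2). The parallel coefficient multiplication followed by the pair-sum places the norm a^2 + b^2 in both lanes of d1, d2 holds the conjugate b - a*X, the bit loop raises the norm to the power p - 2 (Fermat's little theorem, bits read from bn_pminus2) with both lanes exponentiated at once, and the final coefficientwise product divides the conjugate by the norm. A stand-alone sketch of the same idea with machine integers standing in for the field representation (small p only, so the products fit in 64 bits; not the library's data layout):

#include <stdint.h>

/* Textbook left-to-right square-and-multiply: x^e mod p. */
static uint64_t powmod_sketch(uint64_t x, uint64_t e, uint64_t p)
{
  uint64_t r = 1;
  int i;
  for (i = 63; i >= 0; i--) {
    r = (r * r) % p;                 /* fits for p < 2^32 */
    if ((e >> i) & 1)
      r = (r * x) % p;
  }
  return r;
}

/* Invert z = a*X + b in F_{p^2} with X^2 = -1: z^(-1) = (b - a*X)/(a^2 + b^2). */
static void fp2_invert_sketch(uint64_t a, uint64_t b, uint64_t p,
                              uint64_t *ra, uint64_t *rb)
{
  uint64_t norm = (a*a + b*b) % p;
  uint64_t ninv = powmod_sketch(norm, p - 2, p);   /* 1/norm by Fermat */
  *ra = (((p - a) % p) * ninv) % p;                /* -a / norm        */
  *rb = (b * ninv) % p;                            /*  b / norm        */
}
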
+
+// Print the fp2e:
+void fp2e_print(FILE *outfile, const fp2e_t op)
+{
+  fpe_t a,b;
+  fp2e_to_2fpe(a,b,op);
+  fpe_print(outfile, a);
+  fprintf(outfile," * X + ");
+  fpe_print(outfile, b);
+}

+ 211 - 0
dclxvi-20130329/fp2e.h

@@ -0,0 +1,211 @@
+/*
+ * File:   dclxvi-20130329/fp2e.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef FP2E_H
+#define FP2E_H
+
+#include <stdio.h>
+#include "fpe.h"
+#include "mydouble.h"
+
+
+// Elements of F_{p^2} = F_p[X] / (X^2 - alpha)F_p[X] are represented as aX + b
+typedef struct fp2e_struct
+{
+  // Arrangement in memory: (b0, a0, b1, a1, ... b11,a11)  
+  mydouble v[24];
+} __attribute__ ((aligned (16))) fp2e_struct_t;
+
+typedef fp2e_struct_t fp2e_t[1];
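
The one-element-array typedef gives the same calling convention as GMP's mpz_t: a local fp2e_t decays to a pointer at every call site, so callees can write through their rop arguments without an explicit address-of, and the aligned(16) attribute keeps the 24 doubles valid for the aligned movdqa loads used by the .s implementations. A small usage sketch (relies only on declarations from this header):

/* fp2e_t is an array type, so x and y below are passed by reference. */
void fp2e_demo_sketch(void)
{
  fp2e_t x, y;
  fp2e_setone(x);        /* x = 1            */
  fp2e_add(y, x, x);     /* y = x + x = 2    */
  fp2e_neg2(y);          /* y = -y, in place */
}
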
+
+void fp2e_to_2fpe(fpe_t ropa, fpe_t ropb, const fp2e_t op);
+void _2fpe_to_fp2e(fp2e_t rop, const fpe_t opa, const fpe_t opb);
+
+#ifdef QHASM
+#define fp2e_short_coeffred fp2e_short_coeffred_qhasm
+#else
+#define fp2e_short_coeffred fp2e_short_coeffred_c
+#endif
+void fp2e_short_coeffred(fp2e_t rop);
+
+// Set fp2e_t rop to given value:
+void fp2e_set(fp2e_t rop, const fp2e_t op);
+
+/* Communicate the fact that the fp2e is reduced (and that we don't know anything more about it) */
+void fp2e_isreduced(fp2e_t rop);
+
+// Set fp2e_t rop to given value contained in the subfield F_p:
+void fp2e_set_fpe(fp2e_t rop, const fpe_t op);
+
+// Set rop to one
+void fp2e_setone(fp2e_t rop);
+
+// Set rop to zero
+void fp2e_setzero(fp2e_t rop);
+
+// Compare for equality:
+int fp2e_iseq(const fp2e_t op1, const fp2e_t op2);
+
+int fp2e_isone(const fp2e_t op);
+
+int fp2e_iszero(const fp2e_t op);
+
+void fp2e_cmov(fp2e_t rop, const fp2e_t op, int c);
+
+#ifdef QHASM
+#define fp2e_double fp2e_double_qhasm
+#else
+#define fp2e_double fp2e_double_c
+#endif
+// Double an fp2e:
+void fp2e_double(fp2e_t rop, const fp2e_t op);
+
+// Double an fp2e:
+#ifdef QHASM
+#define fp2e_double2 fp2e_double2_qhasm
+#else
+#define fp2e_double2 fp2e_double2_c
+#endif
+void fp2e_double2(fp2e_t rop);
+
+#ifdef QHASM
+#define fp2e_triple fp2e_triple_qhasm
+#else
+#define fp2e_triple fp2e_triple_c
+#endif
+// Triple an fp2e:
+void fp2e_triple(fp2e_t rop, const fp2e_t op);
+
+// Triple an fp2e:
+#ifdef QHASM
+#define fp2e_triple2 fp2e_triple2_qhasm
+#else
+#define fp2e_triple2 fp2e_triple2_c
+#endif
+void fp2e_triple2(fp2e_t rop);
+
+void fp2e_mul_scalar(fp2e_t rop, const fp2e_t op, const int s);
+
+#ifdef QHASM
+#define fp2e_add fp2e_add_qhasm
+#else
+#define fp2e_add fp2e_add_c
+#endif
+// Add two fp2e, store result in rop:
+void fp2e_add(fp2e_t rop, const fp2e_t op1, const fp2e_t op2);
+
+// Add op to rop, store result in rop:
+#ifdef QHASM
+#define fp2e_add2 fp2e_add2_qhasm
+#else
+#define fp2e_add2 fp2e_add2_c
+#endif
+void fp2e_add2(fp2e_t rop, const fp2e_t op);
+
+// Load from mem
+void fp2e_load(fp2e_struct_t *rop, const fp2e_t op);
+//void fp2e_load(fp2e_t rop, const fp2e_t op);
+
+// store to mem
+void fp2e_store(fp2e_struct_t *rop, const fp2e_t op);
+//void fp2e_store(fp2e_t rop, const fp2e_t op);
+
+#ifdef QHASM
+#define fp2e_sub fp2e_sub_qhasm
+#else
+#define fp2e_sub fp2e_sub_c
+#endif
+// Subtract op2 from op1, store result in rop:
+void fp2e_sub(fp2e_t rop, const fp2e_t op1, const fp2e_t op2);
+
+#ifdef QHASM
+#define fp2e_sub2 fp2e_sub2_qhasm
+#else
+#define fp2e_sub2 fp2e_sub2_c
+#endif
+// Subtract op from rop, store result in rop:
+void fp2e_sub2(fp2e_t rop, const fp2e_t op);
+
+#ifdef QHASM
+#define fp2e_neg2 fp2e_neg2_qhasm
+#else
+#define fp2e_neg2 fp2e_neg2_c
+#endif
+void fp2e_neg2(fp2e_t op);
+
+#ifdef QHASM
+#define fp2e_neg fp2e_neg_qhasm
+#else
+#define fp2e_neg fp2e_neg_c
+#endif
+void fp2e_neg(fp2e_t rop, const fp2e_t op);
+
+#ifdef QHASM
+#define fp2e_conjugate fp2e_conjugate_qhasm
+#else
+#define fp2e_conjugate fp2e_conjugate_c
+#endif
+// Conjugates: aX+b to -aX+b
+void fp2e_conjugate(fp2e_t rop, const fp2e_t op);
+
+#ifdef QHASM
+#define fp2e_mul fp2e_mul_qhasm
+#else
+#define fp2e_mul fp2e_mul_c
+#endif
+// Multiply two fp2e, store result in rop:
+void fp2e_mul(fp2e_t rop, const fp2e_t op1, const fp2e_t op2);
+
+// Square an fp2e, store result in rop:
+#ifdef QHASM
+#define fp2e_square fp2e_square_qhasm
+#else
+#define fp2e_square fp2e_square_c
+#endif
+void fp2e_square(fp2e_t rop, const fp2e_t op);
+
+// Multiply by xi which is used to construct F_p^6
+
+#ifdef QHASM
+	#ifdef NEW_PARAMETERS
+		#define fp2e_mulxi fp2e_mulxi_qhasm_512
+	#else
+		#define fp2e_mulxi fp2e_mulxi_qhasm
+	#endif
+#else
+	#ifdef NEW_PARAMETERS
+		#define fp2e_mulxi fp2e_mulxi_c_512
+	#else
+		#define fp2e_mulxi fp2e_mulxi_c
+	#endif
+#endif
+	
+void fp2e_mulxi(fp2e_t rop, const fp2e_t op);
+
+// Multiply an fp2e by an fpe, store result in rop:
+#ifdef QHASM
+#define fp2e_mul_fpe fp2e_mul_fpe_qhasm
+#else
+#define fp2e_mul_fpe fp2e_mul_fpe_c
+#endif
+void fp2e_mul_fpe(fp2e_t rop, const fp2e_t op1, const fpe_t op2);
+
+#ifdef QHASM
+#define fp2e_parallel_coeffmul fp2e_parallel_coeffmul_qhasm
+#else
+#define fp2e_parallel_coeffmul fp2e_parallel_coeffmul_c
+#endif
+/* computes (a1*a2, b1*b2) for op1 = a1*X + b1, op2 = a2*X + b2 (both products in F_p) */
+void fp2e_parallel_coeffmul(fp2e_t rop, const fp2e_t op1, const fp2e_t op2);
+
+// Invert an fp2e, store result in rop:
+void fp2e_invert(fp2e_t rop, const fp2e_t op1);
+
+// Print the element to outfile:
+void fp2e_print(FILE * outfile, const fp2e_t op);
+
+
+#endif // ifndef FP2E_H

+ 314 - 0
dclxvi-20130329/fp2e_add.s

@@ -0,0 +1,314 @@
+# File:   dclxvi-20130329/fp2e_add.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_add_qhasm
+.text
+.p2align 5
+.globl _fp2e_add_qhasm
+.globl fp2e_add_qhasm
+_fp2e_add_qhasm:
+fp2e_add_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op1
+
+# qhasm: int64 0op2
+
+# qhasm: input 0rop
+
+# qhasm: input 0op1
+
+# qhasm: input 0op2
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0op1 +   0)
+# asm 1: movdqa 0(<0op1=int64#2),>0r0=int6464#1
+# asm 2: movdqa 0(<0op1=%rsi),>0r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0op1 +  16)
+# asm 1: movdqa 16(<0op1=int64#2),>0r1=int6464#2
+# asm 2: movdqa 16(<0op1=%rsi),>0r1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0op1 +  32)
+# asm 1: movdqa 32(<0op1=int64#2),>0r2=int6464#3
+# asm 2: movdqa 32(<0op1=%rsi),>0r2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0op1 +  48)
+# asm 1: movdqa 48(<0op1=int64#2),>0r3=int6464#4
+# asm 2: movdqa 48(<0op1=%rsi),>0r3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0op1 +  64)
+# asm 1: movdqa 64(<0op1=int64#2),>0r4=int6464#5
+# asm 2: movdqa 64(<0op1=%rsi),>0r4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0op1 +  80)
+# asm 1: movdqa 80(<0op1=int64#2),>0r5=int6464#6
+# asm 2: movdqa 80(<0op1=%rsi),>0r5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0op1 +  96)
+# asm 1: movdqa 96(<0op1=int64#2),>0r6=int6464#7
+# asm 2: movdqa 96(<0op1=%rsi),>0r6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0op1 + 112)
+# asm 1: movdqa 112(<0op1=int64#2),>0r7=int6464#8
+# asm 2: movdqa 112(<0op1=%rsi),>0r7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0op1 + 128)
+# asm 1: movdqa 128(<0op1=int64#2),>0r8=int6464#9
+# asm 2: movdqa 128(<0op1=%rsi),>0r8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0op1 + 144)
+# asm 1: movdqa 144(<0op1=int64#2),>0r9=int6464#10
+# asm 2: movdqa 144(<0op1=%rsi),>0r9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0op1 + 160)
+# asm 1: movdqa 160(<0op1=int64#2),>0r10=int6464#11
+# asm 2: movdqa 160(<0op1=%rsi),>0r10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0op1 + 176)
+# asm 1: movdqa 176(<0op1=int64#2),>0r11=int6464#12
+# asm 2: movdqa 176(<0op1=%rsi),>0r11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: int6464 1t1
+
+# qhasm: int6464 1t2
+
+# qhasm: int6464 1t3
+
+# qhasm: 1t0 = *(int128 *)(0op2 + 0)
+# asm 1: movdqa 0(<0op2=int64#3),>1t0=int6464#13
+# asm 2: movdqa 0(<0op2=%rdx),>1t0=%xmm12
+movdqa 0(%rdx),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op2 + 16)
+# asm 1: movdqa 16(<0op2=int64#3),>1t1=int6464#14
+# asm 2: movdqa 16(<0op2=%rdx),>1t1=%xmm13
+movdqa 16(%rdx),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op2 + 32)
+# asm 1: movdqa 32(<0op2=int64#3),>1t2=int6464#15
+# asm 2: movdqa 32(<0op2=%rdx),>1t2=%xmm14
+movdqa 32(%rdx),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op2 + 48)
+# asm 1: movdqa 48(<0op2=int64#3),>1t3=int6464#16
+# asm 2: movdqa 48(<0op2=%rdx),>1t3=%xmm15
+movdqa 48(%rdx),%xmm15
+
+# qhasm: float6464 0r0 += 1t0
+# asm 1: addpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: addpd <1t0=%xmm12,<0r0=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1 += 1t1
+# asm 1: addpd <1t1=int6464#14,<0r1=int6464#2
+# asm 2: addpd <1t1=%xmm13,<0r1=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: float6464 0r2 += 1t2
+# asm 1: addpd <1t2=int6464#15,<0r2=int6464#3
+# asm 2: addpd <1t2=%xmm14,<0r2=%xmm2
+addpd %xmm14,%xmm2
+
+# qhasm: float6464 0r3 += 1t3
+# asm 1: addpd <1t3=int6464#16,<0r3=int6464#4
+# asm 2: addpd <1t3=%xmm15,<0r3=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: 1t0 = *(int128 *)(0op2 + 64)
+# asm 1: movdqa 64(<0op2=int64#3),>1t0=int6464#13
+# asm 2: movdqa 64(<0op2=%rdx),>1t0=%xmm12
+movdqa 64(%rdx),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op2 + 80)
+# asm 1: movdqa 80(<0op2=int64#3),>1t1=int6464#14
+# asm 2: movdqa 80(<0op2=%rdx),>1t1=%xmm13
+movdqa 80(%rdx),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op2 + 96)
+# asm 1: movdqa 96(<0op2=int64#3),>1t2=int6464#15
+# asm 2: movdqa 96(<0op2=%rdx),>1t2=%xmm14
+movdqa 96(%rdx),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op2 + 112)
+# asm 1: movdqa 112(<0op2=int64#3),>1t3=int6464#16
+# asm 2: movdqa 112(<0op2=%rdx),>1t3=%xmm15
+movdqa 112(%rdx),%xmm15
+
+# qhasm: float6464 0r4 += 1t0
+# asm 1: addpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: addpd <1t0=%xmm12,<0r4=%xmm4
+addpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5 += 1t1
+# asm 1: addpd <1t1=int6464#14,<0r5=int6464#6
+# asm 2: addpd <1t1=%xmm13,<0r5=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: float6464 0r6 += 1t2
+# asm 1: addpd <1t2=int6464#15,<0r6=int6464#7
+# asm 2: addpd <1t2=%xmm14,<0r6=%xmm6
+addpd %xmm14,%xmm6
+
+# qhasm: float6464 0r7 += 1t3
+# asm 1: addpd <1t3=int6464#16,<0r7=int6464#8
+# asm 2: addpd <1t3=%xmm15,<0r7=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: 1t0 = *(int128 *)(0op2 + 128)
+# asm 1: movdqa 128(<0op2=int64#3),>1t0=int6464#13
+# asm 2: movdqa 128(<0op2=%rdx),>1t0=%xmm12
+movdqa 128(%rdx),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op2 + 144)
+# asm 1: movdqa 144(<0op2=int64#3),>1t1=int6464#14
+# asm 2: movdqa 144(<0op2=%rdx),>1t1=%xmm13
+movdqa 144(%rdx),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op2 + 160)
+# asm 1: movdqa 160(<0op2=int64#3),>1t2=int6464#15
+# asm 2: movdqa 160(<0op2=%rdx),>1t2=%xmm14
+movdqa 160(%rdx),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op2 + 176)
+# asm 1: movdqa 176(<0op2=int64#3),>1t3=int6464#16
+# asm 2: movdqa 176(<0op2=%rdx),>1t3=%xmm15
+movdqa 176(%rdx),%xmm15
+
+# qhasm: float6464 0r8 += 1t0
+# asm 1: addpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: addpd <1t0=%xmm12,<0r8=%xmm8
+addpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9 += 1t1
+# asm 1: addpd <1t1=int6464#14,<0r9=int6464#10
+# asm 2: addpd <1t1=%xmm13,<0r9=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: float6464 0r10 += 1t2
+# asm 1: addpd <1t2=int6464#15,<0r10=int6464#11
+# asm 2: addpd <1t2=%xmm14,<0r10=%xmm10
+addpd %xmm14,%xmm10
+
+# qhasm: float6464 0r11 += 1t3
+# asm 1: addpd <1t3=int6464#16,<0r11=int6464#12
+# asm 2: addpd <1t3=%xmm15,<0r11=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 310 - 0
dclxvi-20130329/fp2e_add2.s

@@ -0,0 +1,310 @@
+# File:   dclxvi-20130329/fp2e_add2.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_add2_qhasm
+.text
+.p2align 5
+.globl _fp2e_add2_qhasm
+.globl fp2e_add2_qhasm
+_fp2e_add2_qhasm:
+fp2e_add2_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op1
+
+# qhasm: input 0rop
+
+# qhasm: input 0op1
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0rop +   0)
+# asm 1: movdqa 0(<0rop=int64#1),>0r0=int6464#1
+# asm 2: movdqa 0(<0rop=%rdi),>0r0=%xmm0
+movdqa 0(%rdi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0rop +  16)
+# asm 1: movdqa 16(<0rop=int64#1),>0r1=int6464#2
+# asm 2: movdqa 16(<0rop=%rdi),>0r1=%xmm1
+movdqa 16(%rdi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0rop +  32)
+# asm 1: movdqa 32(<0rop=int64#1),>0r2=int6464#3
+# asm 2: movdqa 32(<0rop=%rdi),>0r2=%xmm2
+movdqa 32(%rdi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0rop +  48)
+# asm 1: movdqa 48(<0rop=int64#1),>0r3=int6464#4
+# asm 2: movdqa 48(<0rop=%rdi),>0r3=%xmm3
+movdqa 48(%rdi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0rop +  64)
+# asm 1: movdqa 64(<0rop=int64#1),>0r4=int6464#5
+# asm 2: movdqa 64(<0rop=%rdi),>0r4=%xmm4
+movdqa 64(%rdi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0rop +  80)
+# asm 1: movdqa 80(<0rop=int64#1),>0r5=int6464#6
+# asm 2: movdqa 80(<0rop=%rdi),>0r5=%xmm5
+movdqa 80(%rdi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0rop +  96)
+# asm 1: movdqa 96(<0rop=int64#1),>0r6=int6464#7
+# asm 2: movdqa 96(<0rop=%rdi),>0r6=%xmm6
+movdqa 96(%rdi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0rop + 112)
+# asm 1: movdqa 112(<0rop=int64#1),>0r7=int6464#8
+# asm 2: movdqa 112(<0rop=%rdi),>0r7=%xmm7
+movdqa 112(%rdi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0rop + 128)
+# asm 1: movdqa 128(<0rop=int64#1),>0r8=int6464#9
+# asm 2: movdqa 128(<0rop=%rdi),>0r8=%xmm8
+movdqa 128(%rdi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0rop + 144)
+# asm 1: movdqa 144(<0rop=int64#1),>0r9=int6464#10
+# asm 2: movdqa 144(<0rop=%rdi),>0r9=%xmm9
+movdqa 144(%rdi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0rop + 160)
+# asm 1: movdqa 160(<0rop=int64#1),>0r10=int6464#11
+# asm 2: movdqa 160(<0rop=%rdi),>0r10=%xmm10
+movdqa 160(%rdi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0rop + 176)
+# asm 1: movdqa 176(<0rop=int64#1),>0r11=int6464#12
+# asm 2: movdqa 176(<0rop=%rdi),>0r11=%xmm11
+movdqa 176(%rdi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: int6464 1t1
+
+# qhasm: int6464 1t2
+
+# qhasm: int6464 1t3
+
+# qhasm: 1t0 = *(int128 *)(0op1 + 0)
+# asm 1: movdqa 0(<0op1=int64#2),>1t0=int6464#13
+# asm 2: movdqa 0(<0op1=%rsi),>1t0=%xmm12
+movdqa 0(%rsi),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op1 + 16)
+# asm 1: movdqa 16(<0op1=int64#2),>1t1=int6464#14
+# asm 2: movdqa 16(<0op1=%rsi),>1t1=%xmm13
+movdqa 16(%rsi),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op1 + 32)
+# asm 1: movdqa 32(<0op1=int64#2),>1t2=int6464#15
+# asm 2: movdqa 32(<0op1=%rsi),>1t2=%xmm14
+movdqa 32(%rsi),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op1 + 48)
+# asm 1: movdqa 48(<0op1=int64#2),>1t3=int6464#16
+# asm 2: movdqa 48(<0op1=%rsi),>1t3=%xmm15
+movdqa 48(%rsi),%xmm15
+
+# qhasm: float6464 0r0 += 1t0
+# asm 1: addpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: addpd <1t0=%xmm12,<0r0=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1 += 1t1
+# asm 1: addpd <1t1=int6464#14,<0r1=int6464#2
+# asm 2: addpd <1t1=%xmm13,<0r1=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: float6464 0r2 += 1t2
+# asm 1: addpd <1t2=int6464#15,<0r2=int6464#3
+# asm 2: addpd <1t2=%xmm14,<0r2=%xmm2
+addpd %xmm14,%xmm2
+
+# qhasm: float6464 0r3 += 1t3
+# asm 1: addpd <1t3=int6464#16,<0r3=int6464#4
+# asm 2: addpd <1t3=%xmm15,<0r3=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: 1t0 = *(int128 *)(0op1 + 64)
+# asm 1: movdqa 64(<0op1=int64#2),>1t0=int6464#13
+# asm 2: movdqa 64(<0op1=%rsi),>1t0=%xmm12
+movdqa 64(%rsi),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op1 + 80)
+# asm 1: movdqa 80(<0op1=int64#2),>1t1=int6464#14
+# asm 2: movdqa 80(<0op1=%rsi),>1t1=%xmm13
+movdqa 80(%rsi),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op1 + 96)
+# asm 1: movdqa 96(<0op1=int64#2),>1t2=int6464#15
+# asm 2: movdqa 96(<0op1=%rsi),>1t2=%xmm14
+movdqa 96(%rsi),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op1 + 112)
+# asm 1: movdqa 112(<0op1=int64#2),>1t3=int6464#16
+# asm 2: movdqa 112(<0op1=%rsi),>1t3=%xmm15
+movdqa 112(%rsi),%xmm15
+
+# qhasm: float6464 0r4 += 1t0
+# asm 1: addpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: addpd <1t0=%xmm12,<0r4=%xmm4
+addpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5 += 1t1
+# asm 1: addpd <1t1=int6464#14,<0r5=int6464#6
+# asm 2: addpd <1t1=%xmm13,<0r5=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: float6464 0r6 += 1t2
+# asm 1: addpd <1t2=int6464#15,<0r6=int6464#7
+# asm 2: addpd <1t2=%xmm14,<0r6=%xmm6
+addpd %xmm14,%xmm6
+
+# qhasm: float6464 0r7 += 1t3
+# asm 1: addpd <1t3=int6464#16,<0r7=int6464#8
+# asm 2: addpd <1t3=%xmm15,<0r7=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: 1t0 = *(int128 *)(0op1 + 128)
+# asm 1: movdqa 128(<0op1=int64#2),>1t0=int6464#13
+# asm 2: movdqa 128(<0op1=%rsi),>1t0=%xmm12
+movdqa 128(%rsi),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op1 + 144)
+# asm 1: movdqa 144(<0op1=int64#2),>1t1=int6464#14
+# asm 2: movdqa 144(<0op1=%rsi),>1t1=%xmm13
+movdqa 144(%rsi),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op1 + 160)
+# asm 1: movdqa 160(<0op1=int64#2),>1t2=int6464#15
+# asm 2: movdqa 160(<0op1=%rsi),>1t2=%xmm14
+movdqa 160(%rsi),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op1 + 176)
+# asm 1: movdqa 176(<0op1=int64#2),>1t3=int6464#16
+# asm 2: movdqa 176(<0op1=%rsi),>1t3=%xmm15
+movdqa 176(%rsi),%xmm15
+
+# qhasm: float6464 0r8 += 1t0
+# asm 1: addpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: addpd <1t0=%xmm12,<0r8=%xmm8
+addpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9 += 1t1
+# asm 1: addpd <1t1=int6464#14,<0r9=int6464#10
+# asm 2: addpd <1t1=%xmm13,<0r9=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: float6464 0r10 += 1t2
+# asm 1: addpd <1t2=int6464#15,<0r10=int6464#11
+# asm 2: addpd <1t2=%xmm14,<0r10=%xmm10
+addpd %xmm14,%xmm10
+
+# qhasm: float6464 0r11 += 1t3
+# asm 1: addpd <1t3=int6464#16,<0r11=int6464#12
+# asm 2: addpd <1t3=%xmm15,<0r11=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 249 - 0
dclxvi-20130329/fp2e_conjugate.s

@@ -0,0 +1,249 @@
+# File:   dclxvi-20130329/fp2e_conjugate.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_conjugate_qhasm
+.text
+.p2align 5
+.globl _fp2e_conjugate_qhasm
+.globl fp2e_conjugate_qhasm
+_fp2e_conjugate_qhasm:
+fp2e_conjugate_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op
+
+# qhasm: input 0rop
+
+# qhasm: input 0op
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0op +   0)
+# asm 1: movdqa 0(<0op=int64#2),>0r0=int6464#1
+# asm 2: movdqa 0(<0op=%rsi),>0r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0op +  16)
+# asm 1: movdqa 16(<0op=int64#2),>0r1=int6464#2
+# asm 2: movdqa 16(<0op=%rsi),>0r1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0op +  32)
+# asm 1: movdqa 32(<0op=int64#2),>0r2=int6464#3
+# asm 2: movdqa 32(<0op=%rsi),>0r2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0op +  48)
+# asm 1: movdqa 48(<0op=int64#2),>0r3=int6464#4
+# asm 2: movdqa 48(<0op=%rsi),>0r3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0op +  64)
+# asm 1: movdqa 64(<0op=int64#2),>0r4=int6464#5
+# asm 2: movdqa 64(<0op=%rsi),>0r4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0op +  80)
+# asm 1: movdqa 80(<0op=int64#2),>0r5=int6464#6
+# asm 2: movdqa 80(<0op=%rsi),>0r5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0op +  96)
+# asm 1: movdqa 96(<0op=int64#2),>0r6=int6464#7
+# asm 2: movdqa 96(<0op=%rsi),>0r6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0op + 112)
+# asm 1: movdqa 112(<0op=int64#2),>0r7=int6464#8
+# asm 2: movdqa 112(<0op=%rsi),>0r7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0op + 128)
+# asm 1: movdqa 128(<0op=int64#2),>0r8=int6464#9
+# asm 2: movdqa 128(<0op=%rsi),>0r8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0op + 144)
+# asm 1: movdqa 144(<0op=int64#2),>0r9=int6464#10
+# asm 2: movdqa 144(<0op=%rsi),>0r9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0op + 160)
+# asm 1: movdqa 160(<0op=int64#2),>0r10=int6464#11
+# asm 2: movdqa 160(<0op=%rsi),>0r10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0op + 176)
+# asm 1: movdqa 176(<0op=int64#2),>0r11=int6464#12
+# asm 2: movdqa 176(<0op=%rsi),>0r11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: 1t0 = ONE_MINUSONE
+# asm 1: movdqa ONE_MINUSONE,<1t0=int6464#13
+# asm 2: movdqa ONE_MINUSONE,<1t0=%xmm12
+movdqa ONE_MINUSONE,%xmm12
+
+# qhasm: float6464 0r0  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: mulpd <1t0=%xmm12,<0r0=%xmm0
+mulpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r1=int6464#2
+# asm 2: mulpd <1t0=%xmm12,<0r1=%xmm1
+mulpd %xmm12,%xmm1
+
+# qhasm: float6464 0r2  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r2=int6464#3
+# asm 2: mulpd <1t0=%xmm12,<0r2=%xmm2
+mulpd %xmm12,%xmm2
+
+# qhasm: float6464 0r3  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r3=int6464#4
+# asm 2: mulpd <1t0=%xmm12,<0r3=%xmm3
+mulpd %xmm12,%xmm3
+
+# qhasm: float6464 0r4  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: mulpd <1t0=%xmm12,<0r4=%xmm4
+mulpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r5=int6464#6
+# asm 2: mulpd <1t0=%xmm12,<0r5=%xmm5
+mulpd %xmm12,%xmm5
+
+# qhasm: float6464 0r6  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r6=int6464#7
+# asm 2: mulpd <1t0=%xmm12,<0r6=%xmm6
+mulpd %xmm12,%xmm6
+
+# qhasm: float6464 0r7  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r7=int6464#8
+# asm 2: mulpd <1t0=%xmm12,<0r7=%xmm7
+mulpd %xmm12,%xmm7
+
+# qhasm: float6464 0r8  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: mulpd <1t0=%xmm12,<0r8=%xmm8
+mulpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r9=int6464#10
+# asm 2: mulpd <1t0=%xmm12,<0r9=%xmm9
+mulpd %xmm12,%xmm9
+
+# qhasm: float6464 0r10 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r10=int6464#11
+# asm 2: mulpd <1t0=%xmm12,<0r10=%xmm10
+mulpd %xmm12,%xmm10
+
+# qhasm: float6464 0r11 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r11=int6464#12
+# asm 2: mulpd <1t0=%xmm12,<0r11=%xmm11
+mulpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 249 - 0
dclxvi-20130329/fp2e_double.s

@@ -0,0 +1,249 @@
+# File:   dclxvi-20130329/fp2e_double.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_double_qhasm
+.text
+.p2align 5
+.globl _fp2e_double_qhasm
+.globl fp2e_double_qhasm
+_fp2e_double_qhasm:
+fp2e_double_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op
+
+# qhasm: input 0rop
+
+# qhasm: input 0op
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0op +   0)
+# asm 1: movdqa 0(<0op=int64#2),>0r0=int6464#1
+# asm 2: movdqa 0(<0op=%rsi),>0r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0op +  16)
+# asm 1: movdqa 16(<0op=int64#2),>0r1=int6464#2
+# asm 2: movdqa 16(<0op=%rsi),>0r1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0op +  32)
+# asm 1: movdqa 32(<0op=int64#2),>0r2=int6464#3
+# asm 2: movdqa 32(<0op=%rsi),>0r2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0op +  48)
+# asm 1: movdqa 48(<0op=int64#2),>0r3=int6464#4
+# asm 2: movdqa 48(<0op=%rsi),>0r3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0op +  64)
+# asm 1: movdqa 64(<0op=int64#2),>0r4=int6464#5
+# asm 2: movdqa 64(<0op=%rsi),>0r4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0op +  80)
+# asm 1: movdqa 80(<0op=int64#2),>0r5=int6464#6
+# asm 2: movdqa 80(<0op=%rsi),>0r5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0op +  96)
+# asm 1: movdqa 96(<0op=int64#2),>0r6=int6464#7
+# asm 2: movdqa 96(<0op=%rsi),>0r6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0op + 112)
+# asm 1: movdqa 112(<0op=int64#2),>0r7=int6464#8
+# asm 2: movdqa 112(<0op=%rsi),>0r7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0op + 128)
+# asm 1: movdqa 128(<0op=int64#2),>0r8=int6464#9
+# asm 2: movdqa 128(<0op=%rsi),>0r8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0op + 144)
+# asm 1: movdqa 144(<0op=int64#2),>0r9=int6464#10
+# asm 2: movdqa 144(<0op=%rsi),>0r9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0op + 160)
+# asm 1: movdqa 160(<0op=int64#2),>0r10=int6464#11
+# asm 2: movdqa 160(<0op=%rsi),>0r10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0op + 176)
+# asm 1: movdqa 176(<0op=int64#2),>0r11=int6464#12
+# asm 2: movdqa 176(<0op=%rsi),>0r11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: 1t0 = TWO_TWO
+# asm 1: movdqa TWO_TWO,<1t0=int6464#13
+# asm 2: movdqa TWO_TWO,<1t0=%xmm12
+movdqa TWO_TWO,%xmm12
+
+# qhasm: float6464 0r0  += 0r0
+# asm 1: addpd <0r0=int6464#1,<0r0=int6464#1
+# asm 2: addpd <0r0=%xmm0,<0r0=%xmm0
+addpd %xmm0,%xmm0
+
+# qhasm: float6464 0r1  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r1=int6464#2
+# asm 2: mulpd <1t0=%xmm12,<0r1=%xmm1
+mulpd %xmm12,%xmm1
+
+# qhasm: float6464 0r2  += 0r2
+# asm 1: addpd <0r2=int6464#3,<0r2=int6464#3
+# asm 2: addpd <0r2=%xmm2,<0r2=%xmm2
+addpd %xmm2,%xmm2
+
+# qhasm: float6464 0r3  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r3=int6464#4
+# asm 2: mulpd <1t0=%xmm12,<0r3=%xmm3
+mulpd %xmm12,%xmm3
+
+# qhasm: float6464 0r4  += 0r4
+# asm 1: addpd <0r4=int6464#5,<0r4=int6464#5
+# asm 2: addpd <0r4=%xmm4,<0r4=%xmm4
+addpd %xmm4,%xmm4
+
+# qhasm: float6464 0r5  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r5=int6464#6
+# asm 2: mulpd <1t0=%xmm12,<0r5=%xmm5
+mulpd %xmm12,%xmm5
+
+# qhasm: float6464 0r6  += 0r6
+# asm 1: addpd <0r6=int6464#7,<0r6=int6464#7
+# asm 2: addpd <0r6=%xmm6,<0r6=%xmm6
+addpd %xmm6,%xmm6
+
+# qhasm: float6464 0r7  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r7=int6464#8
+# asm 2: mulpd <1t0=%xmm12,<0r7=%xmm7
+mulpd %xmm12,%xmm7
+
+# qhasm: float6464 0r8  += 0r8
+# asm 1: addpd <0r8=int6464#9,<0r8=int6464#9
+# asm 2: addpd <0r8=%xmm8,<0r8=%xmm8
+addpd %xmm8,%xmm8
+
+# qhasm: float6464 0r9  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r9=int6464#10
+# asm 2: mulpd <1t0=%xmm12,<0r9=%xmm9
+mulpd %xmm12,%xmm9
+
+# qhasm: float6464 0r10 += 0r10
+# asm 1: addpd <0r10=int6464#11,<0r10=int6464#11
+# asm 2: addpd <0r10=%xmm10,<0r10=%xmm10
+addpd %xmm10,%xmm10
+
+# qhasm: float6464 0r11 *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r11=int6464#12
+# asm 2: mulpd <1t0=%xmm12,<0r11=%xmm11
+mulpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 245 - 0
dclxvi-20130329/fp2e_double2.s

@@ -0,0 +1,245 @@
+# File:   dclxvi-20130329/fp2e_double2.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_double2_qhasm
+.text
+.p2align 5
+.globl _fp2e_double2_qhasm
+.globl fp2e_double2_qhasm
+_fp2e_double2_qhasm:
+fp2e_double2_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: input 0rop
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0rop +   0)
+# asm 1: movdqa 0(<0rop=int64#1),>0r0=int6464#1
+# asm 2: movdqa 0(<0rop=%rdi),>0r0=%xmm0
+movdqa 0(%rdi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0rop +  16)
+# asm 1: movdqa 16(<0rop=int64#1),>0r1=int6464#2
+# asm 2: movdqa 16(<0rop=%rdi),>0r1=%xmm1
+movdqa 16(%rdi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0rop +  32)
+# asm 1: movdqa 32(<0rop=int64#1),>0r2=int6464#3
+# asm 2: movdqa 32(<0rop=%rdi),>0r2=%xmm2
+movdqa 32(%rdi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0rop +  48)
+# asm 1: movdqa 48(<0rop=int64#1),>0r3=int6464#4
+# asm 2: movdqa 48(<0rop=%rdi),>0r3=%xmm3
+movdqa 48(%rdi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0rop +  64)
+# asm 1: movdqa 64(<0rop=int64#1),>0r4=int6464#5
+# asm 2: movdqa 64(<0rop=%rdi),>0r4=%xmm4
+movdqa 64(%rdi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0rop +  80)
+# asm 1: movdqa 80(<0rop=int64#1),>0r5=int6464#6
+# asm 2: movdqa 80(<0rop=%rdi),>0r5=%xmm5
+movdqa 80(%rdi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0rop +  96)
+# asm 1: movdqa 96(<0rop=int64#1),>0r6=int6464#7
+# asm 2: movdqa 96(<0rop=%rdi),>0r6=%xmm6
+movdqa 96(%rdi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0rop + 112)
+# asm 1: movdqa 112(<0rop=int64#1),>0r7=int6464#8
+# asm 2: movdqa 112(<0rop=%rdi),>0r7=%xmm7
+movdqa 112(%rdi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0rop + 128)
+# asm 1: movdqa 128(<0rop=int64#1),>0r8=int6464#9
+# asm 2: movdqa 128(<0rop=%rdi),>0r8=%xmm8
+movdqa 128(%rdi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0rop + 144)
+# asm 1: movdqa 144(<0rop=int64#1),>0r9=int6464#10
+# asm 2: movdqa 144(<0rop=%rdi),>0r9=%xmm9
+movdqa 144(%rdi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0rop + 160)
+# asm 1: movdqa 160(<0rop=int64#1),>0r10=int6464#11
+# asm 2: movdqa 160(<0rop=%rdi),>0r10=%xmm10
+movdqa 160(%rdi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0rop + 176)
+# asm 1: movdqa 176(<0rop=int64#1),>0r11=int6464#12
+# asm 2: movdqa 176(<0rop=%rdi),>0r11=%xmm11
+movdqa 176(%rdi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: 1t0 = TWO_TWO
+# asm 1: movdqa TWO_TWO,<1t0=int6464#13
+# asm 2: movdqa TWO_TWO,<1t0=%xmm12
+movdqa TWO_TWO,%xmm12
+
+# qhasm: float6464 0r0  += 0r0
+# asm 1: addpd <0r0=int6464#1,<0r0=int6464#1
+# asm 2: addpd <0r0=%xmm0,<0r0=%xmm0
+addpd %xmm0,%xmm0
+
+# qhasm: float6464 0r1  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r1=int6464#2
+# asm 2: mulpd <1t0=%xmm12,<0r1=%xmm1
+mulpd %xmm12,%xmm1
+
+# qhasm: float6464 0r2  += 0r2
+# asm 1: addpd <0r2=int6464#3,<0r2=int6464#3
+# asm 2: addpd <0r2=%xmm2,<0r2=%xmm2
+addpd %xmm2,%xmm2
+
+# qhasm: float6464 0r3  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r3=int6464#4
+# asm 2: mulpd <1t0=%xmm12,<0r3=%xmm3
+mulpd %xmm12,%xmm3
+
+# qhasm: float6464 0r4  += 0r4
+# asm 1: addpd <0r4=int6464#5,<0r4=int6464#5
+# asm 2: addpd <0r4=%xmm4,<0r4=%xmm4
+addpd %xmm4,%xmm4
+
+# qhasm: float6464 0r5  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r5=int6464#6
+# asm 2: mulpd <1t0=%xmm12,<0r5=%xmm5
+mulpd %xmm12,%xmm5
+
+# qhasm: float6464 0r6  += 0r6
+# asm 1: addpd <0r6=int6464#7,<0r6=int6464#7
+# asm 2: addpd <0r6=%xmm6,<0r6=%xmm6
+addpd %xmm6,%xmm6
+
+# qhasm: float6464 0r7  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r7=int6464#8
+# asm 2: mulpd <1t0=%xmm12,<0r7=%xmm7
+mulpd %xmm12,%xmm7
+
+# qhasm: float6464 0r8  += 0r8
+# asm 1: addpd <0r8=int6464#9,<0r8=int6464#9
+# asm 2: addpd <0r8=%xmm8,<0r8=%xmm8
+addpd %xmm8,%xmm8
+
+# qhasm: float6464 0r9  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r9=int6464#10
+# asm 2: mulpd <1t0=%xmm12,<0r9=%xmm9
+mulpd %xmm12,%xmm9
+
+# qhasm: float6464 0r10 += 0r10
+# asm 1: addpd <0r10=int6464#11,<0r10=int6464#11
+# asm 2: addpd <0r10=%xmm10,<0r10=%xmm10
+addpd %xmm10,%xmm10
+
+# qhasm: float6464 0r11 *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r11=int6464#12
+# asm 2: mulpd <1t0=%xmm12,<0r11=%xmm11
+mulpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 6792 - 0
dclxvi-20130329/fp2e_mul.s

@@ -0,0 +1,6792 @@
+# File:   dclxvi-20130329/fp2e_mul.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: int64 rop
+
+# qhasm: int64 op1
+
+# qhasm: int64 b2a2p
+
+# qhasm: input rop
+
+# qhasm: input op1
+
+# qhasm: input b2a2p
+
+# qhasm: stack7680 mystack
+
+# qhasm: int64 c1
+
+# qhasm: int64 c2
+
+# qhasm: int64 c3
+
+# qhasm: int64 c4
+
+# qhasm: int64 c5
+
+# qhasm: int64 c6
+
+# qhasm: int64 c7
+
+# qhasm: caller c1
+
+# qhasm: caller c2
+
+# qhasm: caller c3
+
+# qhasm: caller c4
+
+# qhasm: caller c5
+
+# qhasm: caller c6
+
+# qhasm: caller c7
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 r12
+
+# qhasm: int6464 r13
+
+# qhasm: int6464 r14
+
+# qhasm: int6464 r15
+
+# qhasm: int6464 r16
+
+# qhasm: int6464 r17
+
+# qhasm: int6464 r18
+
+# qhasm: int6464 r19
+
+# qhasm: int6464 r20
+
+# qhasm: int6464 r21
+
+# qhasm: int6464 r22
+
+# qhasm: int6464 t0
+
+# qhasm: int6464 t1
+
+# qhasm: int6464 t2
+
+# qhasm: int6464 t3
+
+# qhasm: int6464 t4
+
+# qhasm: int6464 t5
+
+# qhasm: int6464 t6
+
+# qhasm: int6464 t7
+
+# qhasm: int6464 t8
+
+# qhasm: int6464 t9
+
+# qhasm: int6464 t10
+
+# qhasm: int6464 t11
+
+# qhasm: int6464 t12
+
+# qhasm: int6464 t13
+
+# qhasm: int6464 t14
+
+# qhasm: int6464 t15
+
+# qhasm: int6464 t16
+
+# qhasm: int6464 t17
+
+# qhasm: int6464 t18
+
+# qhasm: int6464 t19
+
+# qhasm: int6464 t20
+
+# qhasm: int6464 t21
+
+# qhasm: int6464 t22
+
+# qhasm: int6464 d0
+
+# qhasm: int6464 d1
+
+# qhasm: int6464 d2
+
+# qhasm: int6464 d3
+
+# qhasm: int6464 d4
+
+# qhasm: int6464 d5
+
+# qhasm: int6464 d6
+
+# qhasm: int6464 d7
+
+# qhasm: int6464 d8
+
+# qhasm: int6464 d9
+
+# qhasm: int6464 d10
+
+# qhasm: int6464 d11
+
+# qhasm: int6464 d12
+
+# qhasm: int6464 d13
+
+# qhasm: int6464 d14
+
+# qhasm: int6464 d15
+
+# qhasm: int6464 d16
+
+# qhasm: int6464 d17
+
+# qhasm: int6464 d18
+
+# qhasm: int6464 d19
+
+# qhasm: int6464 d20
+
+# qhasm: int6464 d21
+
+# qhasm: int6464 d22
+
+# qhasm: int6464 ab0
+
+# qhasm: int6464 ab1
+
+# qhasm: int6464 ab2
+
+# qhasm: int6464 ab3
+
+# qhasm: int6464 ab4
+
+# qhasm: int6464 ab5
+
+# qhasm: int6464 ab6
+
+# qhasm: int6464 ab7
+
+# qhasm: int6464 ab8
+
+# qhasm: int6464 ab9
+
+# qhasm: int6464 ab10
+
+# qhasm: int6464 ab11
+
+# qhasm: int6464 ab0six
+
+# qhasm: int6464 ab1six
+
+# qhasm: int6464 ab2six
+
+# qhasm: int6464 ab3six
+
+# qhasm: int6464 ab4six
+
+# qhasm: int6464 ab5six
+
+# qhasm: int6464 ab6six
+
+# qhasm: int6464 ab7six
+
+# qhasm: int6464 ab8six
+
+# qhasm: int6464 ab9six
+
+# qhasm: int6464 ab10six
+
+# qhasm: int6464 ab11six
+
+# qhasm: int6464 cd0
+
+# qhasm: int6464 cd1
+
+# qhasm: int6464 cd2
+
+# qhasm: int6464 cd3
+
+# qhasm: int6464 cd4
+
+# qhasm: int6464 cd5
+
+# qhasm: int6464 cd6
+
+# qhasm: int6464 cd7
+
+# qhasm: int6464 cd8
+
+# qhasm: int6464 cd9
+
+# qhasm: int6464 cd10
+
+# qhasm: int6464 cd11
+
+# qhasm: int6464 cd0six
+
+# qhasm: int6464 cd1six
+
+# qhasm: int6464 cd2six
+
+# qhasm: int6464 cd3six
+
+# qhasm: int6464 cd4six
+
+# qhasm: int6464 cd5six
+
+# qhasm: int6464 cd6six
+
+# qhasm: int6464 cd7six
+
+# qhasm: int6464 cd8six
+
+# qhasm: int6464 cd9six
+
+# qhasm: int6464 cd10six
+
+# qhasm: int6464 cd11six
+
+# qhasm: int6464 round
+
+# qhasm: int6464 carry
+
+# qhasm: int64 b1b1p
+
+# qhasm: int64 ma1a1p
+
+# qhasm: int64 a2b2p
+
+# qhasm: int64 sixa2b2p
+
+# qhasm: int64 sixb2a2p
+
+# qhasm: enter fp2e_mul_qhasm
+.text
+.p2align 5
+.globl _fp2e_mul_qhasm
+.globl fp2e_mul_qhasm
+_fp2e_mul_qhasm:
+fp2e_mul_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $960,%r11
+sub %r11,%rsp
+
+# qhasm: b1b1p = &mystack
+# asm 1: leaq <mystack=stack7680#1,>b1b1p=int64#4
+# asm 2: leaq <mystack=0(%rsp),>b1b1p=%rcx
+leaq 0(%rsp),%rcx
+
+# qhasm: ma1a1p = b1b1p + 192
+# asm 1: lea  192(<b1b1p=int64#4),>ma1a1p=int64#5
+# asm 2: lea  192(<b1b1p=%rcx),>ma1a1p=%r8
+lea  192(%rcx),%r8
+
+# qhasm: a2b2p = b1b1p + 384
+# asm 1: lea  384(<b1b1p=int64#4),>a2b2p=int64#6
+# asm 2: lea  384(<b1b1p=%rcx),>a2b2p=%r9
+lea  384(%rcx),%r9
+
+# qhasm: sixa2b2p = b1b1p + 576
+# asm 1: lea  576(<b1b1p=int64#4),>sixa2b2p=int64#7
+# asm 2: lea  576(<b1b1p=%rcx),>sixa2b2p=%rax
+lea  576(%rcx),%rax
+
+# qhasm: sixb2a2p = b1b1p + 768
+# asm 1: lea  768(<b1b1p=int64#4),>sixb2a2p=int64#8
+# asm 2: lea  768(<b1b1p=%rcx),>sixb2a2p=%r10
+lea  768(%rcx),%r10
+
+# qhasm: t0 = *(int128 *)(b2a2p + 0)
+# asm 1: movdqa 0(<b2a2p=int64#3),>t0=int6464#1
+# asm 2: movdqa 0(<b2a2p=%rdx),>t0=%xmm0
+movdqa 0(%rdx),%xmm0
+
+# qhasm: t0 = shuffle float64 of t0 and t0 by 0x1
+# asm 1: shufpd $0x1,<t0=int6464#1,<t0=int6464#1
+# asm 2: shufpd $0x1,<t0=%xmm0,<t0=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 0) = t0
+# asm 1: movdqa <t0=int6464#1,0(<a2b2p=int64#6)
+# asm 2: movdqa <t0=%xmm0,0(<a2b2p=%r9)
+movdqa %xmm0,0(%r9)
+
+# qhasm: t1 = *(int128 *)(b2a2p + 16)
+# asm 1: movdqa 16(<b2a2p=int64#3),>t1=int6464#1
+# asm 2: movdqa 16(<b2a2p=%rdx),>t1=%xmm0
+movdqa 16(%rdx),%xmm0
+
+# qhasm: t1 = shuffle float64 of t1 and t1 by 0x1
+# asm 1: shufpd $0x1,<t1=int6464#1,<t1=int6464#1
+# asm 2: shufpd $0x1,<t1=%xmm0,<t1=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 16) = t1
+# asm 1: movdqa <t1=int6464#1,16(<a2b2p=int64#6)
+# asm 2: movdqa <t1=%xmm0,16(<a2b2p=%r9)
+movdqa %xmm0,16(%r9)
+
+# qhasm: t2 = *(int128 *)(b2a2p + 32)
+# asm 1: movdqa 32(<b2a2p=int64#3),>t2=int6464#1
+# asm 2: movdqa 32(<b2a2p=%rdx),>t2=%xmm0
+movdqa 32(%rdx),%xmm0
+
+# qhasm: t2 = shuffle float64 of t2 and t2 by 0x1
+# asm 1: shufpd $0x1,<t2=int6464#1,<t2=int6464#1
+# asm 2: shufpd $0x1,<t2=%xmm0,<t2=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 32) = t2
+# asm 1: movdqa <t2=int6464#1,32(<a2b2p=int64#6)
+# asm 2: movdqa <t2=%xmm0,32(<a2b2p=%r9)
+movdqa %xmm0,32(%r9)
+
+# qhasm: t3 = *(int128 *)(b2a2p + 48)
+# asm 1: movdqa 48(<b2a2p=int64#3),>t3=int6464#1
+# asm 2: movdqa 48(<b2a2p=%rdx),>t3=%xmm0
+movdqa 48(%rdx),%xmm0
+
+# qhasm: t3 = shuffle float64 of t3 and t3 by 0x1
+# asm 1: shufpd $0x1,<t3=int6464#1,<t3=int6464#1
+# asm 2: shufpd $0x1,<t3=%xmm0,<t3=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 48) = t3
+# asm 1: movdqa <t3=int6464#1,48(<a2b2p=int64#6)
+# asm 2: movdqa <t3=%xmm0,48(<a2b2p=%r9)
+movdqa %xmm0,48(%r9)
+
+# qhasm: t4 = *(int128 *)(b2a2p + 64)
+# asm 1: movdqa 64(<b2a2p=int64#3),>t4=int6464#1
+# asm 2: movdqa 64(<b2a2p=%rdx),>t4=%xmm0
+movdqa 64(%rdx),%xmm0
+
+# qhasm: t4 = shuffle float64 of t4 and t4 by 0x1
+# asm 1: shufpd $0x1,<t4=int6464#1,<t4=int6464#1
+# asm 2: shufpd $0x1,<t4=%xmm0,<t4=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 64) = t4
+# asm 1: movdqa <t4=int6464#1,64(<a2b2p=int64#6)
+# asm 2: movdqa <t4=%xmm0,64(<a2b2p=%r9)
+movdqa %xmm0,64(%r9)
+
+# qhasm: t5 = *(int128 *)(b2a2p + 80)
+# asm 1: movdqa 80(<b2a2p=int64#3),>t5=int6464#1
+# asm 2: movdqa 80(<b2a2p=%rdx),>t5=%xmm0
+movdqa 80(%rdx),%xmm0
+
+# qhasm: t5 = shuffle float64 of t5 and t5 by 0x1
+# asm 1: shufpd $0x1,<t5=int6464#1,<t5=int6464#1
+# asm 2: shufpd $0x1,<t5=%xmm0,<t5=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 80) = t5
+# asm 1: movdqa <t5=int6464#1,80(<a2b2p=int64#6)
+# asm 2: movdqa <t5=%xmm0,80(<a2b2p=%r9)
+movdqa %xmm0,80(%r9)
+
+# qhasm: t6 = *(int128 *)(b2a2p + 96)
+# asm 1: movdqa 96(<b2a2p=int64#3),>t6=int6464#1
+# asm 2: movdqa 96(<b2a2p=%rdx),>t6=%xmm0
+movdqa 96(%rdx),%xmm0
+
+# qhasm: t6 = shuffle float64 of t6 and t6 by 0x1
+# asm 1: shufpd $0x1,<t6=int6464#1,<t6=int6464#1
+# asm 2: shufpd $0x1,<t6=%xmm0,<t6=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 96) = t6
+# asm 1: movdqa <t6=int6464#1,96(<a2b2p=int64#6)
+# asm 2: movdqa <t6=%xmm0,96(<a2b2p=%r9)
+movdqa %xmm0,96(%r9)
+
+# qhasm: t7 = *(int128 *)(b2a2p + 112)
+# asm 1: movdqa 112(<b2a2p=int64#3),>t7=int6464#1
+# asm 2: movdqa 112(<b2a2p=%rdx),>t7=%xmm0
+movdqa 112(%rdx),%xmm0
+
+# qhasm: t7 = shuffle float64 of t7 and t7 by 0x1
+# asm 1: shufpd $0x1,<t7=int6464#1,<t7=int6464#1
+# asm 2: shufpd $0x1,<t7=%xmm0,<t7=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 112) = t7
+# asm 1: movdqa <t7=int6464#1,112(<a2b2p=int64#6)
+# asm 2: movdqa <t7=%xmm0,112(<a2b2p=%r9)
+movdqa %xmm0,112(%r9)
+
+# qhasm: t8 = *(int128 *)(b2a2p + 128)
+# asm 1: movdqa 128(<b2a2p=int64#3),>t8=int6464#1
+# asm 2: movdqa 128(<b2a2p=%rdx),>t8=%xmm0
+movdqa 128(%rdx),%xmm0
+
+# qhasm: t8 = shuffle float64 of t8 and t8 by 0x1
+# asm 1: shufpd $0x1,<t8=int6464#1,<t8=int6464#1
+# asm 2: shufpd $0x1,<t8=%xmm0,<t8=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 128) = t8
+# asm 1: movdqa <t8=int6464#1,128(<a2b2p=int64#6)
+# asm 2: movdqa <t8=%xmm0,128(<a2b2p=%r9)
+movdqa %xmm0,128(%r9)
+
+# qhasm: t9 = *(int128 *)(b2a2p + 144)
+# asm 1: movdqa 144(<b2a2p=int64#3),>t9=int6464#1
+# asm 2: movdqa 144(<b2a2p=%rdx),>t9=%xmm0
+movdqa 144(%rdx),%xmm0
+
+# qhasm: t9 = shuffle float64 of t9 and t9 by 0x1
+# asm 1: shufpd $0x1,<t9=int6464#1,<t9=int6464#1
+# asm 2: shufpd $0x1,<t9=%xmm0,<t9=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 144) = t9
+# asm 1: movdqa <t9=int6464#1,144(<a2b2p=int64#6)
+# asm 2: movdqa <t9=%xmm0,144(<a2b2p=%r9)
+movdqa %xmm0,144(%r9)
+
+# qhasm: t10 = *(int128 *)(b2a2p + 160)
+# asm 1: movdqa 160(<b2a2p=int64#3),>t10=int6464#1
+# asm 2: movdqa 160(<b2a2p=%rdx),>t10=%xmm0
+movdqa 160(%rdx),%xmm0
+
+# qhasm: t10 = shuffle float64 of t10 and t10 by 0x1
+# asm 1: shufpd $0x1,<t10=int6464#1,<t10=int6464#1
+# asm 2: shufpd $0x1,<t10=%xmm0,<t10=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 160) = t10
+# asm 1: movdqa <t10=int6464#1,160(<a2b2p=int64#6)
+# asm 2: movdqa <t10=%xmm0,160(<a2b2p=%r9)
+movdqa %xmm0,160(%r9)
+
+# qhasm: t11 = *(int128 *)(b2a2p + 176)
+# asm 1: movdqa 176(<b2a2p=int64#3),>t11=int6464#1
+# asm 2: movdqa 176(<b2a2p=%rdx),>t11=%xmm0
+movdqa 176(%rdx),%xmm0
+
+# qhasm: t11 = shuffle float64 of t11 and t11 by 0x1
+# asm 1: shufpd $0x1,<t11=int6464#1,<t11=int6464#1
+# asm 2: shufpd $0x1,<t11=%xmm0,<t11=%xmm0
+shufpd $0x1,%xmm0,%xmm0
+
+# qhasm: *(int128 *)(a2b2p + 176) = t11
+# asm 1: movdqa <t11=int6464#1,176(<a2b2p=int64#6)
+# asm 2: movdqa <t11=%xmm0,176(<a2b2p=%r9)
+movdqa %xmm0,176(%r9)
+
+# qhasm: t0 = *(int128 *)(op1 + 0)
+# asm 1: movdqa 0(<op1=int64#2),>t0=int6464#1
+# asm 2: movdqa 0(<op1=%rsi),>t0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: d0 = t0
+# asm 1: movdqa <t0=int6464#1,>d0=int6464#2
+# asm 2: movdqa <t0=%xmm0,>d0=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t0 = unpack low double of t0 and t0
+# asm 1: unpcklpd <t0=int6464#1,<t0=int6464#1
+# asm 2: unpcklpd <t0=%xmm0,<t0=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d0 = unpack high double of d0 and d0
+# asm 1: unpckhpd <d0=int6464#2,<d0=int6464#2
+# asm 2: unpckhpd <d0=%xmm1,<d0=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d0 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d0=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d0=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 0)  = t0
+# asm 1: movdqa <t0=int6464#1,0(<b1b1p=int64#4)
+# asm 2: movdqa <t0=%xmm0,0(<b1b1p=%rcx)
+movdqa %xmm0,0(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 0)  = d0
+# asm 1: movdqa <d0=int6464#2,0(<ma1a1p=int64#5)
+# asm 2: movdqa <d0=%xmm1,0(<ma1a1p=%r8)
+movdqa %xmm1,0(%r8)
+
+# qhasm: t1 = *(int128 *)(op1 + 16)
+# asm 1: movdqa 16(<op1=int64#2),>t1=int6464#1
+# asm 2: movdqa 16(<op1=%rsi),>t1=%xmm0
+movdqa 16(%rsi),%xmm0
+
+# qhasm: d1 = t1
+# asm 1: movdqa <t1=int6464#1,>d1=int6464#2
+# asm 2: movdqa <t1=%xmm0,>d1=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t1 = unpack low double of t1 and t1
+# asm 1: unpcklpd <t1=int6464#1,<t1=int6464#1
+# asm 2: unpcklpd <t1=%xmm0,<t1=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d1 = unpack high double of d1 and d1
+# asm 1: unpckhpd <d1=int6464#2,<d1=int6464#2
+# asm 2: unpckhpd <d1=%xmm1,<d1=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d1 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d1=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d1=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 16)  = t1
+# asm 1: movdqa <t1=int6464#1,16(<b1b1p=int64#4)
+# asm 2: movdqa <t1=%xmm0,16(<b1b1p=%rcx)
+movdqa %xmm0,16(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 16)  = d1
+# asm 1: movdqa <d1=int6464#2,16(<ma1a1p=int64#5)
+# asm 2: movdqa <d1=%xmm1,16(<ma1a1p=%r8)
+movdqa %xmm1,16(%r8)
+
+# qhasm: t2 = *(int128 *)(op1 + 32)
+# asm 1: movdqa 32(<op1=int64#2),>t2=int6464#1
+# asm 2: movdqa 32(<op1=%rsi),>t2=%xmm0
+movdqa 32(%rsi),%xmm0
+
+# qhasm: d2 = t2
+# asm 1: movdqa <t2=int6464#1,>d2=int6464#2
+# asm 2: movdqa <t2=%xmm0,>d2=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t2 = unpack low double of t2 and t2
+# asm 1: unpcklpd <t2=int6464#1,<t2=int6464#1
+# asm 2: unpcklpd <t2=%xmm0,<t2=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d2 = unpack high double of d2 and d2
+# asm 1: unpckhpd <d2=int6464#2,<d2=int6464#2
+# asm 2: unpckhpd <d2=%xmm1,<d2=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d2 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d2=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d2=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 32)  = t2
+# asm 1: movdqa <t2=int6464#1,32(<b1b1p=int64#4)
+# asm 2: movdqa <t2=%xmm0,32(<b1b1p=%rcx)
+movdqa %xmm0,32(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 32)  = d2
+# asm 1: movdqa <d2=int6464#2,32(<ma1a1p=int64#5)
+# asm 2: movdqa <d2=%xmm1,32(<ma1a1p=%r8)
+movdqa %xmm1,32(%r8)
+
+# qhasm: t3 = *(int128 *)(op1 + 48)
+# asm 1: movdqa 48(<op1=int64#2),>t3=int6464#1
+# asm 2: movdqa 48(<op1=%rsi),>t3=%xmm0
+movdqa 48(%rsi),%xmm0
+
+# qhasm: d3 = t3
+# asm 1: movdqa <t3=int6464#1,>d3=int6464#2
+# asm 2: movdqa <t3=%xmm0,>d3=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t3 = unpack low double of t3 and t3
+# asm 1: unpcklpd <t3=int6464#1,<t3=int6464#1
+# asm 2: unpcklpd <t3=%xmm0,<t3=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d3 = unpack high double of d3 and d3
+# asm 1: unpckhpd <d3=int6464#2,<d3=int6464#2
+# asm 2: unpckhpd <d3=%xmm1,<d3=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d3 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d3=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d3=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 48)  = t3
+# asm 1: movdqa <t3=int6464#1,48(<b1b1p=int64#4)
+# asm 2: movdqa <t3=%xmm0,48(<b1b1p=%rcx)
+movdqa %xmm0,48(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 48)  = d3
+# asm 1: movdqa <d3=int6464#2,48(<ma1a1p=int64#5)
+# asm 2: movdqa <d3=%xmm1,48(<ma1a1p=%r8)
+movdqa %xmm1,48(%r8)
+
+# qhasm: t4 = *(int128 *)(op1 + 64)
+# asm 1: movdqa 64(<op1=int64#2),>t4=int6464#1
+# asm 2: movdqa 64(<op1=%rsi),>t4=%xmm0
+movdqa 64(%rsi),%xmm0
+
+# qhasm: d4 = t4
+# asm 1: movdqa <t4=int6464#1,>d4=int6464#2
+# asm 2: movdqa <t4=%xmm0,>d4=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t4 = unpack low double of t4 and t4
+# asm 1: unpcklpd <t4=int6464#1,<t4=int6464#1
+# asm 2: unpcklpd <t4=%xmm0,<t4=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d4 = unpack high double of d4 and d4
+# asm 1: unpckhpd <d4=int6464#2,<d4=int6464#2
+# asm 2: unpckhpd <d4=%xmm1,<d4=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d4 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d4=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d4=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 64)  = t4
+# asm 1: movdqa <t4=int6464#1,64(<b1b1p=int64#4)
+# asm 2: movdqa <t4=%xmm0,64(<b1b1p=%rcx)
+movdqa %xmm0,64(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 64)  = d4
+# asm 1: movdqa <d4=int6464#2,64(<ma1a1p=int64#5)
+# asm 2: movdqa <d4=%xmm1,64(<ma1a1p=%r8)
+movdqa %xmm1,64(%r8)
+
+# qhasm: t5 = *(int128 *)(op1 + 80)
+# asm 1: movdqa 80(<op1=int64#2),>t5=int6464#1
+# asm 2: movdqa 80(<op1=%rsi),>t5=%xmm0
+movdqa 80(%rsi),%xmm0
+
+# qhasm: d5 = t5
+# asm 1: movdqa <t5=int6464#1,>d5=int6464#2
+# asm 2: movdqa <t5=%xmm0,>d5=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t5 = unpack low double of t5 and t5
+# asm 1: unpcklpd <t5=int6464#1,<t5=int6464#1
+# asm 2: unpcklpd <t5=%xmm0,<t5=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d5 = unpack high double of d5 and d5
+# asm 1: unpckhpd <d5=int6464#2,<d5=int6464#2
+# asm 2: unpckhpd <d5=%xmm1,<d5=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d5 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d5=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d5=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 80)  = t5
+# asm 1: movdqa <t5=int6464#1,80(<b1b1p=int64#4)
+# asm 2: movdqa <t5=%xmm0,80(<b1b1p=%rcx)
+movdqa %xmm0,80(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 80)  = d5
+# asm 1: movdqa <d5=int6464#2,80(<ma1a1p=int64#5)
+# asm 2: movdqa <d5=%xmm1,80(<ma1a1p=%r8)
+movdqa %xmm1,80(%r8)
+
+# qhasm: t6 = *(int128 *)(op1 + 96)
+# asm 1: movdqa 96(<op1=int64#2),>t6=int6464#1
+# asm 2: movdqa 96(<op1=%rsi),>t6=%xmm0
+movdqa 96(%rsi),%xmm0
+
+# qhasm: d6 = t6
+# asm 1: movdqa <t6=int6464#1,>d6=int6464#2
+# asm 2: movdqa <t6=%xmm0,>d6=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t6 = unpack low double of t6 and t6
+# asm 1: unpcklpd <t6=int6464#1,<t6=int6464#1
+# asm 2: unpcklpd <t6=%xmm0,<t6=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d6 = unpack high double of d6 and d6
+# asm 1: unpckhpd <d6=int6464#2,<d6=int6464#2
+# asm 2: unpckhpd <d6=%xmm1,<d6=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d6 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d6=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d6=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 96)  = t6
+# asm 1: movdqa <t6=int6464#1,96(<b1b1p=int64#4)
+# asm 2: movdqa <t6=%xmm0,96(<b1b1p=%rcx)
+movdqa %xmm0,96(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 96)  = d6
+# asm 1: movdqa <d6=int6464#2,96(<ma1a1p=int64#5)
+# asm 2: movdqa <d6=%xmm1,96(<ma1a1p=%r8)
+movdqa %xmm1,96(%r8)
+
+# qhasm: t7 = *(int128 *)(op1 + 112)
+# asm 1: movdqa 112(<op1=int64#2),>t7=int6464#1
+# asm 2: movdqa 112(<op1=%rsi),>t7=%xmm0
+movdqa 112(%rsi),%xmm0
+
+# qhasm: d7 = t7
+# asm 1: movdqa <t7=int6464#1,>d7=int6464#2
+# asm 2: movdqa <t7=%xmm0,>d7=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t7 = unpack low double of t7 and t7
+# asm 1: unpcklpd <t7=int6464#1,<t7=int6464#1
+# asm 2: unpcklpd <t7=%xmm0,<t7=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d7 = unpack high double of d7 and d7
+# asm 1: unpckhpd <d7=int6464#2,<d7=int6464#2
+# asm 2: unpckhpd <d7=%xmm1,<d7=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d7 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d7=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d7=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 112)  = t7
+# asm 1: movdqa <t7=int6464#1,112(<b1b1p=int64#4)
+# asm 2: movdqa <t7=%xmm0,112(<b1b1p=%rcx)
+movdqa %xmm0,112(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 112)  = d7
+# asm 1: movdqa <d7=int6464#2,112(<ma1a1p=int64#5)
+# asm 2: movdqa <d7=%xmm1,112(<ma1a1p=%r8)
+movdqa %xmm1,112(%r8)
+
+# qhasm: t8 = *(int128 *)(op1 + 128)
+# asm 1: movdqa 128(<op1=int64#2),>t8=int6464#1
+# asm 2: movdqa 128(<op1=%rsi),>t8=%xmm0
+movdqa 128(%rsi),%xmm0
+
+# qhasm: d8 = t8
+# asm 1: movdqa <t8=int6464#1,>d8=int6464#2
+# asm 2: movdqa <t8=%xmm0,>d8=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t8 = unpack low double of t8 and t8
+# asm 1: unpcklpd <t8=int6464#1,<t8=int6464#1
+# asm 2: unpcklpd <t8=%xmm0,<t8=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d8 = unpack high double of d8 and d8
+# asm 1: unpckhpd <d8=int6464#2,<d8=int6464#2
+# asm 2: unpckhpd <d8=%xmm1,<d8=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d8 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d8=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d8=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 128)  = t8
+# asm 1: movdqa <t8=int6464#1,128(<b1b1p=int64#4)
+# asm 2: movdqa <t8=%xmm0,128(<b1b1p=%rcx)
+movdqa %xmm0,128(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 128)  = d8
+# asm 1: movdqa <d8=int6464#2,128(<ma1a1p=int64#5)
+# asm 2: movdqa <d8=%xmm1,128(<ma1a1p=%r8)
+movdqa %xmm1,128(%r8)
+
+# qhasm: t9 = *(int128 *)(op1 + 144)
+# asm 1: movdqa 144(<op1=int64#2),>t9=int6464#1
+# asm 2: movdqa 144(<op1=%rsi),>t9=%xmm0
+movdqa 144(%rsi),%xmm0
+
+# qhasm: d9 = t9
+# asm 1: movdqa <t9=int6464#1,>d9=int6464#2
+# asm 2: movdqa <t9=%xmm0,>d9=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t9 = unpack low double of t9 and t9
+# asm 1: unpcklpd <t9=int6464#1,<t9=int6464#1
+# asm 2: unpcklpd <t9=%xmm0,<t9=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d9 = unpack high double of d9 and d9
+# asm 1: unpckhpd <d9=int6464#2,<d9=int6464#2
+# asm 2: unpckhpd <d9=%xmm1,<d9=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d9 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d9=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d9=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 144)  = t9
+# asm 1: movdqa <t9=int6464#1,144(<b1b1p=int64#4)
+# asm 2: movdqa <t9=%xmm0,144(<b1b1p=%rcx)
+movdqa %xmm0,144(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 144)  = d9
+# asm 1: movdqa <d9=int6464#2,144(<ma1a1p=int64#5)
+# asm 2: movdqa <d9=%xmm1,144(<ma1a1p=%r8)
+movdqa %xmm1,144(%r8)
+
+# qhasm: t10 = *(int128 *)(op1 + 160)
+# asm 1: movdqa 160(<op1=int64#2),>t10=int6464#1
+# asm 2: movdqa 160(<op1=%rsi),>t10=%xmm0
+movdqa 160(%rsi),%xmm0
+
+# qhasm: d10 = t10
+# asm 1: movdqa <t10=int6464#1,>d10=int6464#2
+# asm 2: movdqa <t10=%xmm0,>d10=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t10 = unpack low double of t10 and t10
+# asm 1: unpcklpd <t10=int6464#1,<t10=int6464#1
+# asm 2: unpcklpd <t10=%xmm0,<t10=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d10 = unpack high double of d10 and d10
+# asm 1: unpckhpd <d10=int6464#2,<d10=int6464#2
+# asm 2: unpckhpd <d10=%xmm1,<d10=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d10 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d10=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d10=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 160)  = t10
+# asm 1: movdqa <t10=int6464#1,160(<b1b1p=int64#4)
+# asm 2: movdqa <t10=%xmm0,160(<b1b1p=%rcx)
+movdqa %xmm0,160(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 160)  = d10
+# asm 1: movdqa <d10=int6464#2,160(<ma1a1p=int64#5)
+# asm 2: movdqa <d10=%xmm1,160(<ma1a1p=%r8)
+movdqa %xmm1,160(%r8)
+
+# qhasm: t11 = *(int128 *)(op1 + 176)
+# asm 1: movdqa 176(<op1=int64#2),>t11=int6464#1
+# asm 2: movdqa 176(<op1=%rsi),>t11=%xmm0
+movdqa 176(%rsi),%xmm0
+
+# qhasm: d11 = t11
+# asm 1: movdqa <t11=int6464#1,>d11=int6464#2
+# asm 2: movdqa <t11=%xmm0,>d11=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: t11 = unpack low double of t11 and t11
+# asm 1: unpcklpd <t11=int6464#1,<t11=int6464#1
+# asm 2: unpcklpd <t11=%xmm0,<t11=%xmm0
+unpcklpd %xmm0,%xmm0
+
+# qhasm: d11 = unpack high double of d11 and d11
+# asm 1: unpckhpd <d11=int6464#2,<d11=int6464#2
+# asm 2: unpckhpd <d11=%xmm1,<d11=%xmm1
+unpckhpd %xmm1,%xmm1
+
+# qhasm: float6464 d11 *= MINUSONE_ONE
+# asm 1: mulpd MINUSONE_ONE,<d11=int6464#2
+# asm 2: mulpd MINUSONE_ONE,<d11=%xmm1
+mulpd MINUSONE_ONE,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 176)  = t11
+# asm 1: movdqa <t11=int6464#1,176(<b1b1p=int64#4)
+# asm 2: movdqa <t11=%xmm0,176(<b1b1p=%rcx)
+movdqa %xmm0,176(%rcx)
+
+# qhasm: *(int128 *)(ma1a1p + 176)  = d11
+# asm 1: movdqa <d11=int6464#2,176(<ma1a1p=int64#5)
+# asm 2: movdqa <d11=%xmm1,176(<ma1a1p=%r8)
+movdqa %xmm1,176(%r8)
+
+# qhasm: ab0 = *(int128 *)(b1b1p + 0)
+# asm 1: movdqa 0(<b1b1p=int64#4),>ab0=int6464#1
+# asm 2: movdqa 0(<b1b1p=%rcx),>ab0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: cd0 = *(int128 *)(ma1a1p + 0)
+# asm 1: movdqa 0(<ma1a1p=int64#5),>cd0=int6464#2
+# asm 2: movdqa 0(<ma1a1p=%r8),>cd0=%xmm1
+movdqa 0(%r8),%xmm1
+
+# qhasm: r0 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r0=int6464#3
+# asm 2: movdqa <ab0=%xmm0,>r0=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: float6464 r0 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<r0=int6464#3
+# asm 2: mulpd 0(<b2a2p=%rdx),<r0=%xmm2
+mulpd 0(%rdx),%xmm2
+
+# qhasm: d0 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d0=int6464#4
+# asm 2: movdqa <cd0=%xmm1,>d0=%xmm3
+movdqa %xmm1,%xmm3
+
+# qhasm: float6464 d0 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d0=int6464#4
+# asm 2: mulpd 0(<a2b2p=%r9),<d0=%xmm3
+mulpd 0(%r9),%xmm3
+
+# qhasm: float6464 r0 += d0
+# asm 1: addpd <d0=int6464#4,<r0=int6464#3
+# asm 2: addpd <d0=%xmm3,<r0=%xmm2
+addpd %xmm3,%xmm2
+
+# qhasm: r1 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r1=int6464#4
+# asm 2: movdqa <ab0=%xmm0,>r1=%xmm3
+movdqa %xmm0,%xmm3
+
+# qhasm: float6464 r1 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<r1=int6464#4
+# asm 2: mulpd 16(<b2a2p=%rdx),<r1=%xmm3
+mulpd 16(%rdx),%xmm3
+
+# qhasm: d1 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d1=int6464#5
+# asm 2: movdqa <cd0=%xmm1,>d1=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: float6464 d1 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d1=int6464#5
+# asm 2: mulpd 16(<a2b2p=%r9),<d1=%xmm4
+mulpd 16(%r9),%xmm4
+
+# qhasm: float6464 r1 += d1
+# asm 1: addpd <d1=int6464#5,<r1=int6464#4
+# asm 2: addpd <d1=%xmm4,<r1=%xmm3
+addpd %xmm4,%xmm3
+
+# qhasm: r2 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r2=int6464#5
+# asm 2: movdqa <ab0=%xmm0,>r2=%xmm4
+movdqa %xmm0,%xmm4
+
+# qhasm: float6464 r2 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<r2=int6464#5
+# asm 2: mulpd 32(<b2a2p=%rdx),<r2=%xmm4
+mulpd 32(%rdx),%xmm4
+
+# qhasm: d2 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d2=int6464#6
+# asm 2: movdqa <cd0=%xmm1,>d2=%xmm5
+movdqa %xmm1,%xmm5
+
+# qhasm: float6464 d2 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d2=int6464#6
+# asm 2: mulpd 32(<a2b2p=%r9),<d2=%xmm5
+mulpd 32(%r9),%xmm5
+
+# qhasm: float6464 r2 += d2
+# asm 1: addpd <d2=int6464#6,<r2=int6464#5
+# asm 2: addpd <d2=%xmm5,<r2=%xmm4
+addpd %xmm5,%xmm4
+
+# qhasm: r3 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r3=int6464#6
+# asm 2: movdqa <ab0=%xmm0,>r3=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: float6464 r3 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<r3=int6464#6
+# asm 2: mulpd 48(<b2a2p=%rdx),<r3=%xmm5
+mulpd 48(%rdx),%xmm5
+
+# qhasm: d3 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d3=int6464#7
+# asm 2: movdqa <cd0=%xmm1,>d3=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: float6464 d3 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d3=int6464#7
+# asm 2: mulpd 48(<a2b2p=%r9),<d3=%xmm6
+mulpd 48(%r9),%xmm6
+
+# qhasm: float6464 r3 += d3
+# asm 1: addpd <d3=int6464#7,<r3=int6464#6
+# asm 2: addpd <d3=%xmm6,<r3=%xmm5
+addpd %xmm6,%xmm5
+
+# qhasm: r4 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r4=int6464#7
+# asm 2: movdqa <ab0=%xmm0,>r4=%xmm6
+movdqa %xmm0,%xmm6
+
+# qhasm: float6464 r4 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<r4=int6464#7
+# asm 2: mulpd 64(<b2a2p=%rdx),<r4=%xmm6
+mulpd 64(%rdx),%xmm6
+
+# qhasm: d4 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d4=int6464#8
+# asm 2: movdqa <cd0=%xmm1,>d4=%xmm7
+movdqa %xmm1,%xmm7
+
+# qhasm: float6464 d4 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d4=int6464#8
+# asm 2: mulpd 64(<a2b2p=%r9),<d4=%xmm7
+mulpd 64(%r9),%xmm7
+
+# qhasm: float6464 r4 += d4
+# asm 1: addpd <d4=int6464#8,<r4=int6464#7
+# asm 2: addpd <d4=%xmm7,<r4=%xmm6
+addpd %xmm7,%xmm6
+
+# qhasm: r5 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r5=int6464#8
+# asm 2: movdqa <ab0=%xmm0,>r5=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm: float6464 r5 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<r5=int6464#8
+# asm 2: mulpd 80(<b2a2p=%rdx),<r5=%xmm7
+mulpd 80(%rdx),%xmm7
+
+# qhasm: d5 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d5=int6464#9
+# asm 2: movdqa <cd0=%xmm1,>d5=%xmm8
+movdqa %xmm1,%xmm8
+
+# qhasm: float6464 d5 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d5=int6464#9
+# asm 2: mulpd 80(<a2b2p=%r9),<d5=%xmm8
+mulpd 80(%r9),%xmm8
+
+# qhasm: float6464 r5 += d5
+# asm 1: addpd <d5=int6464#9,<r5=int6464#8
+# asm 2: addpd <d5=%xmm8,<r5=%xmm7
+addpd %xmm8,%xmm7
+
+# qhasm: r6 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r6=int6464#9
+# asm 2: movdqa <ab0=%xmm0,>r6=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm: float6464 r6 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<r6=int6464#9
+# asm 2: mulpd 96(<b2a2p=%rdx),<r6=%xmm8
+mulpd 96(%rdx),%xmm8
+
+# qhasm: d6 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d6=int6464#10
+# asm 2: movdqa <cd0=%xmm1,>d6=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm: float6464 d6 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d6=int6464#10
+# asm 2: mulpd 96(<a2b2p=%r9),<d6=%xmm9
+mulpd 96(%r9),%xmm9
+
+# qhasm: float6464 r6 += d6
+# asm 1: addpd <d6=int6464#10,<r6=int6464#9
+# asm 2: addpd <d6=%xmm9,<r6=%xmm8
+addpd %xmm9,%xmm8
+
+# qhasm: r7 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r7=int6464#10
+# asm 2: movdqa <ab0=%xmm0,>r7=%xmm9
+movdqa %xmm0,%xmm9
+
+# qhasm: float6464 r7 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<r7=int6464#10
+# asm 2: mulpd 112(<b2a2p=%rdx),<r7=%xmm9
+mulpd 112(%rdx),%xmm9
+
+# qhasm: d7 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d7=int6464#11
+# asm 2: movdqa <cd0=%xmm1,>d7=%xmm10
+movdqa %xmm1,%xmm10
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d7=int6464#11
+# asm 2: mulpd 112(<a2b2p=%r9),<d7=%xmm10
+mulpd 112(%r9),%xmm10
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#11,<r7=int6464#10
+# asm 2: addpd <d7=%xmm10,<r7=%xmm9
+addpd %xmm10,%xmm9
+
+# qhasm: r8 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r8=int6464#11
+# asm 2: movdqa <ab0=%xmm0,>r8=%xmm10
+movdqa %xmm0,%xmm10
+
+# qhasm: float6464 r8 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<r8=int6464#11
+# asm 2: mulpd 128(<b2a2p=%rdx),<r8=%xmm10
+mulpd 128(%rdx),%xmm10
+
+# qhasm: d8 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d8=int6464#12
+# asm 2: movdqa <cd0=%xmm1,>d8=%xmm11
+movdqa %xmm1,%xmm11
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d8=int6464#12
+# asm 2: mulpd 128(<a2b2p=%r9),<d8=%xmm11
+mulpd 128(%r9),%xmm11
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#12,<r8=int6464#11
+# asm 2: addpd <d8=%xmm11,<r8=%xmm10
+addpd %xmm11,%xmm10
+
+# qhasm: r9 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r9=int6464#12
+# asm 2: movdqa <ab0=%xmm0,>r9=%xmm11
+movdqa %xmm0,%xmm11
+
+# qhasm: float6464 r9 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<r9=int6464#12
+# asm 2: mulpd 144(<b2a2p=%rdx),<r9=%xmm11
+mulpd 144(%rdx),%xmm11
+
+# qhasm: d9 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d9=int6464#13
+# asm 2: movdqa <cd0=%xmm1,>d9=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d9=int6464#13
+# asm 2: mulpd 144(<a2b2p=%r9),<d9=%xmm12
+mulpd 144(%r9),%xmm12
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#13,<r9=int6464#12
+# asm 2: addpd <d9=%xmm12,<r9=%xmm11
+addpd %xmm12,%xmm11
+
+# qhasm: r10 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r10=int6464#13
+# asm 2: movdqa <ab0=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: float6464 r10 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<r10=int6464#13
+# asm 2: mulpd 160(<b2a2p=%rdx),<r10=%xmm12
+mulpd 160(%rdx),%xmm12
+
+# qhasm: d10 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d10=int6464#14
+# asm 2: movdqa <cd0=%xmm1,>d10=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d10=int6464#14
+# asm 2: mulpd 160(<a2b2p=%r9),<d10=%xmm13
+mulpd 160(%r9),%xmm13
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#14,<r10=int6464#13
+# asm 2: addpd <d10=%xmm13,<r10=%xmm12
+addpd %xmm13,%xmm12
+
+# qhasm: r11 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r11=int6464#1
+# asm 2: movdqa <ab0=%xmm0,>r11=%xmm0
+movdqa %xmm0,%xmm0
+
+# qhasm: float6464 r11 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r11=int6464#1
+# asm 2: mulpd 176(<b2a2p=%rdx),<r11=%xmm0
+mulpd 176(%rdx),%xmm0
+
+# qhasm: d11 = cd0
+# asm 1: movdqa <cd0=int6464#2,>d11=int6464#2
+# asm 2: movdqa <cd0=%xmm1,>d11=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d11=int6464#2
+# asm 2: mulpd 176(<a2b2p=%r9),<d11=%xmm1
+mulpd 176(%r9),%xmm1
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#2,<r11=int6464#1
+# asm 2: addpd <d11=%xmm1,<r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(b1b1p + 0) = r0
+# asm 1: movdqa <r0=int6464#3,0(<b1b1p=int64#4)
+# asm 2: movdqa <r0=%xmm2,0(<b1b1p=%rcx)
+movdqa %xmm2,0(%rcx)
+
+# qhasm: ab1 = *(int128 *)(b1b1p + 16)
+# asm 1: movdqa 16(<b1b1p=int64#4),>ab1=int6464#2
+# asm 2: movdqa 16(<b1b1p=%rcx),>ab1=%xmm1
+movdqa 16(%rcx),%xmm1
+
+# qhasm: cd1 = *(int128 *)(ma1a1p + 16)
+# asm 1: movdqa 16(<ma1a1p=int64#5),>cd1=int6464#3
+# asm 2: movdqa 16(<ma1a1p=%r8),>cd1=%xmm2
+movdqa 16(%r8),%xmm2
+
+# qhasm: ab1six = ab1
+# asm 1: movdqa <ab1=int6464#2,>ab1six=int6464#14
+# asm 2: movdqa <ab1=%xmm1,>ab1six=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: cd1six = cd1
+# asm 1: movdqa <cd1=int6464#3,>cd1six=int6464#15
+# asm 2: movdqa <cd1=%xmm2,>cd1six=%xmm14
+movdqa %xmm2,%xmm14
+
+# qhasm: float6464 ab1six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab1six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab1six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd1six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd1six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd1six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t1 = ab1
+# asm 1: movdqa <ab1=int6464#2,>t1=int6464#16
+# asm 2: movdqa <ab1=%xmm1,>t1=%xmm15
+movdqa %xmm1,%xmm15
+
+# qhasm: float6464 t1 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t1=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t1=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r1 += t1
+# asm 1: addpd <t1=int6464#16,<r1=int6464#4
+# asm 2: addpd <t1=%xmm15,<r1=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: d1 = cd1
+# asm 1: movdqa <cd1=int6464#3,>d1=int6464#16
+# asm 2: movdqa <cd1=%xmm2,>d1=%xmm15
+movdqa %xmm2,%xmm15
+
+# qhasm: float6464 d1 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d1=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d1=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r1 += d1
+# asm 1: addpd <d1=int6464#16,<r1=int6464#4
+# asm 2: addpd <d1=%xmm15,<r1=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t7 = ab1
+# asm 1: movdqa <ab1=int6464#2,>t7=int6464#2
+# asm 2: movdqa <ab1=%xmm1,>t7=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: float6464 t7 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t7=int6464#2
+# asm 2: mulpd 96(<b2a2p=%rdx),<t7=%xmm1
+mulpd 96(%rdx),%xmm1
+
+# qhasm: float6464 r7 += t7
+# asm 1: addpd <t7=int6464#2,<r7=int6464#10
+# asm 2: addpd <t7=%xmm1,<r7=%xmm9
+addpd %xmm1,%xmm9
+
+# qhasm: d7 = cd1
+# asm 1: movdqa <cd1=int6464#3,>d7=int6464#2
+# asm 2: movdqa <cd1=%xmm2,>d7=%xmm1
+movdqa %xmm2,%xmm1
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d7=int6464#2
+# asm 2: mulpd 96(<a2b2p=%r9),<d7=%xmm1
+mulpd 96(%r9),%xmm1
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#2,<r7=int6464#10
+# asm 2: addpd <d7=%xmm1,<r7=%xmm9
+addpd %xmm1,%xmm9
+
+# qhasm: t2 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t2=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t2=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t2 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t2=int6464#2
+# asm 2: mulpd 16(<b2a2p=%rdx),<t2=%xmm1
+mulpd 16(%rdx),%xmm1
+
+# qhasm: float6464 r2 += t2
+# asm 1: addpd <t2=int6464#2,<r2=int6464#5
+# asm 2: addpd <t2=%xmm1,<r2=%xmm4
+addpd %xmm1,%xmm4
+
+# qhasm: d2 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d2=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d2=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d2 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d2=int6464#2
+# asm 2: mulpd 16(<a2b2p=%r9),<d2=%xmm1
+mulpd 16(%r9),%xmm1
+
+# qhasm: float6464 r2 += d2
+# asm 1: addpd <d2=int6464#2,<r2=int6464#5
+# asm 2: addpd <d2=%xmm1,<r2=%xmm4
+addpd %xmm1,%xmm4
+
+# qhasm: t3 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t3=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t3=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t3 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t3=int6464#2
+# asm 2: mulpd 32(<b2a2p=%rdx),<t3=%xmm1
+mulpd 32(%rdx),%xmm1
+
+# qhasm: float6464 r3 += t3
+# asm 1: addpd <t3=int6464#2,<r3=int6464#6
+# asm 2: addpd <t3=%xmm1,<r3=%xmm5
+addpd %xmm1,%xmm5
+
+# qhasm: d3 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d3=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d3=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d3 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d3=int6464#2
+# asm 2: mulpd 32(<a2b2p=%r9),<d3=%xmm1
+mulpd 32(%r9),%xmm1
+
+# qhasm: float6464 r3 += d3
+# asm 1: addpd <d3=int6464#2,<r3=int6464#6
+# asm 2: addpd <d3=%xmm1,<r3=%xmm5
+addpd %xmm1,%xmm5
+
+# qhasm: t4 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t4=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t4=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t4 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t4=int6464#2
+# asm 2: mulpd 48(<b2a2p=%rdx),<t4=%xmm1
+mulpd 48(%rdx),%xmm1
+
+# qhasm: float6464 r4 += t4
+# asm 1: addpd <t4=int6464#2,<r4=int6464#7
+# asm 2: addpd <t4=%xmm1,<r4=%xmm6
+addpd %xmm1,%xmm6
+
+# qhasm: d4 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d4=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d4=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d4 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d4=int6464#2
+# asm 2: mulpd 48(<a2b2p=%r9),<d4=%xmm1
+mulpd 48(%r9),%xmm1
+
+# qhasm: float6464 r4 += d4
+# asm 1: addpd <d4=int6464#2,<r4=int6464#7
+# asm 2: addpd <d4=%xmm1,<r4=%xmm6
+addpd %xmm1,%xmm6
+
+# qhasm: t5 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t5=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t5=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t5 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t5=int6464#2
+# asm 2: mulpd 64(<b2a2p=%rdx),<t5=%xmm1
+mulpd 64(%rdx),%xmm1
+
+# qhasm: float6464 r5 += t5
+# asm 1: addpd <t5=int6464#2,<r5=int6464#8
+# asm 2: addpd <t5=%xmm1,<r5=%xmm7
+addpd %xmm1,%xmm7
+
+# qhasm: d5 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d5=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d5=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d5 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d5=int6464#2
+# asm 2: mulpd 64(<a2b2p=%r9),<d5=%xmm1
+mulpd 64(%r9),%xmm1
+
+# qhasm: float6464 r5 += d5
+# asm 1: addpd <d5=int6464#2,<r5=int6464#8
+# asm 2: addpd <d5=%xmm1,<r5=%xmm7
+addpd %xmm1,%xmm7
+
+# qhasm: t6 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t6=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t6=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t6 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t6=int6464#2
+# asm 2: mulpd 80(<b2a2p=%rdx),<t6=%xmm1
+mulpd 80(%rdx),%xmm1
+
+# qhasm: float6464 r6 += t6
+# asm 1: addpd <t6=int6464#2,<r6=int6464#9
+# asm 2: addpd <t6=%xmm1,<r6=%xmm8
+addpd %xmm1,%xmm8
+
+# qhasm: d6 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d6=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d6=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d6 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d6=int6464#2
+# asm 2: mulpd 80(<a2b2p=%r9),<d6=%xmm1
+mulpd 80(%r9),%xmm1
+
+# qhasm: float6464 r6 += d6
+# asm 1: addpd <d6=int6464#2,<r6=int6464#9
+# asm 2: addpd <d6=%xmm1,<r6=%xmm8
+addpd %xmm1,%xmm8
+
+# qhasm: t8 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t8=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t8=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t8=int6464#2
+# asm 2: mulpd 112(<b2a2p=%rdx),<t8=%xmm1
+mulpd 112(%rdx),%xmm1
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#2,<r8=int6464#11
+# asm 2: addpd <t8=%xmm1,<r8=%xmm10
+addpd %xmm1,%xmm10
+
+# qhasm: d8 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d8=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d8=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d8=int6464#2
+# asm 2: mulpd 112(<a2b2p=%r9),<d8=%xmm1
+mulpd 112(%r9),%xmm1
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#2,<r8=int6464#11
+# asm 2: addpd <d8=%xmm1,<r8=%xmm10
+addpd %xmm1,%xmm10
+
+# qhasm: t9 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t9=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t9=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t9=int6464#2
+# asm 2: mulpd 128(<b2a2p=%rdx),<t9=%xmm1
+mulpd 128(%rdx),%xmm1
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#2,<r9=int6464#12
+# asm 2: addpd <t9=%xmm1,<r9=%xmm11
+addpd %xmm1,%xmm11
+
+# qhasm: d9 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d9=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d9=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d9=int6464#2
+# asm 2: mulpd 128(<a2b2p=%r9),<d9=%xmm1
+mulpd 128(%r9),%xmm1
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#2,<r9=int6464#12
+# asm 2: addpd <d9=%xmm1,<r9=%xmm11
+addpd %xmm1,%xmm11
+
+# qhasm: t10 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t10=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t10=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t10=int6464#2
+# asm 2: mulpd 144(<b2a2p=%rdx),<t10=%xmm1
+mulpd 144(%rdx),%xmm1
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#2,<r10=int6464#13
+# asm 2: addpd <t10=%xmm1,<r10=%xmm12
+addpd %xmm1,%xmm12
+
+# qhasm: d10 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d10=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d10=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d10=int6464#2
+# asm 2: mulpd 144(<a2b2p=%r9),<d10=%xmm1
+mulpd 144(%r9),%xmm1
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#2,<r10=int6464#13
+# asm 2: addpd <d10=%xmm1,<r10=%xmm12
+addpd %xmm1,%xmm12
+
+# qhasm: t11 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>t11=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>t11=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t11=int6464#2
+# asm 2: mulpd 160(<b2a2p=%rdx),<t11=%xmm1
+mulpd 160(%rdx),%xmm1
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#2,<r11=int6464#1
+# asm 2: addpd <t11=%xmm1,<r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: d11 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d11=int6464#2
+# asm 2: movdqa <cd1six=%xmm14,>d11=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d11=int6464#2
+# asm 2: mulpd 160(<a2b2p=%r9),<d11=%xmm1
+mulpd 160(%r9),%xmm1
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#2,<r11=int6464#1
+# asm 2: addpd <d11=%xmm1,<r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: r12 = ab1six
+# asm 1: movdqa <ab1six=int6464#14,>r12=int6464#2
+# asm 2: movdqa <ab1six=%xmm13,>r12=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: float6464 r12 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r12=int6464#2
+# asm 2: mulpd 176(<b2a2p=%rdx),<r12=%xmm1
+mulpd 176(%rdx),%xmm1
+
+# qhasm: d12 = cd1six
+# asm 1: movdqa <cd1six=int6464#15,>d12=int6464#3
+# asm 2: movdqa <cd1six=%xmm14,>d12=%xmm2
+movdqa %xmm14,%xmm2
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d12=int6464#3
+# asm 2: mulpd 176(<a2b2p=%r9),<d12=%xmm2
+mulpd 176(%r9),%xmm2
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#3,<r12=int6464#2
+# asm 2: addpd <d12=%xmm2,<r12=%xmm1
+addpd %xmm2,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 16) = r1
+# asm 1: movdqa <r1=int6464#4,16(<b1b1p=int64#4)
+# asm 2: movdqa <r1=%xmm3,16(<b1b1p=%rcx)
+movdqa %xmm3,16(%rcx)
+
+# qhasm: ab2 = *(int128 *)(b1b1p + 32)
+# asm 1: movdqa 32(<b1b1p=int64#4),>ab2=int6464#3
+# asm 2: movdqa 32(<b1b1p=%rcx),>ab2=%xmm2
+movdqa 32(%rcx),%xmm2
+
+# qhasm: cd2 = *(int128 *)(ma1a1p + 32)
+# asm 1: movdqa 32(<ma1a1p=int64#5),>cd2=int6464#4
+# asm 2: movdqa 32(<ma1a1p=%r8),>cd2=%xmm3
+movdqa 32(%r8),%xmm3
+
+# qhasm: ab2six = ab2
+# asm 1: movdqa <ab2=int6464#3,>ab2six=int6464#14
+# asm 2: movdqa <ab2=%xmm2,>ab2six=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: cd2six = cd2
+# asm 1: movdqa <cd2=int6464#4,>cd2six=int6464#15
+# asm 2: movdqa <cd2=%xmm3,>cd2six=%xmm14
+movdqa %xmm3,%xmm14
+
+# qhasm: float6464 ab2six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab2six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab2six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd2six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd2six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd2six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t2 = ab2
+# asm 1: movdqa <ab2=int6464#3,>t2=int6464#16
+# asm 2: movdqa <ab2=%xmm2,>t2=%xmm15
+movdqa %xmm2,%xmm15
+
+# qhasm: float6464 t2 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t2=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t2=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r2 += t2
+# asm 1: addpd <t2=int6464#16,<r2=int6464#5
+# asm 2: addpd <t2=%xmm15,<r2=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: d2 = cd2
+# asm 1: movdqa <cd2=int6464#4,>d2=int6464#16
+# asm 2: movdqa <cd2=%xmm3,>d2=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 d2 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d2=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d2=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r2 += d2
+# asm 1: addpd <d2=int6464#16,<r2=int6464#5
+# asm 2: addpd <d2=%xmm15,<r2=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: t7 = ab2
+# asm 1: movdqa <ab2=int6464#3,>t7=int6464#16
+# asm 2: movdqa <ab2=%xmm2,>t7=%xmm15
+movdqa %xmm2,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t7=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t7=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r7 += t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#10
+# asm 2: addpd <t7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: d7 = cd2
+# asm 1: movdqa <cd2=int6464#4,>d7=int6464#16
+# asm 2: movdqa <cd2=%xmm3,>d7=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d7=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d7=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#16,<r7=int6464#10
+# asm 2: addpd <d7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t8 = ab2
+# asm 1: movdqa <ab2=int6464#3,>t8=int6464#16
+# asm 2: movdqa <ab2=%xmm2,>t8=%xmm15
+movdqa %xmm2,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t8=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t8=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#11
+# asm 2: addpd <t8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: d8 = cd2
+# asm 1: movdqa <cd2=int6464#4,>d8=int6464#16
+# asm 2: movdqa <cd2=%xmm3,>d8=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d8=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d8=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#16,<r8=int6464#11
+# asm 2: addpd <d8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: r13 = ab2
+# asm 1: movdqa <ab2=int6464#3,>r13=int6464#3
+# asm 2: movdqa <ab2=%xmm2,>r13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: float6464 r13 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r13=int6464#3
+# asm 2: mulpd 176(<b2a2p=%rdx),<r13=%xmm2
+mulpd 176(%rdx),%xmm2
+
+# qhasm: d13 = cd2
+# asm 1: movdqa <cd2=int6464#4,>d13=int6464#4
+# asm 2: movdqa <cd2=%xmm3,>d13=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d13=int6464#4
+# asm 2: mulpd 176(<a2b2p=%r9),<d13=%xmm3
+mulpd 176(%r9),%xmm3
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#4,<r13=int6464#3
+# asm 2: addpd <d13=%xmm3,<r13=%xmm2
+addpd %xmm3,%xmm2
+
+# qhasm: t3 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t3=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t3=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t3 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t3=int6464#4
+# asm 2: mulpd 16(<b2a2p=%rdx),<t3=%xmm3
+mulpd 16(%rdx),%xmm3
+
+# qhasm: float6464 r3 += t3
+# asm 1: addpd <t3=int6464#4,<r3=int6464#6
+# asm 2: addpd <t3=%xmm3,<r3=%xmm5
+addpd %xmm3,%xmm5
+
+# qhasm: d3 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d3=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d3=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d3 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d3=int6464#4
+# asm 2: mulpd 16(<a2b2p=%r9),<d3=%xmm3
+mulpd 16(%r9),%xmm3
+
+# qhasm: float6464 r3 += d3
+# asm 1: addpd <d3=int6464#4,<r3=int6464#6
+# asm 2: addpd <d3=%xmm3,<r3=%xmm5
+addpd %xmm3,%xmm5
+
+# qhasm: t4 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t4=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t4 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t4=int6464#4
+# asm 2: mulpd 32(<b2a2p=%rdx),<t4=%xmm3
+mulpd 32(%rdx),%xmm3
+
+# qhasm: float6464 r4 += t4
+# asm 1: addpd <t4=int6464#4,<r4=int6464#7
+# asm 2: addpd <t4=%xmm3,<r4=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: d4 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d4=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d4=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d4 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d4=int6464#4
+# asm 2: mulpd 32(<a2b2p=%r9),<d4=%xmm3
+mulpd 32(%r9),%xmm3
+
+# qhasm: float6464 r4 += d4
+# asm 1: addpd <d4=int6464#4,<r4=int6464#7
+# asm 2: addpd <d4=%xmm3,<r4=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: t5 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t5=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t5=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t5 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t5=int6464#4
+# asm 2: mulpd 48(<b2a2p=%rdx),<t5=%xmm3
+mulpd 48(%rdx),%xmm3
+
+# qhasm: float6464 r5 += t5
+# asm 1: addpd <t5=int6464#4,<r5=int6464#8
+# asm 2: addpd <t5=%xmm3,<r5=%xmm7
+addpd %xmm3,%xmm7
+
+# qhasm: d5 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d5=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d5=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d5 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d5=int6464#4
+# asm 2: mulpd 48(<a2b2p=%r9),<d5=%xmm3
+mulpd 48(%r9),%xmm3
+
+# qhasm: float6464 r5 += d5
+# asm 1: addpd <d5=int6464#4,<r5=int6464#8
+# asm 2: addpd <d5=%xmm3,<r5=%xmm7
+addpd %xmm3,%xmm7
+
+# qhasm: t6 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t6=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t6=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t6 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t6=int6464#4
+# asm 2: mulpd 64(<b2a2p=%rdx),<t6=%xmm3
+mulpd 64(%rdx),%xmm3
+
+# qhasm: float6464 r6 += t6
+# asm 1: addpd <t6=int6464#4,<r6=int6464#9
+# asm 2: addpd <t6=%xmm3,<r6=%xmm8
+addpd %xmm3,%xmm8
+
+# qhasm: d6 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d6=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d6=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d6 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d6=int6464#4
+# asm 2: mulpd 64(<a2b2p=%r9),<d6=%xmm3
+mulpd 64(%r9),%xmm3
+
+# qhasm: float6464 r6 += d6
+# asm 1: addpd <d6=int6464#4,<r6=int6464#9
+# asm 2: addpd <d6=%xmm3,<r6=%xmm8
+addpd %xmm3,%xmm8
+
+# qhasm: t9 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t9=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t9=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t9=int6464#4
+# asm 2: mulpd 112(<b2a2p=%rdx),<t9=%xmm3
+mulpd 112(%rdx),%xmm3
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#4,<r9=int6464#12
+# asm 2: addpd <t9=%xmm3,<r9=%xmm11
+addpd %xmm3,%xmm11
+
+# qhasm: d9 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d9=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d9=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d9=int6464#4
+# asm 2: mulpd 112(<a2b2p=%r9),<d9=%xmm3
+mulpd 112(%r9),%xmm3
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#4,<r9=int6464#12
+# asm 2: addpd <d9=%xmm3,<r9=%xmm11
+addpd %xmm3,%xmm11
+
+# qhasm: t10 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t10=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t10=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t10=int6464#4
+# asm 2: mulpd 128(<b2a2p=%rdx),<t10=%xmm3
+mulpd 128(%rdx),%xmm3
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#4,<r10=int6464#13
+# asm 2: addpd <t10=%xmm3,<r10=%xmm12
+addpd %xmm3,%xmm12
+
+# qhasm: d10 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d10=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d10=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d10=int6464#4
+# asm 2: mulpd 128(<a2b2p=%r9),<d10=%xmm3
+mulpd 128(%r9),%xmm3
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#4,<r10=int6464#13
+# asm 2: addpd <d10=%xmm3,<r10=%xmm12
+addpd %xmm3,%xmm12
+
+# qhasm: t11 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t11=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t11=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t11=int6464#4
+# asm 2: mulpd 144(<b2a2p=%rdx),<t11=%xmm3
+mulpd 144(%rdx),%xmm3
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#4,<r11=int6464#1
+# asm 2: addpd <t11=%xmm3,<r11=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: d11 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d11=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d11=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d11=int6464#4
+# asm 2: mulpd 144(<a2b2p=%r9),<d11=%xmm3
+mulpd 144(%r9),%xmm3
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#4,<r11=int6464#1
+# asm 2: addpd <d11=%xmm3,<r11=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: t12 = ab2six
+# asm 1: movdqa <ab2six=int6464#14,>t12=int6464#4
+# asm 2: movdqa <ab2six=%xmm13,>t12=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t12=int6464#4
+# asm 2: mulpd 160(<b2a2p=%rdx),<t12=%xmm3
+mulpd 160(%rdx),%xmm3
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#4,<r12=int6464#2
+# asm 2: addpd <t12=%xmm3,<r12=%xmm1
+addpd %xmm3,%xmm1
+
+# qhasm: d12 = cd2six
+# asm 1: movdqa <cd2six=int6464#15,>d12=int6464#4
+# asm 2: movdqa <cd2six=%xmm14,>d12=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d12=int6464#4
+# asm 2: mulpd 160(<a2b2p=%r9),<d12=%xmm3
+mulpd 160(%r9),%xmm3
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#4,<r12=int6464#2
+# asm 2: addpd <d12=%xmm3,<r12=%xmm1
+addpd %xmm3,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 32) = r2
+# asm 1: movdqa <r2=int6464#5,32(<b1b1p=int64#4)
+# asm 2: movdqa <r2=%xmm4,32(<b1b1p=%rcx)
+movdqa %xmm4,32(%rcx)
+
+# qhasm: ab3 = *(int128 *)(b1b1p + 48)
+# asm 1: movdqa 48(<b1b1p=int64#4),>ab3=int6464#4
+# asm 2: movdqa 48(<b1b1p=%rcx),>ab3=%xmm3
+movdqa 48(%rcx),%xmm3
+
+# qhasm: cd3 = *(int128 *)(ma1a1p + 48)
+# asm 1: movdqa 48(<ma1a1p=int64#5),>cd3=int6464#5
+# asm 2: movdqa 48(<ma1a1p=%r8),>cd3=%xmm4
+movdqa 48(%r8),%xmm4
+
+# qhasm: ab3six = ab3
+# asm 1: movdqa <ab3=int6464#4,>ab3six=int6464#14
+# asm 2: movdqa <ab3=%xmm3,>ab3six=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: cd3six = cd3
+# asm 1: movdqa <cd3=int6464#5,>cd3six=int6464#15
+# asm 2: movdqa <cd3=%xmm4,>cd3six=%xmm14
+movdqa %xmm4,%xmm14
+
+# qhasm: float6464 ab3six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab3six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab3six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd3six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd3six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd3six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t3 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t3=int6464#16
+# asm 2: movdqa <ab3=%xmm3,>t3=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t3 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t3=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t3=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r3 += t3
+# asm 1: addpd <t3=int6464#16,<r3=int6464#6
+# asm 2: addpd <t3=%xmm15,<r3=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: d3 = cd3
+# asm 1: movdqa <cd3=int6464#5,>d3=int6464#16
+# asm 2: movdqa <cd3=%xmm4,>d3=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 d3 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d3=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d3=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r3 += d3
+# asm 1: addpd <d3=int6464#16,<r3=int6464#6
+# asm 2: addpd <d3=%xmm15,<r3=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: t7 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t7=int6464#16
+# asm 2: movdqa <ab3=%xmm3,>t7=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t7=int6464#16
+# asm 2: mulpd 64(<b2a2p=%rdx),<t7=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r7 += t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#10
+# asm 2: addpd <t7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: d7 = cd3
+# asm 1: movdqa <cd3=int6464#5,>d7=int6464#16
+# asm 2: movdqa <cd3=%xmm4,>d7=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d7=int6464#16
+# asm 2: mulpd 64(<a2b2p=%r9),<d7=%xmm15
+mulpd 64(%r9),%xmm15
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#16,<r7=int6464#10
+# asm 2: addpd <d7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t8 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t8=int6464#16
+# asm 2: movdqa <ab3=%xmm3,>t8=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t8=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t8=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#11
+# asm 2: addpd <t8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: d8 = cd3
+# asm 1: movdqa <cd3=int6464#5,>d8=int6464#16
+# asm 2: movdqa <cd3=%xmm4,>d8=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d8=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d8=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#16,<r8=int6464#11
+# asm 2: addpd <d8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t9 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t9=int6464#16
+# asm 2: movdqa <ab3=%xmm3,>t9=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t9=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t9=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#12
+# asm 2: addpd <t9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: d9 = cd3
+# asm 1: movdqa <cd3=int6464#5,>d9=int6464#16
+# asm 2: movdqa <cd3=%xmm4,>d9=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d9=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d9=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#16,<r9=int6464#12
+# asm 2: addpd <d9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t13 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t13=int6464#16
+# asm 2: movdqa <ab3=%xmm3,>t13=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t13=int6464#16
+# asm 2: mulpd 160(<b2a2p=%rdx),<t13=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#3
+# asm 2: addpd <t13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: d13 = cd3
+# asm 1: movdqa <cd3=int6464#5,>d13=int6464#16
+# asm 2: movdqa <cd3=%xmm4,>d13=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d13=int6464#16
+# asm 2: mulpd 160(<a2b2p=%r9),<d13=%xmm15
+mulpd 160(%r9),%xmm15
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#16,<r13=int6464#3
+# asm 2: addpd <d13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: r14 = ab3
+# asm 1: movdqa <ab3=int6464#4,>r14=int6464#4
+# asm 2: movdqa <ab3=%xmm3,>r14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: float6464 r14 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r14=int6464#4
+# asm 2: mulpd 176(<b2a2p=%rdx),<r14=%xmm3
+mulpd 176(%rdx),%xmm3
+
+# qhasm: d14 = cd3
+# asm 1: movdqa <cd3=int6464#5,>d14=int6464#5
+# asm 2: movdqa <cd3=%xmm4,>d14=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d14=int6464#5
+# asm 2: mulpd 176(<a2b2p=%r9),<d14=%xmm4
+mulpd 176(%r9),%xmm4
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#5,<r14=int6464#4
+# asm 2: addpd <d14=%xmm4,<r14=%xmm3
+addpd %xmm4,%xmm3
+
+# qhasm: t4 = ab3six
+# asm 1: movdqa <ab3six=int6464#14,>t4=int6464#5
+# asm 2: movdqa <ab3six=%xmm13,>t4=%xmm4
+movdqa %xmm13,%xmm4
+
+# qhasm: float6464 t4 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t4=int6464#5
+# asm 2: mulpd 16(<b2a2p=%rdx),<t4=%xmm4
+mulpd 16(%rdx),%xmm4
+
+# qhasm: float6464 r4 += t4
+# asm 1: addpd <t4=int6464#5,<r4=int6464#7
+# asm 2: addpd <t4=%xmm4,<r4=%xmm6
+addpd %xmm4,%xmm6
+
+# qhasm: d4 = cd3six
+# asm 1: movdqa <cd3six=int6464#15,>d4=int6464#5
+# asm 2: movdqa <cd3six=%xmm14,>d4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm: float6464 d4 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d4=int6464#5
+# asm 2: mulpd 16(<a2b2p=%r9),<d4=%xmm4
+mulpd 16(%r9),%xmm4
+
+# qhasm: float6464 r4 += d4
+# asm 1: addpd <d4=int6464#5,<r4=int6464#7
+# asm 2: addpd <d4=%xmm4,<r4=%xmm6
+addpd %xmm4,%xmm6
+
+# qhasm: t5 = ab3six
+# asm 1: movdqa <ab3six=int6464#14,>t5=int6464#5
+# asm 2: movdqa <ab3six=%xmm13,>t5=%xmm4
+movdqa %xmm13,%xmm4
+
+# qhasm: float6464 t5 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t5=int6464#5
+# asm 2: mulpd 32(<b2a2p=%rdx),<t5=%xmm4
+mulpd 32(%rdx),%xmm4
+
+# qhasm: float6464 r5 += t5
+# asm 1: addpd <t5=int6464#5,<r5=int6464#8
+# asm 2: addpd <t5=%xmm4,<r5=%xmm7
+addpd %xmm4,%xmm7
+
+# qhasm: d5 = cd3six
+# asm 1: movdqa <cd3six=int6464#15,>d5=int6464#5
+# asm 2: movdqa <cd3six=%xmm14,>d5=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm: float6464 d5 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d5=int6464#5
+# asm 2: mulpd 32(<a2b2p=%r9),<d5=%xmm4
+mulpd 32(%r9),%xmm4
+
+# qhasm: float6464 r5 += d5
+# asm 1: addpd <d5=int6464#5,<r5=int6464#8
+# asm 2: addpd <d5=%xmm4,<r5=%xmm7
+addpd %xmm4,%xmm7
+
+# qhasm: t6 = ab3six
+# asm 1: movdqa <ab3six=int6464#14,>t6=int6464#5
+# asm 2: movdqa <ab3six=%xmm13,>t6=%xmm4
+movdqa %xmm13,%xmm4
+
+# qhasm: float6464 t6 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t6=int6464#5
+# asm 2: mulpd 48(<b2a2p=%rdx),<t6=%xmm4
+mulpd 48(%rdx),%xmm4
+
+# qhasm: float6464 r6 += t6
+# asm 1: addpd <t6=int6464#5,<r6=int6464#9
+# asm 2: addpd <t6=%xmm4,<r6=%xmm8
+addpd %xmm4,%xmm8
+
+# qhasm: d6 = cd3six
+# asm 1: movdqa <cd3six=int6464#15,>d6=int6464#5
+# asm 2: movdqa <cd3six=%xmm14,>d6=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm: float6464 d6 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d6=int6464#5
+# asm 2: mulpd 48(<a2b2p=%r9),<d6=%xmm4
+mulpd 48(%r9),%xmm4
+
+# qhasm: float6464 r6 += d6
+# asm 1: addpd <d6=int6464#5,<r6=int6464#9
+# asm 2: addpd <d6=%xmm4,<r6=%xmm8
+addpd %xmm4,%xmm8
+
+# qhasm: t10 = ab3six
+# asm 1: movdqa <ab3six=int6464#14,>t10=int6464#5
+# asm 2: movdqa <ab3six=%xmm13,>t10=%xmm4
+movdqa %xmm13,%xmm4
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t10=int6464#5
+# asm 2: mulpd 112(<b2a2p=%rdx),<t10=%xmm4
+mulpd 112(%rdx),%xmm4
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#5,<r10=int6464#13
+# asm 2: addpd <t10=%xmm4,<r10=%xmm12
+addpd %xmm4,%xmm12
+
+# qhasm: d10 = cd3six
+# asm 1: movdqa <cd3six=int6464#15,>d10=int6464#5
+# asm 2: movdqa <cd3six=%xmm14,>d10=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d10=int6464#5
+# asm 2: mulpd 112(<a2b2p=%r9),<d10=%xmm4
+mulpd 112(%r9),%xmm4
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#5,<r10=int6464#13
+# asm 2: addpd <d10=%xmm4,<r10=%xmm12
+addpd %xmm4,%xmm12
+
+# qhasm: t11 = ab3six
+# asm 1: movdqa <ab3six=int6464#14,>t11=int6464#5
+# asm 2: movdqa <ab3six=%xmm13,>t11=%xmm4
+movdqa %xmm13,%xmm4
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t11=int6464#5
+# asm 2: mulpd 128(<b2a2p=%rdx),<t11=%xmm4
+mulpd 128(%rdx),%xmm4
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#5,<r11=int6464#1
+# asm 2: addpd <t11=%xmm4,<r11=%xmm0
+addpd %xmm4,%xmm0
+
+# qhasm: d11 = cd3six
+# asm 1: movdqa <cd3six=int6464#15,>d11=int6464#5
+# asm 2: movdqa <cd3six=%xmm14,>d11=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d11=int6464#5
+# asm 2: mulpd 128(<a2b2p=%r9),<d11=%xmm4
+mulpd 128(%r9),%xmm4
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#5,<r11=int6464#1
+# asm 2: addpd <d11=%xmm4,<r11=%xmm0
+addpd %xmm4,%xmm0
+
+# qhasm: t12 = ab3six
+# asm 1: movdqa <ab3six=int6464#14,>t12=int6464#5
+# asm 2: movdqa <ab3six=%xmm13,>t12=%xmm4
+movdqa %xmm13,%xmm4
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t12=int6464#5
+# asm 2: mulpd 144(<b2a2p=%rdx),<t12=%xmm4
+mulpd 144(%rdx),%xmm4
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#5,<r12=int6464#2
+# asm 2: addpd <t12=%xmm4,<r12=%xmm1
+addpd %xmm4,%xmm1
+
+# qhasm: d12 = cd3six
+# asm 1: movdqa <cd3six=int6464#15,>d12=int6464#5
+# asm 2: movdqa <cd3six=%xmm14,>d12=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d12=int6464#5
+# asm 2: mulpd 144(<a2b2p=%r9),<d12=%xmm4
+mulpd 144(%r9),%xmm4
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#5,<r12=int6464#2
+# asm 2: addpd <d12=%xmm4,<r12=%xmm1
+addpd %xmm4,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 48) = r3
+# asm 1: movdqa <r3=int6464#6,48(<b1b1p=int64#4)
+# asm 2: movdqa <r3=%xmm5,48(<b1b1p=%rcx)
+movdqa %xmm5,48(%rcx)
+
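+# Note on the blocks that follow (ab4 through ab8): each abK/cdK pair loaded
+# from b1b1p/ma1a1p is one row of what is effectively a schoolbook
+# (row-by-column) multiply-accumulate against the twelve 128-bit columns of
+# b2a2p/a2b2p, feeding the accumulators rK through r(K+11). The abKsix/cdKsix
+# copies are the same row pre-multiplied by SIX_SIX and are used for the
+# column positions that, presumably, carry an extra factor of six in this
+# coefficient representation. Each rK is written back to b1b1p + 16*K as soon
+# as no later row can still contribute to it.
+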
+# qhasm: ab4 = *(int128 *)(b1b1p + 64)
+# asm 1: movdqa 64(<b1b1p=int64#4),>ab4=int6464#5
+# asm 2: movdqa 64(<b1b1p=%rcx),>ab4=%xmm4
+movdqa 64(%rcx),%xmm4
+
+# qhasm: cd4 = *(int128 *)(ma1a1p + 64)
+# asm 1: movdqa 64(<ma1a1p=int64#5),>cd4=int6464#6
+# asm 2: movdqa 64(<ma1a1p=%r8),>cd4=%xmm5
+movdqa 64(%r8),%xmm5
+
+# qhasm: ab4six = ab4
+# asm 1: movdqa <ab4=int6464#5,>ab4six=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>ab4six=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: cd4six = cd4
+# asm 1: movdqa <cd4=int6464#6,>cd4six=int6464#15
+# asm 2: movdqa <cd4=%xmm5,>cd4six=%xmm14
+movdqa %xmm5,%xmm14
+
+# qhasm: float6464 ab4six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab4six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab4six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd4six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd4six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd4six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t4 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t4=int6464#16
+# asm 2: movdqa <ab4=%xmm4,>t4=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t4 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t4=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t4=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r4 += t4
+# asm 1: addpd <t4=int6464#16,<r4=int6464#7
+# asm 2: addpd <t4=%xmm15,<r4=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: d4 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d4=int6464#16
+# asm 2: movdqa <cd4=%xmm5,>d4=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 d4 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d4=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d4=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r4 += d4
+# asm 1: addpd <d4=int6464#16,<r4=int6464#7
+# asm 2: addpd <d4=%xmm15,<r4=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: t7 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t7=int6464#16
+# asm 2: movdqa <ab4=%xmm4,>t7=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t7=int6464#16
+# asm 2: mulpd 48(<b2a2p=%rdx),<t7=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r7 += t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#10
+# asm 2: addpd <t7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: d7 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d7=int6464#16
+# asm 2: movdqa <cd4=%xmm5,>d7=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d7=int6464#16
+# asm 2: mulpd 48(<a2b2p=%r9),<d7=%xmm15
+mulpd 48(%r9),%xmm15
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#16,<r7=int6464#10
+# asm 2: addpd <d7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t8 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t8=int6464#16
+# asm 2: movdqa <ab4=%xmm4,>t8=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t8=int6464#16
+# asm 2: mulpd 64(<b2a2p=%rdx),<t8=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#11
+# asm 2: addpd <t8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: d8 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d8=int6464#16
+# asm 2: movdqa <cd4=%xmm5,>d8=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d8=int6464#16
+# asm 2: mulpd 64(<a2b2p=%r9),<d8=%xmm15
+mulpd 64(%r9),%xmm15
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#16,<r8=int6464#11
+# asm 2: addpd <d8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t9 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t9=int6464#16
+# asm 2: movdqa <ab4=%xmm4,>t9=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t9=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t9=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#12
+# asm 2: addpd <t9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: d9 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d9=int6464#16
+# asm 2: movdqa <cd4=%xmm5,>d9=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d9=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d9=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#16,<r9=int6464#12
+# asm 2: addpd <d9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t10 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t10=int6464#16
+# asm 2: movdqa <ab4=%xmm4,>t10=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t10=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t10=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#13
+# asm 2: addpd <t10=%xmm15,<r10=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: d10 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d10=int6464#16
+# asm 2: movdqa <cd4=%xmm5,>d10=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d10=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d10=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#16,<r10=int6464#13
+# asm 2: addpd <d10=%xmm15,<r10=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t13 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t13=int6464#16
+# asm 2: movdqa <ab4=%xmm4,>t13=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t13=int6464#16
+# asm 2: mulpd 144(<b2a2p=%rdx),<t13=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#3
+# asm 2: addpd <t13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: d13 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d13=int6464#16
+# asm 2: movdqa <cd4=%xmm5,>d13=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d13=int6464#16
+# asm 2: mulpd 144(<a2b2p=%r9),<d13=%xmm15
+mulpd 144(%r9),%xmm15
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#16,<r13=int6464#3
+# asm 2: addpd <d13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t14 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t14=int6464#16
+# asm 2: movdqa <ab4=%xmm4,>t14=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t14=int6464#16
+# asm 2: mulpd 160(<b2a2p=%rdx),<t14=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#4
+# asm 2: addpd <t14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: d14 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d14=int6464#16
+# asm 2: movdqa <cd4=%xmm5,>d14=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d14=int6464#16
+# asm 2: mulpd 160(<a2b2p=%r9),<d14=%xmm15
+mulpd 160(%r9),%xmm15
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#16,<r14=int6464#4
+# asm 2: addpd <d14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: r15 = ab4
+# asm 1: movdqa <ab4=int6464#5,>r15=int6464#5
+# asm 2: movdqa <ab4=%xmm4,>r15=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: float6464 r15 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r15=int6464#5
+# asm 2: mulpd 176(<b2a2p=%rdx),<r15=%xmm4
+mulpd 176(%rdx),%xmm4
+
+# qhasm: d15 = cd4
+# asm 1: movdqa <cd4=int6464#6,>d15=int6464#6
+# asm 2: movdqa <cd4=%xmm5,>d15=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d15=int6464#6
+# asm 2: mulpd 176(<a2b2p=%r9),<d15=%xmm5
+mulpd 176(%r9),%xmm5
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#6,<r15=int6464#5
+# asm 2: addpd <d15=%xmm5,<r15=%xmm4
+addpd %xmm5,%xmm4
+
+# qhasm: t5 = ab4six
+# asm 1: movdqa <ab4six=int6464#14,>t5=int6464#6
+# asm 2: movdqa <ab4six=%xmm13,>t5=%xmm5
+movdqa %xmm13,%xmm5
+
+# qhasm: float6464 t5 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t5=int6464#6
+# asm 2: mulpd 16(<b2a2p=%rdx),<t5=%xmm5
+mulpd 16(%rdx),%xmm5
+
+# qhasm: float6464 r5 += t5
+# asm 1: addpd <t5=int6464#6,<r5=int6464#8
+# asm 2: addpd <t5=%xmm5,<r5=%xmm7
+addpd %xmm5,%xmm7
+
+# qhasm: d5 = cd4six
+# asm 1: movdqa <cd4six=int6464#15,>d5=int6464#6
+# asm 2: movdqa <cd4six=%xmm14,>d5=%xmm5
+movdqa %xmm14,%xmm5
+
+# qhasm: float6464 d5 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d5=int6464#6
+# asm 2: mulpd 16(<a2b2p=%r9),<d5=%xmm5
+mulpd 16(%r9),%xmm5
+
+# qhasm: float6464 r5 += d5
+# asm 1: addpd <d5=int6464#6,<r5=int6464#8
+# asm 2: addpd <d5=%xmm5,<r5=%xmm7
+addpd %xmm5,%xmm7
+
+# qhasm: t6 = ab4six
+# asm 1: movdqa <ab4six=int6464#14,>t6=int6464#6
+# asm 2: movdqa <ab4six=%xmm13,>t6=%xmm5
+movdqa %xmm13,%xmm5
+
+# qhasm: float6464 t6 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t6=int6464#6
+# asm 2: mulpd 32(<b2a2p=%rdx),<t6=%xmm5
+mulpd 32(%rdx),%xmm5
+
+# qhasm: float6464 r6 += t6
+# asm 1: addpd <t6=int6464#6,<r6=int6464#9
+# asm 2: addpd <t6=%xmm5,<r6=%xmm8
+addpd %xmm5,%xmm8
+
+# qhasm: d6 = cd4six
+# asm 1: movdqa <cd4six=int6464#15,>d6=int6464#6
+# asm 2: movdqa <cd4six=%xmm14,>d6=%xmm5
+movdqa %xmm14,%xmm5
+
+# qhasm: float6464 d6 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d6=int6464#6
+# asm 2: mulpd 32(<a2b2p=%r9),<d6=%xmm5
+mulpd 32(%r9),%xmm5
+
+# qhasm: float6464 r6 += d6
+# asm 1: addpd <d6=int6464#6,<r6=int6464#9
+# asm 2: addpd <d6=%xmm5,<r6=%xmm8
+addpd %xmm5,%xmm8
+
+# qhasm: t11 = ab4six
+# asm 1: movdqa <ab4six=int6464#14,>t11=int6464#6
+# asm 2: movdqa <ab4six=%xmm13,>t11=%xmm5
+movdqa %xmm13,%xmm5
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t11=int6464#6
+# asm 2: mulpd 112(<b2a2p=%rdx),<t11=%xmm5
+mulpd 112(%rdx),%xmm5
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#6,<r11=int6464#1
+# asm 2: addpd <t11=%xmm5,<r11=%xmm0
+addpd %xmm5,%xmm0
+
+# qhasm: d11 = cd4six
+# asm 1: movdqa <cd4six=int6464#15,>d11=int6464#6
+# asm 2: movdqa <cd4six=%xmm14,>d11=%xmm5
+movdqa %xmm14,%xmm5
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d11=int6464#6
+# asm 2: mulpd 112(<a2b2p=%r9),<d11=%xmm5
+mulpd 112(%r9),%xmm5
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#6,<r11=int6464#1
+# asm 2: addpd <d11=%xmm5,<r11=%xmm0
+addpd %xmm5,%xmm0
+
+# qhasm: t12 = ab4six
+# asm 1: movdqa <ab4six=int6464#14,>t12=int6464#6
+# asm 2: movdqa <ab4six=%xmm13,>t12=%xmm5
+movdqa %xmm13,%xmm5
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t12=int6464#6
+# asm 2: mulpd 128(<b2a2p=%rdx),<t12=%xmm5
+mulpd 128(%rdx),%xmm5
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#6,<r12=int6464#2
+# asm 2: addpd <t12=%xmm5,<r12=%xmm1
+addpd %xmm5,%xmm1
+
+# qhasm: d12 = cd4six
+# asm 1: movdqa <cd4six=int6464#15,>d12=int6464#6
+# asm 2: movdqa <cd4six=%xmm14,>d12=%xmm5
+movdqa %xmm14,%xmm5
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d12=int6464#6
+# asm 2: mulpd 128(<a2b2p=%r9),<d12=%xmm5
+mulpd 128(%r9),%xmm5
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#6,<r12=int6464#2
+# asm 2: addpd <d12=%xmm5,<r12=%xmm1
+addpd %xmm5,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 64) = r4
+# asm 1: movdqa <r4=int6464#7,64(<b1b1p=int64#4)
+# asm 2: movdqa <r4=%xmm6,64(<b1b1p=%rcx)
+movdqa %xmm6,64(%rcx)
+
+# qhasm: ab5 = *(int128 *)(b1b1p + 80)
+# asm 1: movdqa 80(<b1b1p=int64#4),>ab5=int6464#6
+# asm 2: movdqa 80(<b1b1p=%rcx),>ab5=%xmm5
+movdqa 80(%rcx),%xmm5
+
+# qhasm: cd5 = *(int128 *)(ma1a1p + 80)
+# asm 1: movdqa 80(<ma1a1p=int64#5),>cd5=int6464#7
+# asm 2: movdqa 80(<ma1a1p=%r8),>cd5=%xmm6
+movdqa 80(%r8),%xmm6
+
+# qhasm: ab5six = ab5
+# asm 1: movdqa <ab5=int6464#6,>ab5six=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>ab5six=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: cd5six = cd5
+# asm 1: movdqa <cd5=int6464#7,>cd5six=int6464#15
+# asm 2: movdqa <cd5=%xmm6,>cd5six=%xmm14
+movdqa %xmm6,%xmm14
+
+# qhasm: float6464 ab5six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab5six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab5six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd5six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd5six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd5six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t5 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t5=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t5=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t5 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t5=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t5=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r5 += t5
+# asm 1: addpd <t5=int6464#16,<r5=int6464#8
+# asm 2: addpd <t5=%xmm15,<r5=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: d5 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d5=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d5=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d5 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d5=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d5=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r5 += d5
+# asm 1: addpd <d5=int6464#16,<r5=int6464#8
+# asm 2: addpd <d5=%xmm15,<r5=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: t7 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t7=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t7=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t7=int6464#16
+# asm 2: mulpd 32(<b2a2p=%rdx),<t7=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r7 += t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#10
+# asm 2: addpd <t7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: d7 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d7=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d7=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d7=int6464#16
+# asm 2: mulpd 32(<a2b2p=%r9),<d7=%xmm15
+mulpd 32(%r9),%xmm15
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#16,<r7=int6464#10
+# asm 2: addpd <d7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t8 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t8=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t8=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t8=int6464#16
+# asm 2: mulpd 48(<b2a2p=%rdx),<t8=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#11
+# asm 2: addpd <t8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: d8 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d8=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d8=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d8=int6464#16
+# asm 2: mulpd 48(<a2b2p=%r9),<d8=%xmm15
+mulpd 48(%r9),%xmm15
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#16,<r8=int6464#11
+# asm 2: addpd <d8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t9 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t9=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t9=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t9=int6464#16
+# asm 2: mulpd 64(<b2a2p=%rdx),<t9=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#12
+# asm 2: addpd <t9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: d9 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d9=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d9=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d9=int6464#16
+# asm 2: mulpd 64(<a2b2p=%r9),<d9=%xmm15
+mulpd 64(%r9),%xmm15
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#16,<r9=int6464#12
+# asm 2: addpd <d9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t10 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t10=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t10=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t10=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t10=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#13
+# asm 2: addpd <t10=%xmm15,<r10=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: d10 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d10=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d10=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d10=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d10=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#16,<r10=int6464#13
+# asm 2: addpd <d10=%xmm15,<r10=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t11 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t11=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t11=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t11=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t11=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#1
+# asm 2: addpd <t11=%xmm15,<r11=%xmm0
+addpd %xmm15,%xmm0
+
+# qhasm: d11 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d11=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d11=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d11=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d11=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#16,<r11=int6464#1
+# asm 2: addpd <d11=%xmm15,<r11=%xmm0
+addpd %xmm15,%xmm0
+
+# qhasm: t13 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t13=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t13=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t13=int6464#16
+# asm 2: mulpd 128(<b2a2p=%rdx),<t13=%xmm15
+mulpd 128(%rdx),%xmm15
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#3
+# asm 2: addpd <t13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: d13 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d13=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d13=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d13=int6464#16
+# asm 2: mulpd 128(<a2b2p=%r9),<d13=%xmm15
+mulpd 128(%r9),%xmm15
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#16,<r13=int6464#3
+# asm 2: addpd <d13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t14 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t14=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t14=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t14=int6464#16
+# asm 2: mulpd 144(<b2a2p=%rdx),<t14=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#4
+# asm 2: addpd <t14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: d14 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d14=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d14=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d14=int6464#16
+# asm 2: mulpd 144(<a2b2p=%r9),<d14=%xmm15
+mulpd 144(%r9),%xmm15
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#16,<r14=int6464#4
+# asm 2: addpd <d14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t15 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t15=int6464#16
+# asm 2: movdqa <ab5=%xmm5,>t15=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t15=int6464#16
+# asm 2: mulpd 160(<b2a2p=%rdx),<t15=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r15 += t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#5
+# asm 2: addpd <t15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: d15 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d15=int6464#16
+# asm 2: movdqa <cd5=%xmm6,>d15=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d15=int6464#16
+# asm 2: mulpd 160(<a2b2p=%r9),<d15=%xmm15
+mulpd 160(%r9),%xmm15
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#16,<r15=int6464#5
+# asm 2: addpd <d15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: r16 = ab5
+# asm 1: movdqa <ab5=int6464#6,>r16=int6464#6
+# asm 2: movdqa <ab5=%xmm5,>r16=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: float6464 r16 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r16=int6464#6
+# asm 2: mulpd 176(<b2a2p=%rdx),<r16=%xmm5
+mulpd 176(%rdx),%xmm5
+
+# qhasm: d16 = cd5
+# asm 1: movdqa <cd5=int6464#7,>d16=int6464#7
+# asm 2: movdqa <cd5=%xmm6,>d16=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: float6464 d16 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d16=int6464#7
+# asm 2: mulpd 176(<a2b2p=%r9),<d16=%xmm6
+mulpd 176(%r9),%xmm6
+
+# qhasm: float6464 r16 += d16
+# asm 1: addpd <d16=int6464#7,<r16=int6464#6
+# asm 2: addpd <d16=%xmm6,<r16=%xmm5
+addpd %xmm6,%xmm5
+
+# qhasm: t6 = ab5six
+# asm 1: movdqa <ab5six=int6464#14,>t6=int6464#7
+# asm 2: movdqa <ab5six=%xmm13,>t6=%xmm6
+movdqa %xmm13,%xmm6
+
+# qhasm: float6464 t6 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t6=int6464#7
+# asm 2: mulpd 16(<b2a2p=%rdx),<t6=%xmm6
+mulpd 16(%rdx),%xmm6
+
+# qhasm: float6464 r6 += t6
+# asm 1: addpd <t6=int6464#7,<r6=int6464#9
+# asm 2: addpd <t6=%xmm6,<r6=%xmm8
+addpd %xmm6,%xmm8
+
+# qhasm: d6 = cd5six
+# asm 1: movdqa <cd5six=int6464#15,>d6=int6464#7
+# asm 2: movdqa <cd5six=%xmm14,>d6=%xmm6
+movdqa %xmm14,%xmm6
+
+# qhasm: float6464 d6 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d6=int6464#7
+# asm 2: mulpd 16(<a2b2p=%r9),<d6=%xmm6
+mulpd 16(%r9),%xmm6
+
+# qhasm: float6464 r6 += d6
+# asm 1: addpd <d6=int6464#7,<r6=int6464#9
+# asm 2: addpd <d6=%xmm6,<r6=%xmm8
+addpd %xmm6,%xmm8
+
+# qhasm: t12 = ab5six
+# asm 1: movdqa <ab5six=int6464#14,>t12=int6464#7
+# asm 2: movdqa <ab5six=%xmm13,>t12=%xmm6
+movdqa %xmm13,%xmm6
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t12=int6464#7
+# asm 2: mulpd 112(<b2a2p=%rdx),<t12=%xmm6
+mulpd 112(%rdx),%xmm6
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#7,<r12=int6464#2
+# asm 2: addpd <t12=%xmm6,<r12=%xmm1
+addpd %xmm6,%xmm1
+
+# qhasm: d12 = cd5six
+# asm 1: movdqa <cd5six=int6464#15,>d12=int6464#7
+# asm 2: movdqa <cd5six=%xmm14,>d12=%xmm6
+movdqa %xmm14,%xmm6
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d12=int6464#7
+# asm 2: mulpd 112(<a2b2p=%r9),<d12=%xmm6
+mulpd 112(%r9),%xmm6
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#7,<r12=int6464#2
+# asm 2: addpd <d12=%xmm6,<r12=%xmm1
+addpd %xmm6,%xmm1
+
+# qhasm: *(int128 *)(b1b1p + 80) = r5
+# asm 1: movdqa <r5=int6464#8,80(<b1b1p=int64#4)
+# asm 2: movdqa <r5=%xmm7,80(<b1b1p=%rcx)
+movdqa %xmm7,80(%rcx)
+
+# qhasm: ab6 = *(int128 *)(b1b1p + 96)
+# asm 1: movdqa 96(<b1b1p=int64#4),>ab6=int6464#7
+# asm 2: movdqa 96(<b1b1p=%rcx),>ab6=%xmm6
+movdqa 96(%rcx),%xmm6
+
+# qhasm: cd6 = *(int128 *)(ma1a1p + 96)
+# asm 1: movdqa 96(<ma1a1p=int64#5),>cd6=int6464#8
+# asm 2: movdqa 96(<ma1a1p=%r8),>cd6=%xmm7
+movdqa 96(%r8),%xmm7
+
+# qhasm: t6 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t6=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t6=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t6 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t6=int6464#14
+# asm 2: mulpd 0(<b2a2p=%rdx),<t6=%xmm13
+mulpd 0(%rdx),%xmm13
+
+# qhasm: float6464 r6 += t6
+# asm 1: addpd <t6=int6464#14,<r6=int6464#9
+# asm 2: addpd <t6=%xmm13,<r6=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: d6 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d6=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d6=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d6 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d6=int6464#14
+# asm 2: mulpd 0(<a2b2p=%r9),<d6=%xmm13
+mulpd 0(%r9),%xmm13
+
+# qhasm: float6464 r6 += d6
+# asm 1: addpd <d6=int6464#14,<r6=int6464#9
+# asm 2: addpd <d6=%xmm13,<r6=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t7 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t7=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t7=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t7 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t7=int6464#14
+# asm 2: mulpd 16(<b2a2p=%rdx),<t7=%xmm13
+mulpd 16(%rdx),%xmm13
+
+# qhasm: float6464 r7 += t7
+# asm 1: addpd <t7=int6464#14,<r7=int6464#10
+# asm 2: addpd <t7=%xmm13,<r7=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: d7 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d7=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d7=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d7=int6464#14
+# asm 2: mulpd 16(<a2b2p=%r9),<d7=%xmm13
+mulpd 16(%r9),%xmm13
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#14,<r7=int6464#10
+# asm 2: addpd <d7=%xmm13,<r7=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: t8 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t8=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t8=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t8=int6464#14
+# asm 2: mulpd 32(<b2a2p=%rdx),<t8=%xmm13
+mulpd 32(%rdx),%xmm13
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#14,<r8=int6464#11
+# asm 2: addpd <t8=%xmm13,<r8=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: d8 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d8=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d8=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d8=int6464#14
+# asm 2: mulpd 32(<a2b2p=%r9),<d8=%xmm13
+mulpd 32(%r9),%xmm13
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#14,<r8=int6464#11
+# asm 2: addpd <d8=%xmm13,<r8=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: t9 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t9=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t9=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t9=int6464#14
+# asm 2: mulpd 48(<b2a2p=%rdx),<t9=%xmm13
+mulpd 48(%rdx),%xmm13
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#14,<r9=int6464#12
+# asm 2: addpd <t9=%xmm13,<r9=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: d9 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d9=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d9=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d9=int6464#14
+# asm 2: mulpd 48(<a2b2p=%r9),<d9=%xmm13
+mulpd 48(%r9),%xmm13
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#14,<r9=int6464#12
+# asm 2: addpd <d9=%xmm13,<r9=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t10 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t10=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t10=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t10=int6464#14
+# asm 2: mulpd 64(<b2a2p=%rdx),<t10=%xmm13
+mulpd 64(%rdx),%xmm13
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#13
+# asm 2: addpd <t10=%xmm13,<r10=%xmm12
+addpd %xmm13,%xmm12
+
+# qhasm: d10 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d10=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d10=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d10=int6464#14
+# asm 2: mulpd 64(<a2b2p=%r9),<d10=%xmm13
+mulpd 64(%r9),%xmm13
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#14,<r10=int6464#13
+# asm 2: addpd <d10=%xmm13,<r10=%xmm12
+addpd %xmm13,%xmm12
+
+# qhasm: t11 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t11=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t11=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t11=int6464#14
+# asm 2: mulpd 80(<b2a2p=%rdx),<t11=%xmm13
+mulpd 80(%rdx),%xmm13
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: d11 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d11=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d11=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d11=int6464#14
+# asm 2: mulpd 80(<a2b2p=%r9),<d11=%xmm13
+mulpd 80(%r9),%xmm13
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#14,<r11=int6464#1
+# asm 2: addpd <d11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t12 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t12=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t12=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t12=int6464#14
+# asm 2: mulpd 96(<b2a2p=%rdx),<t12=%xmm13
+mulpd 96(%rdx),%xmm13
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#14,<r12=int6464#2
+# asm 2: addpd <t12=%xmm13,<r12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: d12 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d12=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d12=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d12=int6464#14
+# asm 2: mulpd 96(<a2b2p=%r9),<d12=%xmm13
+mulpd 96(%r9),%xmm13
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#14,<r12=int6464#2
+# asm 2: addpd <d12=%xmm13,<r12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: t13 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t13=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t13=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t13=int6464#14
+# asm 2: mulpd 112(<b2a2p=%rdx),<t13=%xmm13
+mulpd 112(%rdx),%xmm13
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: d13 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d13=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d13=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d13=int6464#14
+# asm 2: mulpd 112(<a2b2p=%r9),<d13=%xmm13
+mulpd 112(%r9),%xmm13
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#14,<r13=int6464#3
+# asm 2: addpd <d13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t14 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t14=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t14=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t14=int6464#14
+# asm 2: mulpd 128(<b2a2p=%rdx),<t14=%xmm13
+mulpd 128(%rdx),%xmm13
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#14,<r14=int6464#4
+# asm 2: addpd <t14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: d14 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d14=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d14=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d14=int6464#14
+# asm 2: mulpd 128(<a2b2p=%r9),<d14=%xmm13
+mulpd 128(%r9),%xmm13
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#14,<r14=int6464#4
+# asm 2: addpd <d14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: t15 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t15=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t15=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t15 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t15=int6464#14
+# asm 2: mulpd 144(<b2a2p=%rdx),<t15=%xmm13
+mulpd 144(%rdx),%xmm13
+
+# qhasm: float6464 r15 += t15
+# asm 1: addpd <t15=int6464#14,<r15=int6464#5
+# asm 2: addpd <t15=%xmm13,<r15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: d15 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d15=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d15=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d15=int6464#14
+# asm 2: mulpd 144(<a2b2p=%r9),<d15=%xmm13
+mulpd 144(%r9),%xmm13
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#14,<r15=int6464#5
+# asm 2: addpd <d15=%xmm13,<r15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: t16 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t16=int6464#14
+# asm 2: movdqa <ab6=%xmm6,>t16=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 t16 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t16=int6464#14
+# asm 2: mulpd 160(<b2a2p=%rdx),<t16=%xmm13
+mulpd 160(%rdx),%xmm13
+
+# qhasm: float6464 r16 += t16
+# asm 1: addpd <t16=int6464#14,<r16=int6464#6
+# asm 2: addpd <t16=%xmm13,<r16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: d16 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d16=int6464#14
+# asm 2: movdqa <cd6=%xmm7,>d16=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 d16 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d16=int6464#14
+# asm 2: mulpd 160(<a2b2p=%r9),<d16=%xmm13
+mulpd 160(%r9),%xmm13
+
+# qhasm: float6464 r16 += d16
+# asm 1: addpd <d16=int6464#14,<r16=int6464#6
+# asm 2: addpd <d16=%xmm13,<r16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: r17 = ab6
+# asm 1: movdqa <ab6=int6464#7,>r17=int6464#7
+# asm 2: movdqa <ab6=%xmm6,>r17=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: float6464 r17 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r17=int6464#7
+# asm 2: mulpd 176(<b2a2p=%rdx),<r17=%xmm6
+mulpd 176(%rdx),%xmm6
+
+# qhasm: d17 = cd6
+# asm 1: movdqa <cd6=int6464#8,>d17=int6464#8
+# asm 2: movdqa <cd6=%xmm7,>d17=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: float6464 d17 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d17=int6464#8
+# asm 2: mulpd 176(<a2b2p=%r9),<d17=%xmm7
+mulpd 176(%r9),%xmm7
+
+# qhasm: float6464 r17 += d17
+# asm 1: addpd <d17=int6464#8,<r17=int6464#7
+# asm 2: addpd <d17=%xmm7,<r17=%xmm6
+addpd %xmm7,%xmm6
+
+# qhasm: *(int128 *)(b1b1p + 96) = r6
+# asm 1: movdqa <r6=int6464#9,96(<b1b1p=int64#4)
+# asm 2: movdqa <r6=%xmm8,96(<b1b1p=%rcx)
+movdqa %xmm8,96(%rcx)
+
+# qhasm: ab7 = *(int128 *)(b1b1p + 112)
+# asm 1: movdqa 112(<b1b1p=int64#4),>ab7=int6464#8
+# asm 2: movdqa 112(<b1b1p=%rcx),>ab7=%xmm7
+movdqa 112(%rcx),%xmm7
+
+# qhasm: cd7 = *(int128 *)(ma1a1p + 112)
+# asm 1: movdqa 112(<ma1a1p=int64#5),>cd7=int6464#9
+# asm 2: movdqa 112(<ma1a1p=%r8),>cd7=%xmm8
+movdqa 112(%r8),%xmm8
+
+# qhasm: ab7six = ab7
+# asm 1: movdqa <ab7=int6464#8,>ab7six=int6464#14
+# asm 2: movdqa <ab7=%xmm7,>ab7six=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: cd7six = cd7
+# asm 1: movdqa <cd7=int6464#9,>cd7six=int6464#15
+# asm 2: movdqa <cd7=%xmm8,>cd7six=%xmm14
+movdqa %xmm8,%xmm14
+
+# qhasm: float6464 ab7six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab7six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab7six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd7six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd7six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd7six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t7 = ab7
+# asm 1: movdqa <ab7=int6464#8,>t7=int6464#16
+# asm 2: movdqa <ab7=%xmm7,>t7=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t7=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t7=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r7 += t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#10
+# asm 2: addpd <t7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: d7 = cd7
+# asm 1: movdqa <cd7=int6464#9,>d7=int6464#16
+# asm 2: movdqa <cd7=%xmm8,>d7=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 d7 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d7=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d7=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r7 += d7
+# asm 1: addpd <d7=int6464#16,<r7=int6464#10
+# asm 2: addpd <d7=%xmm15,<r7=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t13 = ab7
+# asm 1: movdqa <ab7=int6464#8,>t13=int6464#8
+# asm 2: movdqa <ab7=%xmm7,>t13=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t13=int6464#8
+# asm 2: mulpd 96(<b2a2p=%rdx),<t13=%xmm7
+mulpd 96(%rdx),%xmm7
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#8,<r13=int6464#3
+# asm 2: addpd <t13=%xmm7,<r13=%xmm2
+addpd %xmm7,%xmm2
+
+# qhasm: d13 = cd7
+# asm 1: movdqa <cd7=int6464#9,>d13=int6464#8
+# asm 2: movdqa <cd7=%xmm8,>d13=%xmm7
+movdqa %xmm8,%xmm7
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d13=int6464#8
+# asm 2: mulpd 96(<a2b2p=%r9),<d13=%xmm7
+mulpd 96(%r9),%xmm7
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#8,<r13=int6464#3
+# asm 2: addpd <d13=%xmm7,<r13=%xmm2
+addpd %xmm7,%xmm2
+
+# qhasm: t8 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t8=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t8=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t8=int6464#8
+# asm 2: mulpd 16(<b2a2p=%rdx),<t8=%xmm7
+mulpd 16(%rdx),%xmm7
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#8,<r8=int6464#11
+# asm 2: addpd <t8=%xmm7,<r8=%xmm10
+addpd %xmm7,%xmm10
+
+# qhasm: d8 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d8=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d8=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d8=int6464#8
+# asm 2: mulpd 16(<a2b2p=%r9),<d8=%xmm7
+mulpd 16(%r9),%xmm7
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#8,<r8=int6464#11
+# asm 2: addpd <d8=%xmm7,<r8=%xmm10
+addpd %xmm7,%xmm10
+
+# qhasm: t9 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t9=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t9=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t9=int6464#8
+# asm 2: mulpd 32(<b2a2p=%rdx),<t9=%xmm7
+mulpd 32(%rdx),%xmm7
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#8,<r9=int6464#12
+# asm 2: addpd <t9=%xmm7,<r9=%xmm11
+addpd %xmm7,%xmm11
+
+# qhasm: d9 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d9=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d9=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d9=int6464#8
+# asm 2: mulpd 32(<a2b2p=%r9),<d9=%xmm7
+mulpd 32(%r9),%xmm7
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#8,<r9=int6464#12
+# asm 2: addpd <d9=%xmm7,<r9=%xmm11
+addpd %xmm7,%xmm11
+
+# qhasm: t10 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t10=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t10=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t10=int6464#8
+# asm 2: mulpd 48(<b2a2p=%rdx),<t10=%xmm7
+mulpd 48(%rdx),%xmm7
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#8,<r10=int6464#13
+# asm 2: addpd <t10=%xmm7,<r10=%xmm12
+addpd %xmm7,%xmm12
+
+# qhasm: d10 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d10=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d10=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d10=int6464#8
+# asm 2: mulpd 48(<a2b2p=%r9),<d10=%xmm7
+mulpd 48(%r9),%xmm7
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#8,<r10=int6464#13
+# asm 2: addpd <d10=%xmm7,<r10=%xmm12
+addpd %xmm7,%xmm12
+
+# qhasm: t11 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t11=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t11=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t11=int6464#8
+# asm 2: mulpd 64(<b2a2p=%rdx),<t11=%xmm7
+mulpd 64(%rdx),%xmm7
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#8,<r11=int6464#1
+# asm 2: addpd <t11=%xmm7,<r11=%xmm0
+addpd %xmm7,%xmm0
+
+# qhasm: d11 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d11=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d11=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d11=int6464#8
+# asm 2: mulpd 64(<a2b2p=%r9),<d11=%xmm7
+mulpd 64(%r9),%xmm7
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#8,<r11=int6464#1
+# asm 2: addpd <d11=%xmm7,<r11=%xmm0
+addpd %xmm7,%xmm0
+
+# qhasm: t12 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t12=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t12=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t12=int6464#8
+# asm 2: mulpd 80(<b2a2p=%rdx),<t12=%xmm7
+mulpd 80(%rdx),%xmm7
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#8,<r12=int6464#2
+# asm 2: addpd <t12=%xmm7,<r12=%xmm1
+addpd %xmm7,%xmm1
+
+# qhasm: d12 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d12=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d12=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d12=int6464#8
+# asm 2: mulpd 80(<a2b2p=%r9),<d12=%xmm7
+mulpd 80(%r9),%xmm7
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#8,<r12=int6464#2
+# asm 2: addpd <d12=%xmm7,<r12=%xmm1
+addpd %xmm7,%xmm1
+
+# qhasm: t14 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t14=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t14=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t14=int6464#8
+# asm 2: mulpd 112(<b2a2p=%rdx),<t14=%xmm7
+mulpd 112(%rdx),%xmm7
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#8,<r14=int6464#4
+# asm 2: addpd <t14=%xmm7,<r14=%xmm3
+addpd %xmm7,%xmm3
+
+# qhasm: d14 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d14=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d14=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d14=int6464#8
+# asm 2: mulpd 112(<a2b2p=%r9),<d14=%xmm7
+mulpd 112(%r9),%xmm7
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#8,<r14=int6464#4
+# asm 2: addpd <d14=%xmm7,<r14=%xmm3
+addpd %xmm7,%xmm3
+
+# qhasm: t15 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t15=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t15=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t15 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t15=int6464#8
+# asm 2: mulpd 128(<b2a2p=%rdx),<t15=%xmm7
+mulpd 128(%rdx),%xmm7
+
+# qhasm: float6464 r15 += t15
+# asm 1: addpd <t15=int6464#8,<r15=int6464#5
+# asm 2: addpd <t15=%xmm7,<r15=%xmm4
+addpd %xmm7,%xmm4
+
+# qhasm: d15 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d15=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d15=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d15=int6464#8
+# asm 2: mulpd 128(<a2b2p=%r9),<d15=%xmm7
+mulpd 128(%r9),%xmm7
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#8,<r15=int6464#5
+# asm 2: addpd <d15=%xmm7,<r15=%xmm4
+addpd %xmm7,%xmm4
+
+# qhasm: t16 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t16=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t16=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t16 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t16=int6464#8
+# asm 2: mulpd 144(<b2a2p=%rdx),<t16=%xmm7
+mulpd 144(%rdx),%xmm7
+
+# qhasm: float6464 r16 += t16
+# asm 1: addpd <t16=int6464#8,<r16=int6464#6
+# asm 2: addpd <t16=%xmm7,<r16=%xmm5
+addpd %xmm7,%xmm5
+
+# qhasm: d16 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d16=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d16=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d16 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d16=int6464#8
+# asm 2: mulpd 144(<a2b2p=%r9),<d16=%xmm7
+mulpd 144(%r9),%xmm7
+
+# qhasm: float6464 r16 += d16
+# asm 1: addpd <d16=int6464#8,<r16=int6464#6
+# asm 2: addpd <d16=%xmm7,<r16=%xmm5
+addpd %xmm7,%xmm5
+
+# qhasm: t17 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>t17=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>t17=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 t17 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t17=int6464#8
+# asm 2: mulpd 160(<b2a2p=%rdx),<t17=%xmm7
+mulpd 160(%rdx),%xmm7
+
+# qhasm: float6464 r17 += t17
+# asm 1: addpd <t17=int6464#8,<r17=int6464#7
+# asm 2: addpd <t17=%xmm7,<r17=%xmm6
+addpd %xmm7,%xmm6
+
+# qhasm: d17 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d17=int6464#8
+# asm 2: movdqa <cd7six=%xmm14,>d17=%xmm7
+movdqa %xmm14,%xmm7
+
+# qhasm: float6464 d17 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d17=int6464#8
+# asm 2: mulpd 160(<a2b2p=%r9),<d17=%xmm7
+mulpd 160(%r9),%xmm7
+
+# qhasm: float6464 r17 += d17
+# asm 1: addpd <d17=int6464#8,<r17=int6464#7
+# asm 2: addpd <d17=%xmm7,<r17=%xmm6
+addpd %xmm7,%xmm6
+
+# qhasm: r18 = ab7six
+# asm 1: movdqa <ab7six=int6464#14,>r18=int6464#8
+# asm 2: movdqa <ab7six=%xmm13,>r18=%xmm7
+movdqa %xmm13,%xmm7
+
+# qhasm: float6464 r18 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r18=int6464#8
+# asm 2: mulpd 176(<b2a2p=%rdx),<r18=%xmm7
+mulpd 176(%rdx),%xmm7
+
+# qhasm: d18 = cd7six
+# asm 1: movdqa <cd7six=int6464#15,>d18=int6464#9
+# asm 2: movdqa <cd7six=%xmm14,>d18=%xmm8
+movdqa %xmm14,%xmm8
+
+# qhasm: float6464 d18 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d18=int6464#9
+# asm 2: mulpd 176(<a2b2p=%r9),<d18=%xmm8
+mulpd 176(%r9),%xmm8
+
+# qhasm: float6464 r18 += d18
+# asm 1: addpd <d18=int6464#9,<r18=int6464#8
+# asm 2: addpd <d18=%xmm8,<r18=%xmm7
+addpd %xmm8,%xmm7
+
+# qhasm: *(int128 *)(b1b1p + 112) = r7
+# asm 1: movdqa <r7=int6464#10,112(<b1b1p=int64#4)
+# asm 2: movdqa <r7=%xmm9,112(<b1b1p=%rcx)
+movdqa %xmm9,112(%rcx)
+
+# qhasm: ab8 = *(int128 *)(b1b1p + 128)
+# asm 1: movdqa 128(<b1b1p=int64#4),>ab8=int6464#9
+# asm 2: movdqa 128(<b1b1p=%rcx),>ab8=%xmm8
+movdqa 128(%rcx),%xmm8
+
+# qhasm: cd8 = *(int128 *)(ma1a1p + 128)
+# asm 1: movdqa 128(<ma1a1p=int64#5),>cd8=int6464#10
+# asm 2: movdqa 128(<ma1a1p=%r8),>cd8=%xmm9
+movdqa 128(%r8),%xmm9
+
+# qhasm: ab8six = ab8
+# asm 1: movdqa <ab8=int6464#9,>ab8six=int6464#14
+# asm 2: movdqa <ab8=%xmm8,>ab8six=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: cd8six = cd8
+# asm 1: movdqa <cd8=int6464#10,>cd8six=int6464#15
+# asm 2: movdqa <cd8=%xmm9,>cd8six=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm: float6464 ab8six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab8six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab8six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd8six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd8six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd8six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t8 = ab8
+# asm 1: movdqa <ab8=int6464#9,>t8=int6464#16
+# asm 2: movdqa <ab8=%xmm8,>t8=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t8=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t8=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r8 += t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#11
+# asm 2: addpd <t8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: d8 = cd8
+# asm 1: movdqa <cd8=int6464#10,>d8=int6464#16
+# asm 2: movdqa <cd8=%xmm9,>d8=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 d8 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d8=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d8=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r8 += d8
+# asm 1: addpd <d8=int6464#16,<r8=int6464#11
+# asm 2: addpd <d8=%xmm15,<r8=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t13 = ab8
+# asm 1: movdqa <ab8=int6464#9,>t13=int6464#16
+# asm 2: movdqa <ab8=%xmm8,>t13=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t13=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t13=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#3
+# asm 2: addpd <t13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: d13 = cd8
+# asm 1: movdqa <cd8=int6464#10,>d13=int6464#16
+# asm 2: movdqa <cd8=%xmm9,>d13=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d13=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d13=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#16,<r13=int6464#3
+# asm 2: addpd <d13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t14 = ab8
+# asm 1: movdqa <ab8=int6464#9,>t14=int6464#16
+# asm 2: movdqa <ab8=%xmm8,>t14=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t14=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t14=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#4
+# asm 2: addpd <t14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: d14 = cd8
+# asm 1: movdqa <cd8=int6464#10,>d14=int6464#16
+# asm 2: movdqa <cd8=%xmm9,>d14=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d14=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d14=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#16,<r14=int6464#4
+# asm 2: addpd <d14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: r19 = ab8
+# asm 1: movdqa <ab8=int6464#9,>r19=int6464#9
+# asm 2: movdqa <ab8=%xmm8,>r19=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: float6464 r19 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r19=int6464#9
+# asm 2: mulpd 176(<b2a2p=%rdx),<r19=%xmm8
+mulpd 176(%rdx),%xmm8
+
+# qhasm: d19 = cd8
+# asm 1: movdqa <cd8=int6464#10,>d19=int6464#10
+# asm 2: movdqa <cd8=%xmm9,>d19=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: float6464 d19 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d19=int6464#10
+# asm 2: mulpd 176(<a2b2p=%r9),<d19=%xmm9
+mulpd 176(%r9),%xmm9
+
+# qhasm: float6464 r19 += d19
+# asm 1: addpd <d19=int6464#10,<r19=int6464#9
+# asm 2: addpd <d19=%xmm9,<r19=%xmm8
+addpd %xmm9,%xmm8
+
+# qhasm: t9 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t9=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t9=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t9=int6464#10
+# asm 2: mulpd 16(<b2a2p=%rdx),<t9=%xmm9
+mulpd 16(%rdx),%xmm9
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#10,<r9=int6464#12
+# asm 2: addpd <t9=%xmm9,<r9=%xmm11
+addpd %xmm9,%xmm11
+
+# qhasm: d9 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d9=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d9=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d9=int6464#10
+# asm 2: mulpd 16(<a2b2p=%r9),<d9=%xmm9
+mulpd 16(%r9),%xmm9
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#10,<r9=int6464#12
+# asm 2: addpd <d9=%xmm9,<r9=%xmm11
+addpd %xmm9,%xmm11
+
+# qhasm: t10 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t10=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t10=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t10=int6464#10
+# asm 2: mulpd 32(<b2a2p=%rdx),<t10=%xmm9
+mulpd 32(%rdx),%xmm9
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#10,<r10=int6464#13
+# asm 2: addpd <t10=%xmm9,<r10=%xmm12
+addpd %xmm9,%xmm12
+
+# qhasm: d10 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d10=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d10=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d10=int6464#10
+# asm 2: mulpd 32(<a2b2p=%r9),<d10=%xmm9
+mulpd 32(%r9),%xmm9
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#10,<r10=int6464#13
+# asm 2: addpd <d10=%xmm9,<r10=%xmm12
+addpd %xmm9,%xmm12
+
+# qhasm: t11 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t11=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t11=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t11=int6464#10
+# asm 2: mulpd 48(<b2a2p=%rdx),<t11=%xmm9
+mulpd 48(%rdx),%xmm9
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#10,<r11=int6464#1
+# asm 2: addpd <t11=%xmm9,<r11=%xmm0
+addpd %xmm9,%xmm0
+
+# qhasm: d11 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d11=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d11=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d11=int6464#10
+# asm 2: mulpd 48(<a2b2p=%r9),<d11=%xmm9
+mulpd 48(%r9),%xmm9
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#10,<r11=int6464#1
+# asm 2: addpd <d11=%xmm9,<r11=%xmm0
+addpd %xmm9,%xmm0
+
+# qhasm: t12 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t12=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t12=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t12=int6464#10
+# asm 2: mulpd 64(<b2a2p=%rdx),<t12=%xmm9
+mulpd 64(%rdx),%xmm9
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#10,<r12=int6464#2
+# asm 2: addpd <t12=%xmm9,<r12=%xmm1
+addpd %xmm9,%xmm1
+
+# qhasm: d12 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d12=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d12=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d12=int6464#10
+# asm 2: mulpd 64(<a2b2p=%r9),<d12=%xmm9
+mulpd 64(%r9),%xmm9
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#10,<r12=int6464#2
+# asm 2: addpd <d12=%xmm9,<r12=%xmm1
+addpd %xmm9,%xmm1
+
+# qhasm: t15 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t15=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t15=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t15 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t15=int6464#10
+# asm 2: mulpd 112(<b2a2p=%rdx),<t15=%xmm9
+mulpd 112(%rdx),%xmm9
+
+# qhasm: float6464 r15 += t15
+# asm 1: addpd <t15=int6464#10,<r15=int6464#5
+# asm 2: addpd <t15=%xmm9,<r15=%xmm4
+addpd %xmm9,%xmm4
+
+# qhasm: d15 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d15=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d15=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d15=int6464#10
+# asm 2: mulpd 112(<a2b2p=%r9),<d15=%xmm9
+mulpd 112(%r9),%xmm9
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#10,<r15=int6464#5
+# asm 2: addpd <d15=%xmm9,<r15=%xmm4
+addpd %xmm9,%xmm4
+
+# qhasm: t16 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t16=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t16=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t16 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t16=int6464#10
+# asm 2: mulpd 128(<b2a2p=%rdx),<t16=%xmm9
+mulpd 128(%rdx),%xmm9
+
+# qhasm: float6464 r16 += t16
+# asm 1: addpd <t16=int6464#10,<r16=int6464#6
+# asm 2: addpd <t16=%xmm9,<r16=%xmm5
+addpd %xmm9,%xmm5
+
+# qhasm: d16 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d16=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d16=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d16 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d16=int6464#10
+# asm 2: mulpd 128(<a2b2p=%r9),<d16=%xmm9
+mulpd 128(%r9),%xmm9
+
+# qhasm: float6464 r16 += d16
+# asm 1: addpd <d16=int6464#10,<r16=int6464#6
+# asm 2: addpd <d16=%xmm9,<r16=%xmm5
+addpd %xmm9,%xmm5
+
+# qhasm: t17 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t17=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t17=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t17 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t17=int6464#10
+# asm 2: mulpd 144(<b2a2p=%rdx),<t17=%xmm9
+mulpd 144(%rdx),%xmm9
+
+# qhasm: float6464 r17 += t17
+# asm 1: addpd <t17=int6464#10,<r17=int6464#7
+# asm 2: addpd <t17=%xmm9,<r17=%xmm6
+addpd %xmm9,%xmm6
+
+# qhasm: d17 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d17=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d17=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d17 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d17=int6464#10
+# asm 2: mulpd 144(<a2b2p=%r9),<d17=%xmm9
+mulpd 144(%r9),%xmm9
+
+# qhasm: float6464 r17 += d17
+# asm 1: addpd <d17=int6464#10,<r17=int6464#7
+# asm 2: addpd <d17=%xmm9,<r17=%xmm6
+addpd %xmm9,%xmm6
+
+# qhasm: t18 = ab8six
+# asm 1: movdqa <ab8six=int6464#14,>t18=int6464#10
+# asm 2: movdqa <ab8six=%xmm13,>t18=%xmm9
+movdqa %xmm13,%xmm9
+
+# qhasm: float6464 t18 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t18=int6464#10
+# asm 2: mulpd 160(<b2a2p=%rdx),<t18=%xmm9
+mulpd 160(%rdx),%xmm9
+
+# qhasm: float6464 r18 += t18
+# asm 1: addpd <t18=int6464#10,<r18=int6464#8
+# asm 2: addpd <t18=%xmm9,<r18=%xmm7
+addpd %xmm9,%xmm7
+
+# qhasm: d18 = cd8six
+# asm 1: movdqa <cd8six=int6464#15,>d18=int6464#10
+# asm 2: movdqa <cd8six=%xmm14,>d18=%xmm9
+movdqa %xmm14,%xmm9
+
+# qhasm: float6464 d18 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d18=int6464#10
+# asm 2: mulpd 160(<a2b2p=%r9),<d18=%xmm9
+mulpd 160(%r9),%xmm9
+
+# qhasm: float6464 r18 += d18
+# asm 1: addpd <d18=int6464#10,<r18=int6464#8
+# asm 2: addpd <d18=%xmm9,<r18=%xmm7
+addpd %xmm9,%xmm7
+
+# qhasm: *(int128 *)(b1b1p + 128) = r8
+# asm 1: movdqa <r8=int6464#11,128(<b1b1p=int64#4)
+# asm 2: movdqa <r8=%xmm10,128(<b1b1p=%rcx)
+movdqa %xmm10,128(%rcx)
+
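+# The blocks below repeat one pattern per input coefficient: load ab_i from
+# b1b1p and cd_i from ma1a1p, keep copies pre-scaled by SIX_SIX, and
+# accumulate ab_i * b2a2p[j] plus cd_i * a2b2p[j] into the running product
+# coefficients r0..r22 across the 12-coefficient representation (offsets
+# 0..176 in steps of 16) -- what looks like a schoolbook multiply-accumulate.
+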
+# qhasm: ab9 = *(int128 *)(b1b1p + 144)
+# asm 1: movdqa 144(<b1b1p=int64#4),>ab9=int6464#10
+# asm 2: movdqa 144(<b1b1p=%rcx),>ab9=%xmm9
+movdqa 144(%rcx),%xmm9
+
+# qhasm: cd9 = *(int128 *)(ma1a1p + 144)
+# asm 1: movdqa 144(<ma1a1p=int64#5),>cd9=int6464#11
+# asm 2: movdqa 144(<ma1a1p=%r8),>cd9=%xmm10
+movdqa 144(%r8),%xmm10
+
+# qhasm: ab9six = ab9
+# asm 1: movdqa <ab9=int6464#10,>ab9six=int6464#14
+# asm 2: movdqa <ab9=%xmm9,>ab9six=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: cd9six = cd9
+# asm 1: movdqa <cd9=int6464#11,>cd9six=int6464#15
+# asm 2: movdqa <cd9=%xmm10,>cd9six=%xmm14
+movdqa %xmm10,%xmm14
+
+# qhasm: float6464 ab9six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab9six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab9six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd9six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd9six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd9six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t9 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t9=int6464#16
+# asm 2: movdqa <ab9=%xmm9,>t9=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t9=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t9=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r9 += t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#12
+# asm 2: addpd <t9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: d9 = cd9
+# asm 1: movdqa <cd9=int6464#11,>d9=int6464#16
+# asm 2: movdqa <cd9=%xmm10,>d9=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 d9 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d9=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d9=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r9 += d9
+# asm 1: addpd <d9=int6464#16,<r9=int6464#12
+# asm 2: addpd <d9=%xmm15,<r9=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t13 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t13=int6464#16
+# asm 2: movdqa <ab9=%xmm9,>t13=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t13=int6464#16
+# asm 2: mulpd 64(<b2a2p=%rdx),<t13=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#3
+# asm 2: addpd <t13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: d13 = cd9
+# asm 1: movdqa <cd9=int6464#11,>d13=int6464#16
+# asm 2: movdqa <cd9=%xmm10,>d13=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d13=int6464#16
+# asm 2: mulpd 64(<a2b2p=%r9),<d13=%xmm15
+mulpd 64(%r9),%xmm15
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#16,<r13=int6464#3
+# asm 2: addpd <d13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t14 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t14=int6464#16
+# asm 2: movdqa <ab9=%xmm9,>t14=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t14=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t14=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#4
+# asm 2: addpd <t14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: d14 = cd9
+# asm 1: movdqa <cd9=int6464#11,>d14=int6464#16
+# asm 2: movdqa <cd9=%xmm10,>d14=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d14=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d14=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#16,<r14=int6464#4
+# asm 2: addpd <d14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t15 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t15=int6464#16
+# asm 2: movdqa <ab9=%xmm9,>t15=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t15=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t15=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r15 += t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#5
+# asm 2: addpd <t15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: d15 = cd9
+# asm 1: movdqa <cd9=int6464#11,>d15=int6464#16
+# asm 2: movdqa <cd9=%xmm10,>d15=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d15=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d15=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#16,<r15=int6464#5
+# asm 2: addpd <d15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: t19 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t19=int6464#16
+# asm 2: movdqa <ab9=%xmm9,>t19=%xmm15
+movdqa %xmm9,%xmm15
+
+# qhasm: float6464 t19 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t19=int6464#16
+# asm 2: mulpd 160(<b2a2p=%rdx),<t19=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r19 += t19
+# asm 1: addpd <t19=int6464#16,<r19=int6464#9
+# asm 2: addpd <t19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: d19 = cd9
+# asm 1: movdqa <cd9=int6464#11,>d19=int6464#16
+# asm 2: movdqa <cd9=%xmm10,>d19=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 d19 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d19=int6464#16
+# asm 2: mulpd 160(<a2b2p=%r9),<d19=%xmm15
+mulpd 160(%r9),%xmm15
+
+# qhasm: float6464 r19 += d19
+# asm 1: addpd <d19=int6464#16,<r19=int6464#9
+# asm 2: addpd <d19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: r20 = ab9
+# asm 1: movdqa <ab9=int6464#10,>r20=int6464#10
+# asm 2: movdqa <ab9=%xmm9,>r20=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: float6464 r20 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r20=int6464#10
+# asm 2: mulpd 176(<b2a2p=%rdx),<r20=%xmm9
+mulpd 176(%rdx),%xmm9
+
+# qhasm: d20 = cd9
+# asm 1: movdqa <cd9=int6464#11,>d20=int6464#11
+# asm 2: movdqa <cd9=%xmm10,>d20=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: float6464 d20 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d20=int6464#11
+# asm 2: mulpd 176(<a2b2p=%r9),<d20=%xmm10
+mulpd 176(%r9),%xmm10
+
+# qhasm: float6464 r20 += d20
+# asm 1: addpd <d20=int6464#11,<r20=int6464#10
+# asm 2: addpd <d20=%xmm10,<r20=%xmm9
+addpd %xmm10,%xmm9
+
+# qhasm: t10 = ab9six
+# asm 1: movdqa <ab9six=int6464#14,>t10=int6464#11
+# asm 2: movdqa <ab9six=%xmm13,>t10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t10=int6464#11
+# asm 2: mulpd 16(<b2a2p=%rdx),<t10=%xmm10
+mulpd 16(%rdx),%xmm10
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#11,<r10=int6464#13
+# asm 2: addpd <t10=%xmm10,<r10=%xmm12
+addpd %xmm10,%xmm12
+
+# qhasm: d10 = cd9six
+# asm 1: movdqa <cd9six=int6464#15,>d10=int6464#11
+# asm 2: movdqa <cd9six=%xmm14,>d10=%xmm10
+movdqa %xmm14,%xmm10
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d10=int6464#11
+# asm 2: mulpd 16(<a2b2p=%r9),<d10=%xmm10
+mulpd 16(%r9),%xmm10
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#11,<r10=int6464#13
+# asm 2: addpd <d10=%xmm10,<r10=%xmm12
+addpd %xmm10,%xmm12
+
+# qhasm: t11 = ab9six
+# asm 1: movdqa <ab9six=int6464#14,>t11=int6464#11
+# asm 2: movdqa <ab9six=%xmm13,>t11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t11=int6464#11
+# asm 2: mulpd 32(<b2a2p=%rdx),<t11=%xmm10
+mulpd 32(%rdx),%xmm10
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#11,<r11=int6464#1
+# asm 2: addpd <t11=%xmm10,<r11=%xmm0
+addpd %xmm10,%xmm0
+
+# qhasm: d11 = cd9six
+# asm 1: movdqa <cd9six=int6464#15,>d11=int6464#11
+# asm 2: movdqa <cd9six=%xmm14,>d11=%xmm10
+movdqa %xmm14,%xmm10
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d11=int6464#11
+# asm 2: mulpd 32(<a2b2p=%r9),<d11=%xmm10
+mulpd 32(%r9),%xmm10
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#11,<r11=int6464#1
+# asm 2: addpd <d11=%xmm10,<r11=%xmm0
+addpd %xmm10,%xmm0
+
+# qhasm: t12 = ab9six
+# asm 1: movdqa <ab9six=int6464#14,>t12=int6464#11
+# asm 2: movdqa <ab9six=%xmm13,>t12=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t12=int6464#11
+# asm 2: mulpd 48(<b2a2p=%rdx),<t12=%xmm10
+mulpd 48(%rdx),%xmm10
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#11,<r12=int6464#2
+# asm 2: addpd <t12=%xmm10,<r12=%xmm1
+addpd %xmm10,%xmm1
+
+# qhasm: d12 = cd9six
+# asm 1: movdqa <cd9six=int6464#15,>d12=int6464#11
+# asm 2: movdqa <cd9six=%xmm14,>d12=%xmm10
+movdqa %xmm14,%xmm10
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d12=int6464#11
+# asm 2: mulpd 48(<a2b2p=%r9),<d12=%xmm10
+mulpd 48(%r9),%xmm10
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#11,<r12=int6464#2
+# asm 2: addpd <d12=%xmm10,<r12=%xmm1
+addpd %xmm10,%xmm1
+
+# qhasm: t16 = ab9six
+# asm 1: movdqa <ab9six=int6464#14,>t16=int6464#11
+# asm 2: movdqa <ab9six=%xmm13,>t16=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm: float6464 t16 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t16=int6464#11
+# asm 2: mulpd 112(<b2a2p=%rdx),<t16=%xmm10
+mulpd 112(%rdx),%xmm10
+
+# qhasm: float6464 r16 += t16
+# asm 1: addpd <t16=int6464#11,<r16=int6464#6
+# asm 2: addpd <t16=%xmm10,<r16=%xmm5
+addpd %xmm10,%xmm5
+
+# qhasm: d16 = cd9six
+# asm 1: movdqa <cd9six=int6464#15,>d16=int6464#11
+# asm 2: movdqa <cd9six=%xmm14,>d16=%xmm10
+movdqa %xmm14,%xmm10
+
+# qhasm: float6464 d16 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d16=int6464#11
+# asm 2: mulpd 112(<a2b2p=%r9),<d16=%xmm10
+mulpd 112(%r9),%xmm10
+
+# qhasm: float6464 r16 += d16
+# asm 1: addpd <d16=int6464#11,<r16=int6464#6
+# asm 2: addpd <d16=%xmm10,<r16=%xmm5
+addpd %xmm10,%xmm5
+
+# qhasm: t17 = ab9six
+# asm 1: movdqa <ab9six=int6464#14,>t17=int6464#11
+# asm 2: movdqa <ab9six=%xmm13,>t17=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm: float6464 t17 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t17=int6464#11
+# asm 2: mulpd 128(<b2a2p=%rdx),<t17=%xmm10
+mulpd 128(%rdx),%xmm10
+
+# qhasm: float6464 r17 += t17
+# asm 1: addpd <t17=int6464#11,<r17=int6464#7
+# asm 2: addpd <t17=%xmm10,<r17=%xmm6
+addpd %xmm10,%xmm6
+
+# qhasm: d17 = cd9six
+# asm 1: movdqa <cd9six=int6464#15,>d17=int6464#11
+# asm 2: movdqa <cd9six=%xmm14,>d17=%xmm10
+movdqa %xmm14,%xmm10
+
+# qhasm: float6464 d17 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d17=int6464#11
+# asm 2: mulpd 128(<a2b2p=%r9),<d17=%xmm10
+mulpd 128(%r9),%xmm10
+
+# qhasm: float6464 r17 += d17
+# asm 1: addpd <d17=int6464#11,<r17=int6464#7
+# asm 2: addpd <d17=%xmm10,<r17=%xmm6
+addpd %xmm10,%xmm6
+
+# qhasm: t18 = ab9six
+# asm 1: movdqa <ab9six=int6464#14,>t18=int6464#11
+# asm 2: movdqa <ab9six=%xmm13,>t18=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm: float6464 t18 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t18=int6464#11
+# asm 2: mulpd 144(<b2a2p=%rdx),<t18=%xmm10
+mulpd 144(%rdx),%xmm10
+
+# qhasm: float6464 r18 += t18
+# asm 1: addpd <t18=int6464#11,<r18=int6464#8
+# asm 2: addpd <t18=%xmm10,<r18=%xmm7
+addpd %xmm10,%xmm7
+
+# qhasm: d18 = cd9six
+# asm 1: movdqa <cd9six=int6464#15,>d18=int6464#11
+# asm 2: movdqa <cd9six=%xmm14,>d18=%xmm10
+movdqa %xmm14,%xmm10
+
+# qhasm: float6464 d18 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d18=int6464#11
+# asm 2: mulpd 144(<a2b2p=%r9),<d18=%xmm10
+mulpd 144(%r9),%xmm10
+
+# qhasm: float6464 r18 += d18
+# asm 1: addpd <d18=int6464#11,<r18=int6464#8
+# asm 2: addpd <d18=%xmm10,<r18=%xmm7
+addpd %xmm10,%xmm7
+
+# qhasm: *(int128 *)(b1b1p + 144) = r9
+# asm 1: movdqa <r9=int6464#12,144(<b1b1p=int64#4)
+# asm 2: movdqa <r9=%xmm11,144(<b1b1p=%rcx)
+movdqa %xmm11,144(%rcx)
+
+# qhasm: ab10 = *(int128 *)(b1b1p + 160)
+# asm 1: movdqa 160(<b1b1p=int64#4),>ab10=int6464#11
+# asm 2: movdqa 160(<b1b1p=%rcx),>ab10=%xmm10
+movdqa 160(%rcx),%xmm10
+
+# qhasm: cd10 = *(int128 *)(ma1a1p + 160)
+# asm 1: movdqa 160(<ma1a1p=int64#5),>cd10=int6464#12
+# asm 2: movdqa 160(<ma1a1p=%r8),>cd10=%xmm11
+movdqa 160(%r8),%xmm11
+
+# qhasm: ab10six = ab10
+# asm 1: movdqa <ab10=int6464#11,>ab10six=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>ab10six=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: cd10six = cd10
+# asm 1: movdqa <cd10=int6464#12,>cd10six=int6464#15
+# asm 2: movdqa <cd10=%xmm11,>cd10six=%xmm14
+movdqa %xmm11,%xmm14
+
+# qhasm: float6464 ab10six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab10six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab10six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd10six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd10six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd10six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t10 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t10=int6464#16
+# asm 2: movdqa <ab10=%xmm10,>t10=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t10=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t10=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r10 += t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#13
+# asm 2: addpd <t10=%xmm15,<r10=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: d10 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d10=int6464#16
+# asm 2: movdqa <cd10=%xmm11,>d10=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 d10 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d10=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d10=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r10 += d10
+# asm 1: addpd <d10=int6464#16,<r10=int6464#13
+# asm 2: addpd <d10=%xmm15,<r10=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t13 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t13=int6464#16
+# asm 2: movdqa <ab10=%xmm10,>t13=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t13=int6464#16
+# asm 2: mulpd 48(<b2a2p=%rdx),<t13=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#3
+# asm 2: addpd <t13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: d13 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d13=int6464#16
+# asm 2: movdqa <cd10=%xmm11,>d13=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d13=int6464#16
+# asm 2: mulpd 48(<a2b2p=%r9),<d13=%xmm15
+mulpd 48(%r9),%xmm15
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#16,<r13=int6464#3
+# asm 2: addpd <d13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t14 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t14=int6464#16
+# asm 2: movdqa <ab10=%xmm10,>t14=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t14=int6464#16
+# asm 2: mulpd 64(<b2a2p=%rdx),<t14=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#4
+# asm 2: addpd <t14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: d14 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d14=int6464#16
+# asm 2: movdqa <cd10=%xmm11,>d14=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d14=int6464#16
+# asm 2: mulpd 64(<a2b2p=%r9),<d14=%xmm15
+mulpd 64(%r9),%xmm15
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#16,<r14=int6464#4
+# asm 2: addpd <d14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t16 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t16=int6464#16
+# asm 2: movdqa <ab10=%xmm10,>t16=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t16 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t16=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t16=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r16 += t16
+# asm 1: addpd <t16=int6464#16,<r16=int6464#6
+# asm 2: addpd <t16=%xmm15,<r16=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: d16 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d16=int6464#16
+# asm 2: movdqa <cd10=%xmm11,>d16=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 d16 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d16=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d16=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r16 += d16
+# asm 1: addpd <d16=int6464#16,<r16=int6464#6
+# asm 2: addpd <d16=%xmm15,<r16=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: t15 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t15=int6464#16
+# asm 2: movdqa <ab10=%xmm10,>t15=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t15=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t15=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r15 += t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#5
+# asm 2: addpd <t15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: d15 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d15=int6464#16
+# asm 2: movdqa <cd10=%xmm11,>d15=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d15=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d15=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#16,<r15=int6464#5
+# asm 2: addpd <d15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: t19 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t19=int6464#16
+# asm 2: movdqa <ab10=%xmm10,>t19=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t19 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t19=int6464#16
+# asm 2: mulpd 144(<b2a2p=%rdx),<t19=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r19 += t19
+# asm 1: addpd <t19=int6464#16,<r19=int6464#9
+# asm 2: addpd <t19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: d19 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d19=int6464#16
+# asm 2: movdqa <cd10=%xmm11,>d19=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 d19 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d19=int6464#16
+# asm 2: mulpd 144(<a2b2p=%r9),<d19=%xmm15
+mulpd 144(%r9),%xmm15
+
+# qhasm: float6464 r19 += d19
+# asm 1: addpd <d19=int6464#16,<r19=int6464#9
+# asm 2: addpd <d19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: t20 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t20=int6464#16
+# asm 2: movdqa <ab10=%xmm10,>t20=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t20 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t20=int6464#16
+# asm 2: mulpd 160(<b2a2p=%rdx),<t20=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r20 += t20
+# asm 1: addpd <t20=int6464#16,<r20=int6464#10
+# asm 2: addpd <t20=%xmm15,<r20=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: d20 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d20=int6464#16
+# asm 2: movdqa <cd10=%xmm11,>d20=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 d20 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d20=int6464#16
+# asm 2: mulpd 160(<a2b2p=%r9),<d20=%xmm15
+mulpd 160(%r9),%xmm15
+
+# qhasm: float6464 r20 += d20
+# asm 1: addpd <d20=int6464#16,<r20=int6464#10
+# asm 2: addpd <d20=%xmm15,<r20=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: r21 = ab10
+# asm 1: movdqa <ab10=int6464#11,>r21=int6464#11
+# asm 2: movdqa <ab10=%xmm10,>r21=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: float6464 r21 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r21=int6464#11
+# asm 2: mulpd 176(<b2a2p=%rdx),<r21=%xmm10
+mulpd 176(%rdx),%xmm10
+
+# qhasm: d21 = cd10
+# asm 1: movdqa <cd10=int6464#12,>d21=int6464#12
+# asm 2: movdqa <cd10=%xmm11,>d21=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: float6464 d21 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d21=int6464#12
+# asm 2: mulpd 176(<a2b2p=%r9),<d21=%xmm11
+mulpd 176(%r9),%xmm11
+
+# qhasm: float6464 r21 += d21
+# asm 1: addpd <d21=int6464#12,<r21=int6464#11
+# asm 2: addpd <d21=%xmm11,<r21=%xmm10
+addpd %xmm11,%xmm10
+
+# qhasm: t11 = ab10six
+# asm 1: movdqa <ab10six=int6464#14,>t11=int6464#12
+# asm 2: movdqa <ab10six=%xmm13,>t11=%xmm11
+movdqa %xmm13,%xmm11
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t11=int6464#12
+# asm 2: mulpd 16(<b2a2p=%rdx),<t11=%xmm11
+mulpd 16(%rdx),%xmm11
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#12,<r11=int6464#1
+# asm 2: addpd <t11=%xmm11,<r11=%xmm0
+addpd %xmm11,%xmm0
+
+# qhasm: d11 = cd10six
+# asm 1: movdqa <cd10six=int6464#15,>d11=int6464#12
+# asm 2: movdqa <cd10six=%xmm14,>d11=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d11=int6464#12
+# asm 2: mulpd 16(<a2b2p=%r9),<d11=%xmm11
+mulpd 16(%r9),%xmm11
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#12,<r11=int6464#1
+# asm 2: addpd <d11=%xmm11,<r11=%xmm0
+addpd %xmm11,%xmm0
+
+# qhasm: t12 = ab10six
+# asm 1: movdqa <ab10six=int6464#14,>t12=int6464#12
+# asm 2: movdqa <ab10six=%xmm13,>t12=%xmm11
+movdqa %xmm13,%xmm11
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t12=int6464#12
+# asm 2: mulpd 32(<b2a2p=%rdx),<t12=%xmm11
+mulpd 32(%rdx),%xmm11
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#12,<r12=int6464#2
+# asm 2: addpd <t12=%xmm11,<r12=%xmm1
+addpd %xmm11,%xmm1
+
+# qhasm: d12 = cd10six
+# asm 1: movdqa <cd10six=int6464#15,>d12=int6464#12
+# asm 2: movdqa <cd10six=%xmm14,>d12=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d12=int6464#12
+# asm 2: mulpd 32(<a2b2p=%r9),<d12=%xmm11
+mulpd 32(%r9),%xmm11
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#12,<r12=int6464#2
+# asm 2: addpd <d12=%xmm11,<r12=%xmm1
+addpd %xmm11,%xmm1
+
+# qhasm: t17 = ab10six
+# asm 1: movdqa <ab10six=int6464#14,>t17=int6464#12
+# asm 2: movdqa <ab10six=%xmm13,>t17=%xmm11
+movdqa %xmm13,%xmm11
+
+# qhasm: float6464 t17 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t17=int6464#12
+# asm 2: mulpd 112(<b2a2p=%rdx),<t17=%xmm11
+mulpd 112(%rdx),%xmm11
+
+# qhasm: float6464 r17 += t17
+# asm 1: addpd <t17=int6464#12,<r17=int6464#7
+# asm 2: addpd <t17=%xmm11,<r17=%xmm6
+addpd %xmm11,%xmm6
+
+# qhasm: d17 = cd10six
+# asm 1: movdqa <cd10six=int6464#15,>d17=int6464#12
+# asm 2: movdqa <cd10six=%xmm14,>d17=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm: float6464 d17 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d17=int6464#12
+# asm 2: mulpd 112(<a2b2p=%r9),<d17=%xmm11
+mulpd 112(%r9),%xmm11
+
+# qhasm: float6464 r17 += d17
+# asm 1: addpd <d17=int6464#12,<r17=int6464#7
+# asm 2: addpd <d17=%xmm11,<r17=%xmm6
+addpd %xmm11,%xmm6
+
+# qhasm: t18 = ab10six
+# asm 1: movdqa <ab10six=int6464#14,>t18=int6464#12
+# asm 2: movdqa <ab10six=%xmm13,>t18=%xmm11
+movdqa %xmm13,%xmm11
+
+# qhasm: float6464 t18 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t18=int6464#12
+# asm 2: mulpd 128(<b2a2p=%rdx),<t18=%xmm11
+mulpd 128(%rdx),%xmm11
+
+# qhasm: float6464 r18 += t18
+# asm 1: addpd <t18=int6464#12,<r18=int6464#8
+# asm 2: addpd <t18=%xmm11,<r18=%xmm7
+addpd %xmm11,%xmm7
+
+# qhasm: d18 = cd10six
+# asm 1: movdqa <cd10six=int6464#15,>d18=int6464#12
+# asm 2: movdqa <cd10six=%xmm14,>d18=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm: float6464 d18 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d18=int6464#12
+# asm 2: mulpd 128(<a2b2p=%r9),<d18=%xmm11
+mulpd 128(%r9),%xmm11
+
+# qhasm: float6464 r18 += d18
+# asm 1: addpd <d18=int6464#12,<r18=int6464#8
+# asm 2: addpd <d18=%xmm11,<r18=%xmm7
+addpd %xmm11,%xmm7
+
+# qhasm: *(int128 *)(b1b1p + 160) = r10
+# asm 1: movdqa <r10=int6464#13,160(<b1b1p=int64#4)
+# asm 2: movdqa <r10=%xmm12,160(<b1b1p=%rcx)
+movdqa %xmm12,160(%rcx)
+
+# qhasm: ab11 = *(int128 *)(b1b1p + 176)
+# asm 1: movdqa 176(<b1b1p=int64#4),>ab11=int6464#12
+# asm 2: movdqa 176(<b1b1p=%rcx),>ab11=%xmm11
+movdqa 176(%rcx),%xmm11
+
+# qhasm: cd11 = *(int128 *)(ma1a1p + 176)
+# asm 1: movdqa 176(<ma1a1p=int64#5),>cd11=int6464#13
+# asm 2: movdqa 176(<ma1a1p=%r8),>cd11=%xmm12
+movdqa 176(%r8),%xmm12
+
+# qhasm: ab11six = ab11
+# asm 1: movdqa <ab11=int6464#12,>ab11six=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>ab11six=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: cd11six = cd11
+# asm 1: movdqa <cd11=int6464#13,>cd11six=int6464#15
+# asm 2: movdqa <cd11=%xmm12,>cd11six=%xmm14
+movdqa %xmm12,%xmm14
+
+# qhasm: float6464 ab11six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab11six=int6464#14
+# asm 2: mulpd SIX_SIX,<ab11six=%xmm13
+mulpd SIX_SIX,%xmm13
+
+# qhasm: float6464 cd11six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<cd11six=int6464#15
+# asm 2: mulpd SIX_SIX,<cd11six=%xmm14
+mulpd SIX_SIX,%xmm14
+
+# qhasm: t11 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t11=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t11=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(b2a2p + 0)
+# asm 1: mulpd 0(<b2a2p=int64#3),<t11=int6464#16
+# asm 2: mulpd 0(<b2a2p=%rdx),<t11=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r11 += t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#1
+# asm 2: addpd <t11=%xmm15,<r11=%xmm0
+addpd %xmm15,%xmm0
+
+# qhasm: d11 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d11=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d11=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d11 *= *(int128 *)(a2b2p + 0)
+# asm 1: mulpd 0(<a2b2p=int64#6),<d11=int6464#16
+# asm 2: mulpd 0(<a2b2p=%r9),<d11=%xmm15
+mulpd 0(%r9),%xmm15
+
+# qhasm: float6464 r11 += d11
+# asm 1: addpd <d11=int6464#16,<r11=int6464#1
+# asm 2: addpd <d11=%xmm15,<r11=%xmm0
+addpd %xmm15,%xmm0
+
+# qhasm: t13 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t13=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t13=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(b2a2p + 32)
+# asm 1: mulpd 32(<b2a2p=int64#3),<t13=int6464#16
+# asm 2: mulpd 32(<b2a2p=%rdx),<t13=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r13 += t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#3
+# asm 2: addpd <t13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: d13 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d13=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d13 *= *(int128 *)(a2b2p + 32)
+# asm 1: mulpd 32(<a2b2p=int64#6),<d13=int6464#16
+# asm 2: mulpd 32(<a2b2p=%r9),<d13=%xmm15
+mulpd 32(%r9),%xmm15
+
+# qhasm: float6464 r13 += d13
+# asm 1: addpd <d13=int6464#16,<r13=int6464#3
+# asm 2: addpd <d13=%xmm15,<r13=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t14 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t14=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t14=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(b2a2p + 48)
+# asm 1: mulpd 48(<b2a2p=int64#3),<t14=int6464#16
+# asm 2: mulpd 48(<b2a2p=%rdx),<t14=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r14 += t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#4
+# asm 2: addpd <t14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: d14 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d14=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d14=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d14 *= *(int128 *)(a2b2p + 48)
+# asm 1: mulpd 48(<a2b2p=int64#6),<d14=int6464#16
+# asm 2: mulpd 48(<a2b2p=%r9),<d14=%xmm15
+mulpd 48(%r9),%xmm15
+
+# qhasm: float6464 r14 += d14
+# asm 1: addpd <d14=int6464#16,<r14=int6464#4
+# asm 2: addpd <d14=%xmm15,<r14=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t15 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t15=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t15=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(b2a2p + 64)
+# asm 1: mulpd 64(<b2a2p=int64#3),<t15=int6464#16
+# asm 2: mulpd 64(<b2a2p=%rdx),<t15=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r15 += t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#5
+# asm 2: addpd <t15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: d15 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d15=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d15=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d15 *= *(int128 *)(a2b2p + 64)
+# asm 1: mulpd 64(<a2b2p=int64#6),<d15=int6464#16
+# asm 2: mulpd 64(<a2b2p=%r9),<d15=%xmm15
+mulpd 64(%r9),%xmm15
+
+# qhasm: float6464 r15 += d15
+# asm 1: addpd <d15=int6464#16,<r15=int6464#5
+# asm 2: addpd <d15=%xmm15,<r15=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: t16 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t16=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t16=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t16 *= *(int128 *)(b2a2p + 80)
+# asm 1: mulpd 80(<b2a2p=int64#3),<t16=int6464#16
+# asm 2: mulpd 80(<b2a2p=%rdx),<t16=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r16 += t16
+# asm 1: addpd <t16=int6464#16,<r16=int6464#6
+# asm 2: addpd <t16=%xmm15,<r16=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: d16 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d16=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d16=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d16 *= *(int128 *)(a2b2p + 80)
+# asm 1: mulpd 80(<a2b2p=int64#6),<d16=int6464#16
+# asm 2: mulpd 80(<a2b2p=%r9),<d16=%xmm15
+mulpd 80(%r9),%xmm15
+
+# qhasm: float6464 r16 += d16
+# asm 1: addpd <d16=int6464#16,<r16=int6464#6
+# asm 2: addpd <d16=%xmm15,<r16=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: t17 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t17=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t17=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t17 *= *(int128 *)(b2a2p + 96)
+# asm 1: mulpd 96(<b2a2p=int64#3),<t17=int6464#16
+# asm 2: mulpd 96(<b2a2p=%rdx),<t17=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r17 += t17
+# asm 1: addpd <t17=int6464#16,<r17=int6464#7
+# asm 2: addpd <t17=%xmm15,<r17=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: d17 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d17=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d17=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d17 *= *(int128 *)(a2b2p + 96)
+# asm 1: mulpd 96(<a2b2p=int64#6),<d17=int6464#16
+# asm 2: mulpd 96(<a2b2p=%r9),<d17=%xmm15
+mulpd 96(%r9),%xmm15
+
+# qhasm: float6464 r17 += d17
+# asm 1: addpd <d17=int6464#16,<r17=int6464#7
+# asm 2: addpd <d17=%xmm15,<r17=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: t19 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t19=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t19=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t19 *= *(int128 *)(b2a2p + 128)
+# asm 1: mulpd 128(<b2a2p=int64#3),<t19=int6464#16
+# asm 2: mulpd 128(<b2a2p=%rdx),<t19=%xmm15
+mulpd 128(%rdx),%xmm15
+
+# qhasm: float6464 r19 += t19
+# asm 1: addpd <t19=int6464#16,<r19=int6464#9
+# asm 2: addpd <t19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: d19 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d19=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d19=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d19 *= *(int128 *)(a2b2p + 128)
+# asm 1: mulpd 128(<a2b2p=int64#6),<d19=int6464#16
+# asm 2: mulpd 128(<a2b2p=%r9),<d19=%xmm15
+mulpd 128(%r9),%xmm15
+
+# qhasm: float6464 r19 += d19
+# asm 1: addpd <d19=int6464#16,<r19=int6464#9
+# asm 2: addpd <d19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: t20 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t20=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t20=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t20 *= *(int128 *)(b2a2p + 144)
+# asm 1: mulpd 144(<b2a2p=int64#3),<t20=int6464#16
+# asm 2: mulpd 144(<b2a2p=%rdx),<t20=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r20 += t20
+# asm 1: addpd <t20=int6464#16,<r20=int6464#10
+# asm 2: addpd <t20=%xmm15,<r20=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: d20 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d20=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d20=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d20 *= *(int128 *)(a2b2p + 144)
+# asm 1: mulpd 144(<a2b2p=int64#6),<d20=int6464#16
+# asm 2: mulpd 144(<a2b2p=%r9),<d20=%xmm15
+mulpd 144(%r9),%xmm15
+
+# qhasm: float6464 r20 += d20
+# asm 1: addpd <d20=int6464#16,<r20=int6464#10
+# asm 2: addpd <d20=%xmm15,<r20=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t21 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t21=int6464#16
+# asm 2: movdqa <ab11=%xmm11,>t21=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t21 *= *(int128 *)(b2a2p + 160)
+# asm 1: mulpd 160(<b2a2p=int64#3),<t21=int6464#16
+# asm 2: mulpd 160(<b2a2p=%rdx),<t21=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r21 += t21
+# asm 1: addpd <t21=int6464#16,<r21=int6464#11
+# asm 2: addpd <t21=%xmm15,<r21=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: d21 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d21=int6464#16
+# asm 2: movdqa <cd11=%xmm12,>d21=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 d21 *= *(int128 *)(a2b2p + 160)
+# asm 1: mulpd 160(<a2b2p=int64#6),<d21=int6464#16
+# asm 2: mulpd 160(<a2b2p=%r9),<d21=%xmm15
+mulpd 160(%r9),%xmm15
+
+# qhasm: float6464 r21 += d21
+# asm 1: addpd <d21=int6464#16,<r21=int6464#11
+# asm 2: addpd <d21=%xmm15,<r21=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: r22 = ab11
+# asm 1: movdqa <ab11=int6464#12,>r22=int6464#12
+# asm 2: movdqa <ab11=%xmm11,>r22=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: float6464 r22 *= *(int128 *)(b2a2p + 176)
+# asm 1: mulpd 176(<b2a2p=int64#3),<r22=int6464#12
+# asm 2: mulpd 176(<b2a2p=%rdx),<r22=%xmm11
+mulpd 176(%rdx),%xmm11
+
+# qhasm: d22 = cd11
+# asm 1: movdqa <cd11=int6464#13,>d22=int6464#13
+# asm 2: movdqa <cd11=%xmm12,>d22=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 d22 *= *(int128 *)(a2b2p + 176)
+# asm 1: mulpd 176(<a2b2p=int64#6),<d22=int6464#13
+# asm 2: mulpd 176(<a2b2p=%r9),<d22=%xmm12
+mulpd 176(%r9),%xmm12
+
+# qhasm: float6464 r22 += d22
+# asm 1: addpd <d22=int6464#13,<r22=int6464#12
+# asm 2: addpd <d22=%xmm12,<r22=%xmm11
+addpd %xmm12,%xmm11
+
+# qhasm: t12 = ab11six
+# asm 1: movdqa <ab11six=int6464#14,>t12=int6464#13
+# asm 2: movdqa <ab11six=%xmm13,>t12=%xmm12
+movdqa %xmm13,%xmm12
+
+# qhasm: float6464 t12 *= *(int128 *)(b2a2p + 16)
+# asm 1: mulpd 16(<b2a2p=int64#3),<t12=int6464#13
+# asm 2: mulpd 16(<b2a2p=%rdx),<t12=%xmm12
+mulpd 16(%rdx),%xmm12
+
+# qhasm: float6464 r12 += t12
+# asm 1: addpd <t12=int6464#13,<r12=int6464#2
+# asm 2: addpd <t12=%xmm12,<r12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: d12 = cd11six
+# asm 1: movdqa <cd11six=int6464#15,>d12=int6464#13
+# asm 2: movdqa <cd11six=%xmm14,>d12=%xmm12
+movdqa %xmm14,%xmm12
+
+# qhasm: float6464 d12 *= *(int128 *)(a2b2p + 16)
+# asm 1: mulpd 16(<a2b2p=int64#6),<d12=int6464#13
+# asm 2: mulpd 16(<a2b2p=%r9),<d12=%xmm12
+mulpd 16(%r9),%xmm12
+
+# qhasm: float6464 r12 += d12
+# asm 1: addpd <d12=int6464#13,<r12=int6464#2
+# asm 2: addpd <d12=%xmm12,<r12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: t18 = ab11six
+# asm 1: movdqa <ab11six=int6464#14,>t18=int6464#13
+# asm 2: movdqa <ab11six=%xmm13,>t18=%xmm12
+movdqa %xmm13,%xmm12
+
+# qhasm: float6464 t18 *= *(int128 *)(b2a2p + 112)
+# asm 1: mulpd 112(<b2a2p=int64#3),<t18=int6464#13
+# asm 2: mulpd 112(<b2a2p=%rdx),<t18=%xmm12
+mulpd 112(%rdx),%xmm12
+
+# qhasm: float6464 r18 += t18
+# asm 1: addpd <t18=int6464#13,<r18=int6464#8
+# asm 2: addpd <t18=%xmm12,<r18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: d18 = cd11six
+# asm 1: movdqa <cd11six=int6464#15,>d18=int6464#13
+# asm 2: movdqa <cd11six=%xmm14,>d18=%xmm12
+movdqa %xmm14,%xmm12
+
+# qhasm: float6464 d18 *= *(int128 *)(a2b2p + 112)
+# asm 1: mulpd 112(<a2b2p=int64#6),<d18=int6464#13
+# asm 2: mulpd 112(<a2b2p=%r9),<d18=%xmm12
+mulpd 112(%r9),%xmm12
+
+# qhasm: float6464 r18 += d18
+# asm 1: addpd <d18=int6464#13,<r18=int6464#8
+# asm 2: addpd <d18=%xmm12,<r18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(b1b1p + 176) = r11
+# asm 1: movdqa <r11=int6464#1,176(<b1b1p=int64#4)
+# asm 2: movdqa <r11=%xmm0,176(<b1b1p=%rcx)
+movdqa %xmm0,176(%rcx)
+
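+# From here on the upper product coefficients r12..r22 are folded back into
+# r0..r11 using small integer constants (TWO, THREE, FOUR, FIVE, SIX, EIGHT,
+# NINE, EIGHTEEN, THIRTY). This looks like the degree-reduction step of the
+# polynomial multiplication: the degree-22 intermediate result is reduced to
+# the 12 coefficients that are stored back to b1b1p.
+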
+# qhasm: r0 = *(int128 *)(b1b1p + 0)
+# asm 1: movdqa 0(<b1b1p=int64#4),>r0=int6464#1
+# asm 2: movdqa 0(<b1b1p=%rcx),>r0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: float6464 r0 -= r12
+# asm 1: subpd <r12=int6464#2,<r0=int6464#1
+# asm 2: subpd <r12=%xmm1,<r0=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#13
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 t15 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t15=int6464#13
+# asm 2: mulpd SIX_SIX,<t15=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: float6464 r0 += t15
+# asm 1: addpd <t15=int6464#13,<r0=int6464#1
+# asm 2: addpd <t15=%xmm12,<r0=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: t18 = r18
+# asm 1: movdqa <r18=int6464#8,>t18=int6464#13
+# asm 2: movdqa <r18=%xmm7,>t18=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 t18 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t18=int6464#13
+# asm 2: mulpd TWO_TWO,<t18=%xmm12
+mulpd TWO_TWO,%xmm12
+
+# qhasm: float6464 r0 -= t18
+# asm 1: subpd <t18=int6464#13,<r0=int6464#1
+# asm 2: subpd <t18=%xmm12,<r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#13
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 t21 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t21=int6464#13
+# asm 2: mulpd SIX_SIX,<t21=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: float6464 r0 -= t21
+# asm 1: subpd <t21=int6464#13,<r0=int6464#1
+# asm 2: subpd <t21=%xmm12,<r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: r3 = *(int128 *)(b1b1p + 48)
+# asm 1: movdqa 48(<b1b1p=int64#4),>r3=int6464#13
+# asm 2: movdqa 48(<b1b1p=%rcx),>r3=%xmm12
+movdqa 48(%rcx),%xmm12
+
+# qhasm: float6464 r3 -= r12
+# asm 1: subpd <r12=int6464#2,<r3=int6464#13
+# asm 2: subpd <r12=%xmm1,<r3=%xmm12
+subpd %xmm1,%xmm12
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#14
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t15 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t15=int6464#14
+# asm 2: mulpd FIVE_FIVE,<t15=%xmm13
+mulpd FIVE_FIVE,%xmm13
+
+# qhasm: float6464 r3 += t15
+# asm 1: addpd <t15=int6464#14,<r3=int6464#13
+# asm 2: addpd <t15=%xmm13,<r3=%xmm12
+addpd %xmm13,%xmm12
+
+# qhasm: float6464 r3 -= r18
+# asm 1: subpd <r18=int6464#8,<r3=int6464#13
+# asm 2: subpd <r18=%xmm7,<r3=%xmm12
+subpd %xmm7,%xmm12
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#14
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t21 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<t21=int6464#14
+# asm 2: mulpd EIGHT_EIGHT,<t21=%xmm13
+mulpd EIGHT_EIGHT,%xmm13
+
+# qhasm: float6464 r3 -= t21
+# asm 1: subpd <t21=int6464#14,<r3=int6464#13
+# asm 2: subpd <t21=%xmm13,<r3=%xmm12
+subpd %xmm13,%xmm12
+
+# qhasm: r6 = *(int128 *)(b1b1p + 96)
+# asm 1: movdqa 96(<b1b1p=int64#4),>r6=int6464#14
+# asm 2: movdqa 96(<b1b1p=%rcx),>r6=%xmm13
+movdqa 96(%rcx),%xmm13
+
+# qhasm: t12 = r12
+# asm 1: movdqa <r12=int6464#2,>t12=int6464#15
+# asm 2: movdqa <r12=%xmm1,>t12=%xmm14
+movdqa %xmm1,%xmm14
+
+# qhasm: float6464 t12 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<t12=int6464#15
+# asm 2: mulpd FOUR_FOUR,<t12=%xmm14
+mulpd FOUR_FOUR,%xmm14
+
+# qhasm: float6464 r6 -= t12
+# asm 1: subpd <t12=int6464#15,<r6=int6464#14
+# asm 2: subpd <t12=%xmm14,<r6=%xmm13
+subpd %xmm14,%xmm13
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#15
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm14
+movdqa %xmm4,%xmm14
+
+# qhasm: float6464 t15 *= EIGHTEEN_EIGHTEEN
+# asm 1: mulpd EIGHTEEN_EIGHTEEN,<t15=int6464#15
+# asm 2: mulpd EIGHTEEN_EIGHTEEN,<t15=%xmm14
+mulpd EIGHTEEN_EIGHTEEN,%xmm14
+
+# qhasm: float6464 r6 += t15
+# asm 1: addpd <t15=int6464#15,<r6=int6464#14
+# asm 2: addpd <t15=%xmm14,<r6=%xmm13
+addpd %xmm14,%xmm13
+
+# qhasm: t18 = r18
+# asm 1: movdqa <r18=int6464#8,>t18=int6464#15
+# asm 2: movdqa <r18=%xmm7,>t18=%xmm14
+movdqa %xmm7,%xmm14
+
+# qhasm: float6464 t18 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t18=int6464#15
+# asm 2: mulpd THREE_THREE,<t18=%xmm14
+mulpd THREE_THREE,%xmm14
+
+# qhasm: float6464 r6 -= t18
+# asm 1: subpd <t18=int6464#15,<r6=int6464#14
+# asm 2: subpd <t18=%xmm14,<r6=%xmm13
+subpd %xmm14,%xmm13
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#15
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm14
+movdqa %xmm10,%xmm14
+
+# qhasm: float6464 t21 *= THIRTY_THIRTY
+# asm 1: mulpd THIRTY_THIRTY,<t21=int6464#15
+# asm 2: mulpd THIRTY_THIRTY,<t21=%xmm14
+mulpd THIRTY_THIRTY,%xmm14
+
+# qhasm: float6464 r6 -= t21
+# asm 1: subpd <t21=int6464#15,<r6=int6464#14
+# asm 2: subpd <t21=%xmm14,<r6=%xmm13
+subpd %xmm14,%xmm13
+
+# qhasm: r9 = *(int128 *)(b1b1p + 144)
+# asm 1: movdqa 144(<b1b1p=int64#4),>r9=int6464#15
+# asm 2: movdqa 144(<b1b1p=%rcx),>r9=%xmm14
+movdqa 144(%rcx),%xmm14
+
+# qhasm: float6464 r9 -= r12
+# asm 1: subpd <r12=int6464#2,<r9=int6464#15
+# asm 2: subpd <r12=%xmm1,<r9=%xmm14
+subpd %xmm1,%xmm14
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#2
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: float6464 t15 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t15=int6464#2
+# asm 2: mulpd TWO_TWO,<t15=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 r9 += t15
+# asm 1: addpd <t15=int6464#2,<r9=int6464#15
+# asm 2: addpd <t15=%xmm1,<r9=%xmm14
+addpd %xmm1,%xmm14
+
+# qhasm: float6464 r9 += r18
+# asm 1: addpd <r18=int6464#8,<r9=int6464#15
+# asm 2: addpd <r18=%xmm7,<r9=%xmm14
+addpd %xmm7,%xmm14
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#2
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 t21 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<t21=int6464#2
+# asm 2: mulpd NINE_NINE,<t21=%xmm1
+mulpd NINE_NINE,%xmm1
+
+# qhasm: float6464 r9 -= t21
+# asm 1: subpd <t21=int6464#2,<r9=int6464#15
+# asm 2: subpd <t21=%xmm1,<r9=%xmm14
+subpd %xmm1,%xmm14
+
+# qhasm: r1 = *(int128 *)(b1b1p + 16)
+# asm 1: movdqa 16(<b1b1p=int64#4),>r1=int6464#2
+# asm 2: movdqa 16(<b1b1p=%rcx),>r1=%xmm1
+movdqa 16(%rcx),%xmm1
+
+# qhasm: float6464 r1 -= r13
+# asm 1: subpd <r13=int6464#3,<r1=int6464#2
+# asm 2: subpd <r13=%xmm2,<r1=%xmm1
+subpd %xmm2,%xmm1
+
+# qhasm: float6464 r1 += r16
+# asm 1: addpd <r16=int6464#6,<r1=int6464#2
+# asm 2: addpd <r16=%xmm5,<r1=%xmm1
+addpd %xmm5,%xmm1
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#5
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm4
+movdqa %xmm8,%xmm4
+
+# qhasm: float6464 t19 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t19=int6464#5
+# asm 2: mulpd TWO_TWO,<t19=%xmm4
+mulpd TWO_TWO,%xmm4
+
+# qhasm: float6464 r1 -= t19
+# asm 1: subpd <t19=int6464#5,<r1=int6464#2
+# asm 2: subpd <t19=%xmm4,<r1=%xmm1
+subpd %xmm4,%xmm1
+
+# qhasm: float6464 r1 -= r22
+# asm 1: subpd <r22=int6464#12,<r1=int6464#2
+# asm 2: subpd <r22=%xmm11,<r1=%xmm1
+subpd %xmm11,%xmm1
+
+# qhasm: r4 = *(int128 *)(b1b1p + 64)
+# asm 1: movdqa 64(<b1b1p=int64#4),>r4=int6464#5
+# asm 2: movdqa 64(<b1b1p=%rcx),>r4=%xmm4
+movdqa 64(%rcx),%xmm4
+
+# qhasm: t13 = r13
+# asm 1: movdqa <r13=int6464#3,>t13=int6464#8
+# asm 2: movdqa <r13=%xmm2,>t13=%xmm7
+movdqa %xmm2,%xmm7
+
+# qhasm: float6464 t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t13=int6464#8
+# asm 2: mulpd SIX_SIX,<t13=%xmm7
+mulpd SIX_SIX,%xmm7
+
+# qhasm: float6464 r4 -= t13
+# asm 1: subpd <t13=int6464#8,<r4=int6464#5
+# asm 2: subpd <t13=%xmm7,<r4=%xmm4
+subpd %xmm7,%xmm4
+
+# qhasm: t16 = r16
+# asm 1: movdqa <r16=int6464#6,>t16=int6464#8
+# asm 2: movdqa <r16=%xmm5,>t16=%xmm7
+movdqa %xmm5,%xmm7
+
+# qhasm: float6464 t16 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t16=int6464#8
+# asm 2: mulpd FIVE_FIVE,<t16=%xmm7
+mulpd FIVE_FIVE,%xmm7
+
+# qhasm: float6464 r4 += t16
+# asm 1: addpd <t16=int6464#8,<r4=int6464#5
+# asm 2: addpd <t16=%xmm7,<r4=%xmm4
+addpd %xmm7,%xmm4
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#8
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm7
+movdqa %xmm8,%xmm7
+
+# qhasm: float6464 t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t19=int6464#8
+# asm 2: mulpd SIX_SIX,<t19=%xmm7
+mulpd SIX_SIX,%xmm7
+
+# qhasm: float6464 r4 -= t19
+# asm 1: subpd <t19=int6464#8,<r4=int6464#5
+# asm 2: subpd <t19=%xmm7,<r4=%xmm4
+subpd %xmm7,%xmm4
+
+# qhasm: t22 = r22
+# asm 1: movdqa <r22=int6464#12,>t22=int6464#8
+# asm 2: movdqa <r22=%xmm11,>t22=%xmm7
+movdqa %xmm11,%xmm7
+
+# qhasm: float6464 t22 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<t22=int6464#8
+# asm 2: mulpd EIGHT_EIGHT,<t22=%xmm7
+mulpd EIGHT_EIGHT,%xmm7
+
+# qhasm: float6464 r4 -= t22
+# asm 1: subpd <t22=int6464#8,<r4=int6464#5
+# asm 2: subpd <t22=%xmm7,<r4=%xmm4
+subpd %xmm7,%xmm4
+
+# qhasm: r7 = *(int128 *)(b1b1p + 112)
+# asm 1: movdqa 112(<b1b1p=int64#4),>r7=int6464#8
+# asm 2: movdqa 112(<b1b1p=%rcx),>r7=%xmm7
+movdqa 112(%rcx),%xmm7
+
+# qhasm: t13 = r13
+# asm 1: movdqa <r13=int6464#3,>t13=int6464#11
+# asm 2: movdqa <r13=%xmm2,>t13=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm: float6464 t13 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<t13=int6464#11
+# asm 2: mulpd FOUR_FOUR,<t13=%xmm10
+mulpd FOUR_FOUR,%xmm10
+
+# qhasm: float6464 r7 -= t13
+# asm 1: subpd <t13=int6464#11,<r7=int6464#8
+# asm 2: subpd <t13=%xmm10,<r7=%xmm7
+subpd %xmm10,%xmm7
+
+# qhasm: t16 = r16
+# asm 1: movdqa <r16=int6464#6,>t16=int6464#11
+# asm 2: movdqa <r16=%xmm5,>t16=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm: float6464 t16 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t16=int6464#11
+# asm 2: mulpd THREE_THREE,<t16=%xmm10
+mulpd THREE_THREE,%xmm10
+
+# qhasm: float6464 r7 += t16
+# asm 1: addpd <t16=int6464#11,<r7=int6464#8
+# asm 2: addpd <t16=%xmm10,<r7=%xmm7
+addpd %xmm10,%xmm7
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#11
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm: float6464 t19 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t19=int6464#11
+# asm 2: mulpd THREE_THREE,<t19=%xmm10
+mulpd THREE_THREE,%xmm10
+
+# qhasm: float6464 r7 -= t19
+# asm 1: subpd <t19=int6464#11,<r7=int6464#8
+# asm 2: subpd <t19=%xmm10,<r7=%xmm7
+subpd %xmm10,%xmm7
+
+# qhasm: t22 = r22
+# asm 1: movdqa <r22=int6464#12,>t22=int6464#11
+# asm 2: movdqa <r22=%xmm11,>t22=%xmm10
+movdqa %xmm11,%xmm10
+
+# qhasm: float6464 t22 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t22=int6464#11
+# asm 2: mulpd FIVE_FIVE,<t22=%xmm10
+mulpd FIVE_FIVE,%xmm10
+
+# qhasm: float6464 r7 -= t22
+# asm 1: subpd <t22=int6464#11,<r7=int6464#8
+# asm 2: subpd <t22=%xmm10,<r7=%xmm7
+subpd %xmm10,%xmm7
+
+# qhasm: r10 = *(int128 *)(b1b1p + 160)
+# asm 1: movdqa 160(<b1b1p=int64#4),>r10=int6464#11
+# asm 2: movdqa 160(<b1b1p=%rcx),>r10=%xmm10
+movdqa 160(%rcx),%xmm10
+
+# qhasm: t13 = r13
+# asm 1: movdqa <r13=int6464#3,>t13=int6464#3
+# asm 2: movdqa <r13=%xmm2,>t13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: float6464 t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t13=int6464#3
+# asm 2: mulpd SIX_SIX,<t13=%xmm2
+mulpd SIX_SIX,%xmm2
+
+# qhasm: float6464 r10 -= t13
+# asm 1: subpd <t13=int6464#3,<r10=int6464#11
+# asm 2: subpd <t13=%xmm2,<r10=%xmm10
+subpd %xmm2,%xmm10
+
+# qhasm: t16 = r16
+# asm 1: movdqa <r16=int6464#6,>t16=int6464#3
+# asm 2: movdqa <r16=%xmm5,>t16=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm: float6464 t16 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t16=int6464#3
+# asm 2: mulpd TWO_TWO,<t16=%xmm2
+mulpd TWO_TWO,%xmm2
+
+# qhasm: float6464 r10 += t16
+# asm 1: addpd <t16=int6464#3,<r10=int6464#11
+# asm 2: addpd <t16=%xmm2,<r10=%xmm10
+addpd %xmm2,%xmm10
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#3
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm2
+movdqa %xmm8,%xmm2
+
+# qhasm: float6464 t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t19=int6464#3
+# asm 2: mulpd SIX_SIX,<t19=%xmm2
+mulpd SIX_SIX,%xmm2
+
+# qhasm: float6464 r10 += t19
+# asm 1: addpd <t19=int6464#3,<r10=int6464#11
+# asm 2: addpd <t19=%xmm2,<r10=%xmm10
+addpd %xmm2,%xmm10
+
+# qhasm: t22 = r22
+# asm 1: movdqa <r22=int6464#12,>t22=int6464#3
+# asm 2: movdqa <r22=%xmm11,>t22=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm: float6464 t22 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<t22=int6464#3
+# asm 2: mulpd NINE_NINE,<t22=%xmm2
+mulpd NINE_NINE,%xmm2
+
+# qhasm: float6464 r10 -= t22
+# asm 1: subpd <t22=int6464#3,<r10=int6464#11
+# asm 2: subpd <t22=%xmm2,<r10=%xmm10
+subpd %xmm2,%xmm10
+
+# qhasm: r2 = *(int128 *)(b1b1p + 32)
+# asm 1: movdqa 32(<b1b1p=int64#4),>r2=int6464#3
+# asm 2: movdqa 32(<b1b1p=%rcx),>r2=%xmm2
+movdqa 32(%rcx),%xmm2
+
+# qhasm: float6464 r2 -= r14
+# asm 1: subpd <r14=int6464#4,<r2=int6464#3
+# asm 2: subpd <r14=%xmm3,<r2=%xmm2
+subpd %xmm3,%xmm2
+
+# qhasm: float6464 r2 += r17
+# asm 1: addpd <r17=int6464#7,<r2=int6464#3
+# asm 2: addpd <r17=%xmm6,<r2=%xmm2
+addpd %xmm6,%xmm2
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#6
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm: float6464 t20 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t20=int6464#6
+# asm 2: mulpd TWO_TWO,<t20=%xmm5
+mulpd TWO_TWO,%xmm5
+
+# qhasm: float6464 r2 -= t20
+# asm 1: subpd <t20=int6464#6,<r2=int6464#3
+# asm 2: subpd <t20=%xmm5,<r2=%xmm2
+subpd %xmm5,%xmm2
+
+# qhasm: r5 = *(int128 *)(b1b1p + 80)
+# asm 1: movdqa 80(<b1b1p=int64#4),>r5=int6464#6
+# asm 2: movdqa 80(<b1b1p=%rcx),>r5=%xmm5
+movdqa 80(%rcx),%xmm5
+
+# qhasm: t14 = r14
+# asm 1: movdqa <r14=int6464#4,>t14=int6464#9
+# asm 2: movdqa <r14=%xmm3,>t14=%xmm8
+movdqa %xmm3,%xmm8
+
+# qhasm: float6464 t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t14=int6464#9
+# asm 2: mulpd SIX_SIX,<t14=%xmm8
+mulpd SIX_SIX,%xmm8
+
+# qhasm: float6464 r5 -= t14
+# asm 1: subpd <t14=int6464#9,<r5=int6464#6
+# asm 2: subpd <t14=%xmm8,<r5=%xmm5
+subpd %xmm8,%xmm5
+
+# qhasm: t17 = r17
+# asm 1: movdqa <r17=int6464#7,>t17=int6464#9
+# asm 2: movdqa <r17=%xmm6,>t17=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm: float6464 t17 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t17=int6464#9
+# asm 2: mulpd FIVE_FIVE,<t17=%xmm8
+mulpd FIVE_FIVE,%xmm8
+
+# qhasm: float6464 r5 += t17
+# asm 1: addpd <t17=int6464#9,<r5=int6464#6
+# asm 2: addpd <t17=%xmm8,<r5=%xmm5
+addpd %xmm8,%xmm5
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#9
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t20=int6464#9
+# asm 2: mulpd SIX_SIX,<t20=%xmm8
+mulpd SIX_SIX,%xmm8
+
+# qhasm: float6464 r5 -= t20
+# asm 1: subpd <t20=int6464#9,<r5=int6464#6
+# asm 2: subpd <t20=%xmm8,<r5=%xmm5
+subpd %xmm8,%xmm5
+
+# qhasm: r8 = *(int128 *)(b1b1p + 128)
+# asm 1: movdqa 128(<b1b1p=int64#4),>r8=int6464#9
+# asm 2: movdqa 128(<b1b1p=%rcx),>r8=%xmm8
+movdqa 128(%rcx),%xmm8
+
+# qhasm: t14 = r14
+# asm 1: movdqa <r14=int6464#4,>t14=int6464#12
+# asm 2: movdqa <r14=%xmm3,>t14=%xmm11
+movdqa %xmm3,%xmm11
+
+# qhasm: float6464 t14 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<t14=int6464#12
+# asm 2: mulpd FOUR_FOUR,<t14=%xmm11
+mulpd FOUR_FOUR,%xmm11
+
+# qhasm: float6464 r8 -= t14
+# asm 1: subpd <t14=int6464#12,<r8=int6464#9
+# asm 2: subpd <t14=%xmm11,<r8=%xmm8
+subpd %xmm11,%xmm8
+
+# qhasm: t17 = r17
+# asm 1: movdqa <r17=int6464#7,>t17=int6464#12
+# asm 2: movdqa <r17=%xmm6,>t17=%xmm11
+movdqa %xmm6,%xmm11
+
+# qhasm: float6464 t17 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t17=int6464#12
+# asm 2: mulpd THREE_THREE,<t17=%xmm11
+mulpd THREE_THREE,%xmm11
+
+# qhasm: float6464 r8 += t17
+# asm 1: addpd <t17=int6464#12,<r8=int6464#9
+# asm 2: addpd <t17=%xmm11,<r8=%xmm8
+addpd %xmm11,%xmm8
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#12
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm11
+movdqa %xmm9,%xmm11
+
+# qhasm: float6464 t20 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t20=int6464#12
+# asm 2: mulpd THREE_THREE,<t20=%xmm11
+mulpd THREE_THREE,%xmm11
+
+# qhasm: float6464 r8 -= t20
+# asm 1: subpd <t20=int6464#12,<r8=int6464#9
+# asm 2: subpd <t20=%xmm11,<r8=%xmm8
+subpd %xmm11,%xmm8
+
+# qhasm: r11 = *(int128 *)(b1b1p + 176)
+# asm 1: movdqa 176(<b1b1p=int64#4),>r11=int6464#12
+# asm 2: movdqa 176(<b1b1p=%rcx),>r11=%xmm11
+movdqa 176(%rcx),%xmm11
+
+# qhasm: t14 = r14
+# asm 1: movdqa <r14=int6464#4,>t14=int6464#4
+# asm 2: movdqa <r14=%xmm3,>t14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: float6464 t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t14=int6464#4
+# asm 2: mulpd SIX_SIX,<t14=%xmm3
+mulpd SIX_SIX,%xmm3
+
+# qhasm: float6464 r11 -= t14
+# asm 1: subpd <t14=int6464#4,<r11=int6464#12
+# asm 2: subpd <t14=%xmm3,<r11=%xmm11
+subpd %xmm3,%xmm11
+
+# qhasm: t17 = r17
+# asm 1: movdqa <r17=int6464#7,>t17=int6464#4
+# asm 2: movdqa <r17=%xmm6,>t17=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm: float6464 t17 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t17=int6464#4
+# asm 2: mulpd TWO_TWO,<t17=%xmm3
+mulpd TWO_TWO,%xmm3
+
+# qhasm: float6464 r11 += t17
+# asm 1: addpd <t17=int6464#4,<r11=int6464#12
+# asm 2: addpd <t17=%xmm3,<r11=%xmm11
+addpd %xmm3,%xmm11
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#4
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm3
+movdqa %xmm9,%xmm3
+
+# qhasm: float6464 t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t20=int6464#4
+# asm 2: mulpd SIX_SIX,<t20=%xmm3
+mulpd SIX_SIX,%xmm3
+
+# qhasm: float6464 r11 += t20
+# asm 1: addpd <t20=int6464#4,<r11=int6464#12
+# asm 2: addpd <t20=%xmm3,<r11=%xmm11
+addpd %xmm3,%xmm11
+
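+# The remaining code propagates carries between coefficients: each carry is
+# r_i * VINV_VINV, apparently rounded to an integer by adding and then
+# subtracting the large constant ROUND_ROUND; the carry is added into the
+# next coefficient and carry * V_V is subtracted from r_i, presumably to keep
+# every double-precision coefficient within its allowed range.
+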
+# qhasm: round = ROUND_ROUND
+# asm 1: movdqa ROUND_ROUND,<round=int6464#4
+# asm 2: movdqa ROUND_ROUND,<round=%xmm3
+movdqa ROUND_ROUND,%xmm3
+
+# qhasm: carry = r1
+# asm 1: movdqa <r1=int6464#2,>carry=int6464#7
+# asm 2: movdqa <r1=%xmm1,>carry=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r2 += carry
+# asm 1: addpd <carry=int6464#7,<r2=int6464#3
+# asm 2: addpd <carry=%xmm6,<r2=%xmm2
+addpd %xmm6,%xmm2
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r1 -= carry
+# asm 1: subpd <carry=int6464#7,<r1=int6464#2
+# asm 2: subpd <carry=%xmm6,<r1=%xmm1
+subpd %xmm6,%xmm1
+
+# qhasm: carry = r4
+# asm 1: movdqa <r4=int6464#5,>carry=int6464#7
+# asm 2: movdqa <r4=%xmm4,>carry=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r5 += carry
+# asm 1: addpd <carry=int6464#7,<r5=int6464#6
+# asm 2: addpd <carry=%xmm6,<r5=%xmm5
+addpd %xmm6,%xmm5
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r4 -= carry
+# asm 1: subpd <carry=int6464#7,<r4=int6464#5
+# asm 2: subpd <carry=%xmm6,<r4=%xmm4
+subpd %xmm6,%xmm4
+
+# qhasm: carry = r7
+# asm 1: movdqa <r7=int6464#8,>carry=int6464#7
+# asm 2: movdqa <r7=%xmm7,>carry=%xmm6
+movdqa %xmm7,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r8 += carry
+# asm 1: addpd <carry=int6464#7,<r8=int6464#9
+# asm 2: addpd <carry=%xmm6,<r8=%xmm8
+addpd %xmm6,%xmm8
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r7 -= carry
+# asm 1: subpd <carry=int6464#7,<r7=int6464#8
+# asm 2: subpd <carry=%xmm6,<r7=%xmm7
+subpd %xmm6,%xmm7
+
+# qhasm: carry = r10
+# asm 1: movdqa <r10=int6464#11,>carry=int6464#7
+# asm 2: movdqa <r10=%xmm10,>carry=%xmm6
+movdqa %xmm10,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r11 += carry
+# asm 1: addpd <carry=int6464#7,<r11=int6464#12
+# asm 2: addpd <carry=%xmm6,<r11=%xmm11
+addpd %xmm6,%xmm11
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r10 -= carry
+# asm 1: subpd <carry=int6464#7,<r10=int6464#11
+# asm 2: subpd <carry=%xmm6,<r10=%xmm10
+subpd %xmm6,%xmm10
+
+# qhasm: carry = r2
+# asm 1: movdqa <r2=int6464#3,>carry=int6464#7
+# asm 2: movdqa <r2=%xmm2,>carry=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r3 += carry
+# asm 1: addpd <carry=int6464#7,<r3=int6464#13
+# asm 2: addpd <carry=%xmm6,<r3=%xmm12
+addpd %xmm6,%xmm12
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r2 -= carry
+# asm 1: subpd <carry=int6464#7,<r2=int6464#3
+# asm 2: subpd <carry=%xmm6,<r2=%xmm2
+subpd %xmm6,%xmm2
+
+# qhasm: carry = r5
+# asm 1: movdqa <r5=int6464#6,>carry=int6464#7
+# asm 2: movdqa <r5=%xmm5,>carry=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r6 += carry
+# asm 1: addpd <carry=int6464#7,<r6=int6464#14
+# asm 2: addpd <carry=%xmm6,<r6=%xmm13
+addpd %xmm6,%xmm13
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r5 -= carry
+# asm 1: subpd <carry=int6464#7,<r5=int6464#6
+# asm 2: subpd <carry=%xmm6,<r5=%xmm5
+subpd %xmm6,%xmm5
+
+# qhasm: carry = r8
+# asm 1: movdqa <r8=int6464#9,>carry=int6464#7
+# asm 2: movdqa <r8=%xmm8,>carry=%xmm6
+movdqa %xmm8,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r9 += carry
+# asm 1: addpd <carry=int6464#7,<r9=int6464#15
+# asm 2: addpd <carry=%xmm6,<r9=%xmm14
+addpd %xmm6,%xmm14
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r8 -= carry
+# asm 1: subpd <carry=int6464#7,<r8=int6464#9
+# asm 2: subpd <carry=%xmm6,<r8=%xmm8
+subpd %xmm6,%xmm8
+
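+# The carry out of the top limb r11 wraps around instead of moving to a 13th
+# limb: it is subtracted from r0, r3 and r9, subtracted four times from r6,
+# and r11 -= carry*V keeps only the remainder.
+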
+# qhasm: carry = r11
+# asm 1: movdqa <r11=int6464#12,>carry=int6464#7
+# asm 2: movdqa <r11=%xmm11,>carry=%xmm6
+movdqa %xmm11,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r0 -= carry
+# asm 1: subpd <carry=int6464#7,<r0=int6464#1
+# asm 2: subpd <carry=%xmm6,<r0=%xmm0
+subpd %xmm6,%xmm0
+
+# qhasm: float6464 r3 -= carry
+# asm 1: subpd <carry=int6464#7,<r3=int6464#13
+# asm 2: subpd <carry=%xmm6,<r3=%xmm12
+subpd %xmm6,%xmm12
+
+# qhasm: t6 = carry
+# asm 1: movdqa <carry=int6464#7,>t6=int6464#10
+# asm 2: movdqa <carry=%xmm6,>t6=%xmm9
+movdqa %xmm6,%xmm9
+
+# qhasm: float6464 t6 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<t6=int6464#10
+# asm 2: mulpd FOUR_FOUR,<t6=%xmm9
+mulpd FOUR_FOUR,%xmm9
+
+# qhasm: float6464 r6 -= t6
+# asm 1: subpd <t6=int6464#10,<r6=int6464#14
+# asm 2: subpd <t6=%xmm9,<r6=%xmm13
+subpd %xmm9,%xmm13
+
+# qhasm: float6464 r9 -= carry
+# asm 1: subpd <carry=int6464#7,<r9=int6464#15
+# asm 2: subpd <carry=%xmm6,<r9=%xmm14
+subpd %xmm6,%xmm14
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r11 -= carry
+# asm 1: subpd <carry=int6464#7,<r11=int6464#12
+# asm 2: subpd <carry=%xmm6,<r11=%xmm11
+subpd %xmm6,%xmm11
+
+# qhasm: carry = r0
+# asm 1: movdqa <r0=int6464#1,>carry=int6464#7
+# asm 2: movdqa <r0=%xmm0,>carry=%xmm6
+movdqa %xmm0,%xmm6
+
+# qhasm: float6464 carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<carry=int6464#7
+# asm 2: mulpd V6INV_V6INV,<carry=%xmm6
+mulpd V6INV_V6INV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r1 += carry
+# asm 1: addpd <carry=int6464#7,<r1=int6464#2
+# asm 2: addpd <carry=%xmm6,<r1=%xmm1
+addpd %xmm6,%xmm1
+
+# qhasm: float6464 carry *= V6_V6
+# asm 1: mulpd V6_V6,<carry=int6464#7
+# asm 2: mulpd V6_V6,<carry=%xmm6
+mulpd V6_V6,%xmm6
+
+# qhasm: float6464 r0 -= carry
+# asm 1: subpd <carry=int6464#7,<r0=int6464#1
+# asm 2: subpd <carry=%xmm6,<r0=%xmm0
+subpd %xmm6,%xmm0
+
+# qhasm: *(int128 *)(rop +   0) =  r0
+# asm 1: movdqa <r0=int6464#1,0(<rop=int64#1)
+# asm 2: movdqa <r0=%xmm0,0(<rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: carry = r3
+# asm 1: movdqa <r3=int6464#13,>carry=int6464#1
+# asm 2: movdqa <r3=%xmm12,>carry=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r4 += carry
+# asm 1: addpd <carry=int6464#1,<r4=int6464#5
+# asm 2: addpd <carry=%xmm0,<r4=%xmm4
+addpd %xmm0,%xmm4
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r3 -= carry
+# asm 1: subpd <carry=int6464#1,<r3=int6464#13
+# asm 2: subpd <carry=%xmm0,<r3=%xmm12
+subpd %xmm0,%xmm12
+
+# qhasm: *(int128 *)(rop +  48) =  r3
+# asm 1: movdqa <r3=int6464#13,48(<rop=int64#1)
+# asm 2: movdqa <r3=%xmm12,48(<rop=%rdi)
+movdqa %xmm12,48(%rdi)
+
+# qhasm: carry = r6
+# asm 1: movdqa <r6=int6464#14,>carry=int6464#1
+# asm 2: movdqa <r6=%xmm13,>carry=%xmm0
+movdqa %xmm13,%xmm0
+
+# qhasm: float6464 carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<carry=int6464#1
+# asm 2: mulpd V6INV_V6INV,<carry=%xmm0
+mulpd V6INV_V6INV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r7 += carry
+# asm 1: addpd <carry=int6464#1,<r7=int6464#8
+# asm 2: addpd <carry=%xmm0,<r7=%xmm7
+addpd %xmm0,%xmm7
+
+# qhasm: float6464 carry *= V6_V6
+# asm 1: mulpd V6_V6,<carry=int6464#1
+# asm 2: mulpd V6_V6,<carry=%xmm0
+mulpd V6_V6,%xmm0
+
+# qhasm: float6464 r6 -= carry
+# asm 1: subpd <carry=int6464#1,<r6=int6464#14
+# asm 2: subpd <carry=%xmm0,<r6=%xmm13
+subpd %xmm0,%xmm13
+
+# qhasm: *(int128 *)(rop +  96) =  r6
+# asm 1: movdqa <r6=int6464#14,96(<rop=int64#1)
+# asm 2: movdqa <r6=%xmm13,96(<rop=%rdi)
+movdqa %xmm13,96(%rdi)
+
+# qhasm: carry = r9
+# asm 1: movdqa <r9=int6464#15,>carry=int6464#1
+# asm 2: movdqa <r9=%xmm14,>carry=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r10 += carry
+# asm 1: addpd <carry=int6464#1,<r10=int6464#11
+# asm 2: addpd <carry=%xmm0,<r10=%xmm10
+addpd %xmm0,%xmm10
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r9 -= carry
+# asm 1: subpd <carry=int6464#1,<r9=int6464#15
+# asm 2: subpd <carry=%xmm0,<r9=%xmm14
+subpd %xmm0,%xmm14
+
+# qhasm: *(int128 *)(rop + 144) =  r9
+# asm 1: movdqa <r9=int6464#15,144(<rop=int64#1)
+# asm 2: movdqa <r9=%xmm14,144(<rop=%rdi)
+movdqa %xmm14,144(%rdi)
+
+# qhasm: carry = r1
+# asm 1: movdqa <r1=int6464#2,>carry=int6464#1
+# asm 2: movdqa <r1=%xmm1,>carry=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r2 += carry
+# asm 1: addpd <carry=int6464#1,<r2=int6464#3
+# asm 2: addpd <carry=%xmm0,<r2=%xmm2
+addpd %xmm0,%xmm2
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r1 -= carry
+# asm 1: subpd <carry=int6464#1,<r1=int6464#2
+# asm 2: subpd <carry=%xmm0,<r1=%xmm1
+subpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(rop +  16) =  r1
+# asm 1: movdqa <r1=int6464#2,16(<rop=int64#1)
+# asm 2: movdqa <r1=%xmm1,16(<rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(rop +  32) =  r2
+# asm 1: movdqa <r2=int6464#3,32(<rop=int64#1)
+# asm 2: movdqa <r2=%xmm2,32(<rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: carry = r4
+# asm 1: movdqa <r4=int6464#5,>carry=int6464#1
+# asm 2: movdqa <r4=%xmm4,>carry=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r5 += carry
+# asm 1: addpd <carry=int6464#1,<r5=int6464#6
+# asm 2: addpd <carry=%xmm0,<r5=%xmm5
+addpd %xmm0,%xmm5
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r4 -= carry
+# asm 1: subpd <carry=int6464#1,<r4=int6464#5
+# asm 2: subpd <carry=%xmm0,<r4=%xmm4
+subpd %xmm0,%xmm4
+
+# qhasm: *(int128 *)(rop +  64) =  r4
+# asm 1: movdqa <r4=int6464#5,64(<rop=int64#1)
+# asm 2: movdqa <r4=%xmm4,64(<rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(rop +  80) =  r5
+# asm 1: movdqa <r5=int6464#6,80(<rop=int64#1)
+# asm 2: movdqa <r5=%xmm5,80(<rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: carry = r7
+# asm 1: movdqa <r7=int6464#8,>carry=int6464#1
+# asm 2: movdqa <r7=%xmm7,>carry=%xmm0
+movdqa %xmm7,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r8 += carry
+# asm 1: addpd <carry=int6464#1,<r8=int6464#9
+# asm 2: addpd <carry=%xmm0,<r8=%xmm8
+addpd %xmm0,%xmm8
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r7 -= carry
+# asm 1: subpd <carry=int6464#1,<r7=int6464#8
+# asm 2: subpd <carry=%xmm0,<r7=%xmm7
+subpd %xmm0,%xmm7
+
+# qhasm: *(int128 *)(rop + 112) =  r7
+# asm 1: movdqa <r7=int6464#8,112(<rop=int64#1)
+# asm 2: movdqa <r7=%xmm7,112(<rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(rop + 128) =  r8
+# asm 1: movdqa <r8=int6464#9,128(<rop=int64#1)
+# asm 2: movdqa <r8=%xmm8,128(<rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: carry = r10
+# asm 1: movdqa <r10=int6464#11,>carry=int6464#1
+# asm 2: movdqa <r10=%xmm10,>carry=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r11 += carry
+# asm 1: addpd <carry=int6464#1,<r11=int6464#12
+# asm 2: addpd <carry=%xmm0,<r11=%xmm11
+addpd %xmm0,%xmm11
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r10 -= carry
+# asm 1: subpd <carry=int6464#1,<r10=int6464#11
+# asm 2: subpd <carry=%xmm0,<r10=%xmm10
+subpd %xmm0,%xmm10
+
+# qhasm: *(int128 *)(rop + 160) = r10
+# asm 1: movdqa <r10=int6464#11,160(<rop=int64#1)
+# asm 2: movdqa <r10=%xmm10,160(<rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(rop + 176) = r11
+# asm 1: movdqa <r11=int6464#12,176(<rop=int64#1)
+# asm 2: movdqa <r11=%xmm11,176(<rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 4233 - 0
dclxvi-20130329/fp2e_mul_fpe.s

@@ -0,0 +1,4233 @@
+# File:   dclxvi-20130329/fp2e_mul_fpe.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_mul_fpe_qhasm
+.text
+.p2align 5
+.globl _fp2e_mul_fpe_qhasm
+.globl fp2e_mul_fpe_qhasm
+_fp2e_mul_fpe_qhasm:
+fp2e_mul_fpe_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $768,%r11
+sub %r11,%rsp
+
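+# fp2e_mul_fpe_qhasm(rop=%rdi, op1=%rsi, op2=%rdx): multiply an fp2e element
+# op1 (12 coefficients, the two Fp components of each packed into one 128-bit
+# slot) by an fpe element op2 (12 double coefficients), writing the product to
+# rop.  The prologue above aligns the stack to 32 bytes and reserves 768 bytes
+# of scratch space (0mys).
+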
+# qhasm: int64 rop
+
+# qhasm: int64 op1
+
+# qhasm: int64 op2
+
+# qhasm: input rop
+
+# qhasm: input op1
+
+# qhasm: input op2
+
+# qhasm: stack6144 0mys
+
+# qhasm: int64 0mysp
+
+# qhasm: 0mysp = &0mys
+# asm 1: leaq <0mys=stack6144#1,>0mysp=int64#4
+# asm 2: leaq <0mys=0(%rsp),>0mysp=%rcx
+leaq 0(%rsp),%rcx
+
+# qhasm: int64 c1
+
+# qhasm: int64 c2
+
+# qhasm: int64 c3
+
+# qhasm: int64 c4
+
+# qhasm: int64 c5
+
+# qhasm: int64 c6
+
+# qhasm: int64 c7
+
+# qhasm: caller c1
+
+# qhasm: caller c2
+
+# qhasm: caller c3
+
+# qhasm: caller c4
+
+# qhasm: caller c5
+
+# qhasm: caller c6
+
+# qhasm: caller c7
+
+# qhasm: stack64 c1_stack
+
+# qhasm: stack64 c2_stack
+
+# qhasm: stack64 c3_stack
+
+# qhasm: stack64 c4_stack
+
+# qhasm: stack64 c5_stack
+
+# qhasm: stack64 c6_stack
+
+# qhasm: stack64 c7_stack
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 0t12
+
+# qhasm: int6464 0t13
+
+# qhasm: int6464 0t14
+
+# qhasm: int6464 0t15
+
+# qhasm: int6464 0t16
+
+# qhasm: int6464 0t17
+
+# qhasm: int6464 0t18
+
+# qhasm: int6464 0t19
+
+# qhasm: int6464 0t20
+
+# qhasm: int6464 0t21
+
+# qhasm: int6464 0t22
+
+# qhasm: int6464 0t
+
+# qhasm: int64 1mysp
+
+# qhasm: int64 0arg1p
+
+# qhasm: 1mysp = 0mysp
+# asm 1: mov  <0mysp=int64#4,>1mysp=int64#4
+# asm 2: mov  <0mysp=%rcx,>1mysp=%rcx
+mov  %rcx,%rcx
+
+# qhasm: 0arg1p = 1mysp+576
+# asm 1: lea  576(<1mysp=int64#4),>0arg1p=int64#5
+# asm 2: lea  576(<1mysp=%rcx),>0arg1p=%r8
+lea  576(%rcx),%r8
+
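+# Each of op2's 12 double coefficients is broadcast into both 64-bit lanes
+# (movddup) and spilled to the scratch area 0arg1p, so every mulpd below
+# processes both Fp components of op1 at once.
+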
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 0)
+# asm 1: movddup 0(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 0(<op2=%rdx),>0t=%xmm0
+movddup 0(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 0) = 0t
+# asm 1: movdqa <0t=int6464#1,0(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,0(<0arg1p=%r8)
+movdqa %xmm0,0(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 8)
+# asm 1: movddup 8(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 8(<op2=%rdx),>0t=%xmm0
+movddup 8(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 16) = 0t
+# asm 1: movdqa <0t=int6464#1,16(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,16(<0arg1p=%r8)
+movdqa %xmm0,16(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 16)
+# asm 1: movddup 16(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 16(<op2=%rdx),>0t=%xmm0
+movddup 16(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 32) = 0t
+# asm 1: movdqa <0t=int6464#1,32(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,32(<0arg1p=%r8)
+movdqa %xmm0,32(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 24)
+# asm 1: movddup 24(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 24(<op2=%rdx),>0t=%xmm0
+movddup 24(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 48) = 0t
+# asm 1: movdqa <0t=int6464#1,48(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,48(<0arg1p=%r8)
+movdqa %xmm0,48(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 32)
+# asm 1: movddup 32(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 32(<op2=%rdx),>0t=%xmm0
+movddup 32(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 64) = 0t
+# asm 1: movdqa <0t=int6464#1,64(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,64(<0arg1p=%r8)
+movdqa %xmm0,64(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 40)
+# asm 1: movddup 40(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 40(<op2=%rdx),>0t=%xmm0
+movddup 40(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 80) = 0t
+# asm 1: movdqa <0t=int6464#1,80(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,80(<0arg1p=%r8)
+movdqa %xmm0,80(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 48)
+# asm 1: movddup 48(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 48(<op2=%rdx),>0t=%xmm0
+movddup 48(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 96) = 0t
+# asm 1: movdqa <0t=int6464#1,96(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,96(<0arg1p=%r8)
+movdqa %xmm0,96(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 56)
+# asm 1: movddup 56(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 56(<op2=%rdx),>0t=%xmm0
+movddup 56(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 112) = 0t
+# asm 1: movdqa <0t=int6464#1,112(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,112(<0arg1p=%r8)
+movdqa %xmm0,112(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 64)
+# asm 1: movddup 64(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 64(<op2=%rdx),>0t=%xmm0
+movddup 64(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 128) = 0t
+# asm 1: movdqa <0t=int6464#1,128(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,128(<0arg1p=%r8)
+movdqa %xmm0,128(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 72)
+# asm 1: movddup 72(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 72(<op2=%rdx),>0t=%xmm0
+movddup 72(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 144) = 0t
+# asm 1: movdqa <0t=int6464#1,144(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,144(<0arg1p=%r8)
+movdqa %xmm0,144(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 80)
+# asm 1: movddup 80(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 80(<op2=%rdx),>0t=%xmm0
+movddup 80(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 160) = 0t
+# asm 1: movdqa <0t=int6464#1,160(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,160(<0arg1p=%r8)
+movdqa %xmm0,160(%r8)
+
+# qhasm: float6464 0t[0] = 0t[1] = *(float64 *)(op2 + 88)
+# asm 1: movddup 88(<op2=int64#3),>0t=int6464#1
+# asm 2: movddup 88(<op2=%rdx),>0t=%xmm0
+movddup 88(%rdx),%xmm0
+
+# qhasm: *(int128 *)(0arg1p + 176) = 0t
+# asm 1: movdqa <0t=int6464#1,176(<0arg1p=int64#5)
+# asm 2: movdqa <0t=%xmm0,176(<0arg1p=%r8)
+movdqa %xmm0,176(%r8)
+
+# qhasm: int6464 0yoff
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: int6464 0t4
+
+# qhasm: int6464 0t5
+
+# qhasm: int6464 0t6
+
+# qhasm: int6464 0t7
+
+# qhasm: int6464 0t8
+
+# qhasm: int6464 0t9
+
+# qhasm: int6464 0t10
+
+# qhasm: int6464 0t11
+
+# qhasm: int6464 1t12
+
+# qhasm: int6464 1t13
+
+# qhasm: int6464 1t14
+
+# qhasm: int6464 1t15
+
+# qhasm: int6464 1t16
+
+# qhasm: int6464 1t17
+
+# qhasm: int6464 1t18
+
+# qhasm: int6464 1t19
+
+# qhasm: int6464 1t20
+
+# qhasm: int6464 1t21
+
+# qhasm: int6464 1t22
+
+# qhasm: int6464 0ab0
+
+# qhasm: int6464 0ab1
+
+# qhasm: int6464 0ab2
+
+# qhasm: int6464 0ab3
+
+# qhasm: int6464 0ab4
+
+# qhasm: int6464 0ab5
+
+# qhasm: int6464 0ab6
+
+# qhasm: int6464 0ab7
+
+# qhasm: int6464 0ab8
+
+# qhasm: int6464 0ab9
+
+# qhasm: int6464 0ab10
+
+# qhasm: int6464 0ab11
+
+# qhasm: int6464 0ab0six
+
+# qhasm: int6464 0ab1six
+
+# qhasm: int6464 0ab2six
+
+# qhasm: int6464 0ab3six
+
+# qhasm: int6464 0ab4six
+
+# qhasm: int6464 0ab5six
+
+# qhasm: int6464 0ab6six
+
+# qhasm: int6464 0ab7six
+
+# qhasm: int6464 0ab8six
+
+# qhasm: int6464 0ab9six
+
+# qhasm: int6464 0ab10six
+
+# qhasm: int6464 0ab11six
+
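+# Schoolbook multiplication: each broadcast coefficient 0ab_i of op2 is
+# multiplied by all 12 coefficients of op1 and accumulated into r0..r11 and
+# the overflow terms t12..t22; each low limb r_i is stored to 1mysp once no
+# further products land on it.
+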
+# qhasm: 0ab0 = *(int128 *)(0arg1p + 0)
+# asm 1: movdqa 0(<0arg1p=int64#5),>0ab0=int6464#1
+# asm 2: movdqa 0(<0arg1p=%r8),>0ab0=%xmm0
+movdqa 0(%r8),%xmm0
+
+# qhasm: 0t0 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t0=int6464#2
+# asm 2: movdqa <0ab0=%xmm0,>0t0=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: float6464 0t0 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t0=int6464#2
+# asm 2: mulpd 0(<op1=%rsi),<0t0=%xmm1
+mulpd 0(%rsi),%xmm1
+
+# qhasm: 0r0 =0t0
+# asm 1: movdqa <0t0=int6464#2,>0r0=int6464#2
+# asm 2: movdqa <0t0=%xmm1,>0r0=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: 0t1 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t1=int6464#3
+# asm 2: movdqa <0ab0=%xmm0,>0t1=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: float6464 0t1 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t1=int6464#3
+# asm 2: mulpd 16(<op1=%rsi),<0t1=%xmm2
+mulpd 16(%rsi),%xmm2
+
+# qhasm: 0r1 =0t1
+# asm 1: movdqa <0t1=int6464#3,>0r1=int6464#3
+# asm 2: movdqa <0t1=%xmm2,>0r1=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: 0t2 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t2=int6464#4
+# asm 2: movdqa <0ab0=%xmm0,>0t2=%xmm3
+movdqa %xmm0,%xmm3
+
+# qhasm: float6464 0t2 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t2=int6464#4
+# asm 2: mulpd 32(<op1=%rsi),<0t2=%xmm3
+mulpd 32(%rsi),%xmm3
+
+# qhasm: 0r2 =0t2
+# asm 1: movdqa <0t2=int6464#4,>0r2=int6464#4
+# asm 2: movdqa <0t2=%xmm3,>0r2=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: 0t3 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t3=int6464#5
+# asm 2: movdqa <0ab0=%xmm0,>0t3=%xmm4
+movdqa %xmm0,%xmm4
+
+# qhasm: float6464 0t3 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t3=int6464#5
+# asm 2: mulpd 48(<op1=%rsi),<0t3=%xmm4
+mulpd 48(%rsi),%xmm4
+
+# qhasm: 0r3 =0t3
+# asm 1: movdqa <0t3=int6464#5,>0r3=int6464#5
+# asm 2: movdqa <0t3=%xmm4,>0r3=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: 0t4 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t4=int6464#6
+# asm 2: movdqa <0ab0=%xmm0,>0t4=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: float6464 0t4 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t4=int6464#6
+# asm 2: mulpd 64(<op1=%rsi),<0t4=%xmm5
+mulpd 64(%rsi),%xmm5
+
+# qhasm: 0r4 =0t4
+# asm 1: movdqa <0t4=int6464#6,>0r4=int6464#6
+# asm 2: movdqa <0t4=%xmm5,>0r4=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: 0t5 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t5=int6464#7
+# asm 2: movdqa <0ab0=%xmm0,>0t5=%xmm6
+movdqa %xmm0,%xmm6
+
+# qhasm: float6464 0t5 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<0t5=int6464#7
+# asm 2: mulpd 80(<op1=%rsi),<0t5=%xmm6
+mulpd 80(%rsi),%xmm6
+
+# qhasm: 0r5 =0t5
+# asm 1: movdqa <0t5=int6464#7,>0r5=int6464#7
+# asm 2: movdqa <0t5=%xmm6,>0r5=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: 0t6 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t6=int6464#8
+# asm 2: movdqa <0ab0=%xmm0,>0t6=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm: float6464 0t6 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<0t6=int6464#8
+# asm 2: mulpd 96(<op1=%rsi),<0t6=%xmm7
+mulpd 96(%rsi),%xmm7
+
+# qhasm: 0r6 =0t6
+# asm 1: movdqa <0t6=int6464#8,>0r6=int6464#8
+# asm 2: movdqa <0t6=%xmm7,>0r6=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: 0t7 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t7=int6464#9
+# asm 2: movdqa <0ab0=%xmm0,>0t7=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<0t7=int6464#9
+# asm 2: mulpd 112(<op1=%rsi),<0t7=%xmm8
+mulpd 112(%rsi),%xmm8
+
+# qhasm: 0r7 =0t7
+# asm 1: movdqa <0t7=int6464#9,>0r7=int6464#9
+# asm 2: movdqa <0t7=%xmm8,>0r7=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: 0t8 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t8=int6464#10
+# asm 2: movdqa <0ab0=%xmm0,>0t8=%xmm9
+movdqa %xmm0,%xmm9
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<0t8=int6464#10
+# asm 2: mulpd 128(<op1=%rsi),<0t8=%xmm9
+mulpd 128(%rsi),%xmm9
+
+# qhasm: 0r8 =0t8
+# asm 1: movdqa <0t8=int6464#10,>0r8=int6464#10
+# asm 2: movdqa <0t8=%xmm9,>0r8=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: 0t9 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t9=int6464#11
+# asm 2: movdqa <0ab0=%xmm0,>0t9=%xmm10
+movdqa %xmm0,%xmm10
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<0t9=int6464#11
+# asm 2: mulpd 144(<op1=%rsi),<0t9=%xmm10
+mulpd 144(%rsi),%xmm10
+
+# qhasm: 0r9 =0t9
+# asm 1: movdqa <0t9=int6464#11,>0r9=int6464#11
+# asm 2: movdqa <0t9=%xmm10,>0r9=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: 0t10 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t10=int6464#12
+# asm 2: movdqa <0ab0=%xmm0,>0t10=%xmm11
+movdqa %xmm0,%xmm11
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<0t10=int6464#12
+# asm 2: mulpd 160(<op1=%rsi),<0t10=%xmm11
+mulpd 160(%rsi),%xmm11
+
+# qhasm: 0r10 =0t10
+# asm 1: movdqa <0t10=int6464#12,>0r10=int6464#12
+# asm 2: movdqa <0t10=%xmm11,>0r10=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: 0t11 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t11=int6464#1
+# asm 2: movdqa <0ab0=%xmm0,>0t11=%xmm0
+movdqa %xmm0,%xmm0
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<0t11=int6464#1
+# asm 2: mulpd 176(<op1=%rsi),<0t11=%xmm0
+mulpd 176(%rsi),%xmm0
+
+# qhasm: 0r11 =0t11
+# asm 1: movdqa <0t11=int6464#1,>0r11=int6464#1
+# asm 2: movdqa <0t11=%xmm0,>0r11=%xmm0
+movdqa %xmm0,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 0) = 0r0
+# asm 1: movdqa <0r0=int6464#2,0(<1mysp=int64#4)
+# asm 2: movdqa <0r0=%xmm1,0(<1mysp=%rcx)
+movdqa %xmm1,0(%rcx)
+
+# qhasm: 0ab1 = *(int128 *)(0arg1p + 16)
+# asm 1: movdqa 16(<0arg1p=int64#5),>0ab1=int6464#2
+# asm 2: movdqa 16(<0arg1p=%r8),>0ab1=%xmm1
+movdqa 16(%r8),%xmm1
+
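+# From coefficient 1 onwards, op2's coefficient is kept both as-is (0ab1) and
+# pre-scaled by six (0ab1six); the scaled copy serves the coefficient products
+# that pick up an extra factor of 6 in dclxvi's 12-double representation of Fp.
+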
+# qhasm: 0ab1six = 0ab1
+# asm 1: movdqa <0ab1=int6464#2,>0ab1six=int6464#13
+# asm 2: movdqa <0ab1=%xmm1,>0ab1six=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: float6464 0ab1six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab1six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab1six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t1 = 0ab1
+# asm 1: movdqa <0ab1=int6464#2,>0t1=int6464#14
+# asm 2: movdqa <0ab1=%xmm1,>0t1=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0t1 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t1=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t1=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r1 +=0t1
+# asm 1: addpd <0t1=int6464#14,<0r1=int6464#3
+# asm 2: addpd <0t1=%xmm13,<0r1=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 0t7 = 0ab1
+# asm 1: movdqa <0ab1=int6464#2,>0t7=int6464#2
+# asm 2: movdqa <0ab1=%xmm1,>0t7=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<0t7=int6464#2
+# asm 2: mulpd 96(<op1=%rsi),<0t7=%xmm1
+mulpd 96(%rsi),%xmm1
+
+# qhasm: float6464 0r7 +=0t7
+# asm 1: addpd <0t7=int6464#2,<0r7=int6464#9
+# asm 2: addpd <0t7=%xmm1,<0r7=%xmm8
+addpd %xmm1,%xmm8
+
+# qhasm: 0t2 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t2=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t2=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t2 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t2=int6464#2
+# asm 2: mulpd 16(<op1=%rsi),<0t2=%xmm1
+mulpd 16(%rsi),%xmm1
+
+# qhasm: float6464 0r2 +=0t2
+# asm 1: addpd <0t2=int6464#2,<0r2=int6464#4
+# asm 2: addpd <0t2=%xmm1,<0r2=%xmm3
+addpd %xmm1,%xmm3
+
+# qhasm: 0t3 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t3=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t3=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t3 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t3=int6464#2
+# asm 2: mulpd 32(<op1=%rsi),<0t3=%xmm1
+mulpd 32(%rsi),%xmm1
+
+# qhasm: float6464 0r3 +=0t3
+# asm 1: addpd <0t3=int6464#2,<0r3=int6464#5
+# asm 2: addpd <0t3=%xmm1,<0r3=%xmm4
+addpd %xmm1,%xmm4
+
+# qhasm: 0t4 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t4=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t4=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t4 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t4=int6464#2
+# asm 2: mulpd 48(<op1=%rsi),<0t4=%xmm1
+mulpd 48(%rsi),%xmm1
+
+# qhasm: float6464 0r4 +=0t4
+# asm 1: addpd <0t4=int6464#2,<0r4=int6464#6
+# asm 2: addpd <0t4=%xmm1,<0r4=%xmm5
+addpd %xmm1,%xmm5
+
+# qhasm: 0t5 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t5=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t5=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t5 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t5=int6464#2
+# asm 2: mulpd 64(<op1=%rsi),<0t5=%xmm1
+mulpd 64(%rsi),%xmm1
+
+# qhasm: float6464 0r5 +=0t5
+# asm 1: addpd <0t5=int6464#2,<0r5=int6464#7
+# asm 2: addpd <0t5=%xmm1,<0r5=%xmm6
+addpd %xmm1,%xmm6
+
+# qhasm: 0t6 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t6=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t6=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t6 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<0t6=int6464#2
+# asm 2: mulpd 80(<op1=%rsi),<0t6=%xmm1
+mulpd 80(%rsi),%xmm1
+
+# qhasm: float6464 0r6 +=0t6
+# asm 1: addpd <0t6=int6464#2,<0r6=int6464#8
+# asm 2: addpd <0t6=%xmm1,<0r6=%xmm7
+addpd %xmm1,%xmm7
+
+# qhasm: 0t8 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t8=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t8=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<0t8=int6464#2
+# asm 2: mulpd 112(<op1=%rsi),<0t8=%xmm1
+mulpd 112(%rsi),%xmm1
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#2,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm1,<0r8=%xmm9
+addpd %xmm1,%xmm9
+
+# qhasm: 0t9 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t9=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t9=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<0t9=int6464#2
+# asm 2: mulpd 128(<op1=%rsi),<0t9=%xmm1
+mulpd 128(%rsi),%xmm1
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#2,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm1,<0r9=%xmm10
+addpd %xmm1,%xmm10
+
+# qhasm: 0t10 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t10=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t10=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<0t10=int6464#2
+# asm 2: mulpd 144(<op1=%rsi),<0t10=%xmm1
+mulpd 144(%rsi),%xmm1
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#2,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm1,<0r10=%xmm11
+addpd %xmm1,%xmm11
+
+# qhasm: 0t11 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t11=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t11=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<0t11=int6464#2
+# asm 2: mulpd 160(<op1=%rsi),<0t11=%xmm1
+mulpd 160(%rsi),%xmm1
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#2,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm1,<0r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 1t12 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>1t12=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>1t12=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t12=int6464#2
+# asm 2: mulpd 176(<op1=%rsi),<1t12=%xmm1
+mulpd 176(%rsi),%xmm1
+
+# qhasm: 0t12 =1t12
+# asm 1: movdqa <1t12=int6464#2,>0t12=int6464#2
+# asm 2: movdqa <1t12=%xmm1,>0t12=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: *(int128 *)(1mysp + 16) = 0r1
+# asm 1: movdqa <0r1=int6464#3,16(<1mysp=int64#4)
+# asm 2: movdqa <0r1=%xmm2,16(<1mysp=%rcx)
+movdqa %xmm2,16(%rcx)
+
+# qhasm: 0ab2 = *(int128 *)(0arg1p + 32)
+# asm 1: movdqa 32(<0arg1p=int64#5),>0ab2=int6464#3
+# asm 2: movdqa 32(<0arg1p=%r8),>0ab2=%xmm2
+movdqa 32(%r8),%xmm2
+
+# qhasm: 0ab2six = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0ab2six=int6464#13
+# asm 2: movdqa <0ab2=%xmm2,>0ab2six=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: float6464 0ab2six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab2six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab2six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t2 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0t2=int6464#14
+# asm 2: movdqa <0ab2=%xmm2,>0t2=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0t2 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t2=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t2=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r2 +=0t2
+# asm 1: addpd <0t2=int6464#14,<0r2=int6464#4
+# asm 2: addpd <0t2=%xmm13,<0r2=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: 0t7 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0t7=int6464#14
+# asm 2: movdqa <0ab2=%xmm2,>0t7=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<0t7=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<0t7=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0r7 +=0t7
+# asm 1: addpd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addpd <0t7=%xmm13,<0r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0t8=int6464#14
+# asm 2: movdqa <0ab2=%xmm2,>0t8=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<0t8=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<0t8=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm13,<0r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: 1t13 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>1t13=int6464#3
+# asm 2: movdqa <0ab2=%xmm2,>1t13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t13=int6464#3
+# asm 2: mulpd 176(<op1=%rsi),<1t13=%xmm2
+mulpd 176(%rsi),%xmm2
+
+# qhasm: 0t13 =1t13
+# asm 1: movdqa <1t13=int6464#3,>0t13=int6464#3
+# asm 2: movdqa <1t13=%xmm2,>0t13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: 0t3 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t3=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t3=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t3 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t3=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<0t3=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0r3 +=0t3
+# asm 1: addpd <0t3=int6464#14,<0r3=int6464#5
+# asm 2: addpd <0t3=%xmm13,<0r3=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: 0t4 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t4=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t4=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t4 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t4=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<0t4=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0r4 +=0t4
+# asm 1: addpd <0t4=int6464#14,<0r4=int6464#6
+# asm 2: addpd <0t4=%xmm13,<0r4=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: 0t5 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t5=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t5 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t5=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<0t5=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0r5 +=0t5
+# asm 1: addpd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addpd <0t5=%xmm13,<0r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 0t6 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t6=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<0t6=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0r6 +=0t6
+# asm 1: addpd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addpd <0t6=%xmm13,<0r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: 0t9 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t9=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t9=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<0t9=int6464#14
+# asm 2: mulpd 112(<op1=%rsi),<0t9=%xmm13
+mulpd 112(%rsi),%xmm13
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm13,<0r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<0t10=int6464#14
+# asm 2: mulpd 128(<op1=%rsi),<0t10=%xmm13
+mulpd 128(%rsi),%xmm13
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm13,<0r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 144(<op1=%rsi),<0t11=%xmm13
+mulpd 144(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t12 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>1t12=int6464#13
+# asm 2: movdqa <0ab2six=%xmm12,>1t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t12=int6464#13
+# asm 2: mulpd 160(<op1=%rsi),<1t12=%xmm12
+mulpd 160(%rsi),%xmm12
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#13,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm12,<0t12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(1mysp + 32) = 0r2
+# asm 1: movdqa <0r2=int6464#4,32(<1mysp=int64#4)
+# asm 2: movdqa <0r2=%xmm3,32(<1mysp=%rcx)
+movdqa %xmm3,32(%rcx)
+
+# qhasm: 0ab3 = *(int128 *)(0arg1p + 48)
+# asm 1: movdqa 48(<0arg1p=int64#5),>0ab3=int6464#4
+# asm 2: movdqa 48(<0arg1p=%r8),>0ab3=%xmm3
+movdqa 48(%r8),%xmm3
+
+# qhasm: 0ab3six = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0ab3six=int6464#13
+# asm 2: movdqa <0ab3=%xmm3,>0ab3six=%xmm12
+movdqa %xmm3,%xmm12
+
+# qhasm: float6464 0ab3six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab3six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab3six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t3 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t3=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t3=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t3 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t3=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t3=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r3 +=0t3
+# asm 1: addpd <0t3=int6464#14,<0r3=int6464#5
+# asm 2: addpd <0t3=%xmm13,<0r3=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: 0t7 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t7=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t7=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t7=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<0t7=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0r7 +=0t7
+# asm 1: addpd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addpd <0t7=%xmm13,<0r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t8=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t8=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<0t8=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<0t8=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm13,<0r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: 0t9 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t9=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t9=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<0t9=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<0t9=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm13,<0r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: 1t13 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>1t13=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>1t13=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t13=int6464#14
+# asm 2: mulpd 160(<op1=%rsi),<1t13=%xmm13
+mulpd 160(%rsi),%xmm13
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#14,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm13,<0t13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 1t14 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>1t14=int6464#4
+# asm 2: movdqa <0ab3=%xmm3,>1t14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t14=int6464#4
+# asm 2: mulpd 176(<op1=%rsi),<1t14=%xmm3
+mulpd 176(%rsi),%xmm3
+
+# qhasm: 0t14 =1t14
+# asm 1: movdqa <1t14=int6464#4,>0t14=int6464#4
+# asm 2: movdqa <1t14=%xmm3,>0t14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: 0t4 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t4=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t4=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t4 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t4=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<0t4=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0r4 +=0t4
+# asm 1: addpd <0t4=int6464#14,<0r4=int6464#6
+# asm 2: addpd <0t4=%xmm13,<0r4=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: 0t5 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t5=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t5 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t5=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<0t5=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0r5 +=0t5
+# asm 1: addpd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addpd <0t5=%xmm13,<0r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 0t6 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t6=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<0t6=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0r6 +=0t6
+# asm 1: addpd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addpd <0t6=%xmm13,<0r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: 0t10 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<0t10=int6464#14
+# asm 2: mulpd 112(<op1=%rsi),<0t10=%xmm13
+mulpd 112(%rsi),%xmm13
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm13,<0r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 128(<op1=%rsi),<0t11=%xmm13
+mulpd 128(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t12 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>1t12=int6464#13
+# asm 2: movdqa <0ab3six=%xmm12,>1t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t12=int6464#13
+# asm 2: mulpd 144(<op1=%rsi),<1t12=%xmm12
+mulpd 144(%rsi),%xmm12
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#13,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm12,<0t12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(1mysp + 48) = 0r3
+# asm 1: movdqa <0r3=int6464#5,48(<1mysp=int64#4)
+# asm 2: movdqa <0r3=%xmm4,48(<1mysp=%rcx)
+movdqa %xmm4,48(%rcx)
+
+# qhasm: 0ab4 = *(int128 *)(0arg1p + 64)
+# asm 1: movdqa 64(<0arg1p=int64#5),>0ab4=int6464#5
+# asm 2: movdqa 64(<0arg1p=%r8),>0ab4=%xmm4
+movdqa 64(%r8),%xmm4
+
+# qhasm: 0ab4six = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0ab4six=int6464#13
+# asm 2: movdqa <0ab4=%xmm4,>0ab4six=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 0ab4six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab4six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab4six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t4 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t4=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t4=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t4 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t4=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t4=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r4 +=0t4
+# asm 1: addpd <0t4=int6464#14,<0r4=int6464#6
+# asm 2: addpd <0t4=%xmm13,<0r4=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: 0t7 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t7=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t7=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t7=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<0t7=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0r7 +=0t7
+# asm 1: addpd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addpd <0t7=%xmm13,<0r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t8=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t8=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t8=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<0t8=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm13,<0r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: 0t9 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t9=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t9=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<0t9=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<0t9=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm13,<0r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t10=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t10=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<0t10=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<0t10=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm13,<0r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: 1t13 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>1t13=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>1t13=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t13=int6464#14
+# asm 2: mulpd 144(<op1=%rsi),<1t13=%xmm13
+mulpd 144(%rsi),%xmm13
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#14,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm13,<0t13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 1t14 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>1t14=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>1t14=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t14=int6464#14
+# asm 2: mulpd 160(<op1=%rsi),<1t14=%xmm13
+mulpd 160(%rsi),%xmm13
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#14,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm13,<0t14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: 1t15 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>1t15=int6464#5
+# asm 2: movdqa <0ab4=%xmm4,>1t15=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t15=int6464#5
+# asm 2: mulpd 176(<op1=%rsi),<1t15=%xmm4
+mulpd 176(%rsi),%xmm4
+
+# qhasm: 0t15 =1t15
+# asm 1: movdqa <1t15=int6464#5,>0t15=int6464#5
+# asm 2: movdqa <1t15=%xmm4,>0t15=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: 0t5 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>0t5=int6464#14
+# asm 2: movdqa <0ab4six=%xmm12,>0t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t5 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t5=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<0t5=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0r5 +=0t5
+# asm 1: addpd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addpd <0t5=%xmm13,<0r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 0t6 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab4six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t6=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<0t6=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0r6 +=0t6
+# asm 1: addpd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addpd <0t6=%xmm13,<0r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: 0t11 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab4six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 112(<op1=%rsi),<0t11=%xmm13
+mulpd 112(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t12 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>1t12=int6464#13
+# asm 2: movdqa <0ab4six=%xmm12,>1t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t12=int6464#13
+# asm 2: mulpd 128(<op1=%rsi),<1t12=%xmm12
+mulpd 128(%rsi),%xmm12
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#13,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm12,<0t12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(1mysp + 64) = 0r4
+# asm 1: movdqa <0r4=int6464#6,64(<1mysp=int64#4)
+# asm 2: movdqa <0r4=%xmm5,64(<1mysp=%rcx)
+movdqa %xmm5,64(%rcx)
+
+# qhasm: 0ab5 = *(int128 *)(0arg1p + 80)
+# asm 1: movdqa 80(<0arg1p=int64#5),>0ab5=int6464#6
+# asm 2: movdqa 80(<0arg1p=%r8),>0ab5=%xmm5
+movdqa 80(%r8),%xmm5
+
+# qhasm: 0ab5six = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0ab5six=int6464#13
+# asm 2: movdqa <0ab5=%xmm5,>0ab5six=%xmm12
+movdqa %xmm5,%xmm12
+
+# qhasm: float6464 0ab5six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab5six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab5six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t5 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t5=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t5=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t5 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t5=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t5=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r5 +=0t5
+# asm 1: addpd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addpd <0t5=%xmm13,<0r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 0t7 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t7=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t7=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t7=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<0t7=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0r7 +=0t7
+# asm 1: addpd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addpd <0t7=%xmm13,<0r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t8=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t8=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t8=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<0t8=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm13,<0r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: 0t9 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t9=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t9=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t9=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<0t9=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm13,<0r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t10=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t10=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<0t10=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<0t10=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm13,<0r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t11=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t11=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<0t11=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t13 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>1t13=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>1t13=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t13=int6464#14
+# asm 2: mulpd 128(<op1=%rsi),<1t13=%xmm13
+mulpd 128(%rsi),%xmm13
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#14,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm13,<0t13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 1t14 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>1t14=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>1t14=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t14=int6464#14
+# asm 2: mulpd 144(<op1=%rsi),<1t14=%xmm13
+mulpd 144(%rsi),%xmm13
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#14,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm13,<0t14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: 1t15 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>1t15=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>1t15=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t15=int6464#14
+# asm 2: mulpd 160(<op1=%rsi),<1t15=%xmm13
+mulpd 160(%rsi),%xmm13
+
+# qhasm: float6464 0t15 +=1t15
+# asm 1: addpd <1t15=int6464#14,<0t15=int6464#5
+# asm 2: addpd <1t15=%xmm13,<0t15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: 1t16 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>1t16=int6464#6
+# asm 2: movdqa <0ab5=%xmm5,>1t16=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: float6464 1t16 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t16=int6464#6
+# asm 2: mulpd 176(<op1=%rsi),<1t16=%xmm5
+mulpd 176(%rsi),%xmm5
+
+# qhasm: 0t16 =1t16
+# asm 1: movdqa <1t16=int6464#6,>0t16=int6464#6
+# asm 2: movdqa <1t16=%xmm5,>0t16=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: 0t6 = 0ab5six
+# asm 1: movdqa <0ab5six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab5six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t6=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<0t6=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0r6 +=0t6
+# asm 1: addpd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addpd <0t6=%xmm13,<0r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: 1t12 = 0ab5six
+# asm 1: movdqa <0ab5six=int6464#13,>1t12=int6464#13
+# asm 2: movdqa <0ab5six=%xmm12,>1t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<1t12=int6464#13
+# asm 2: mulpd 112(<op1=%rsi),<1t12=%xmm12
+mulpd 112(%rsi),%xmm12
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#13,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm12,<0t12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(1mysp + 80) = 0r5
+# asm 1: movdqa <0r5=int6464#7,80(<1mysp=int64#4)
+# asm 2: movdqa <0r5=%xmm6,80(<1mysp=%rcx)
+movdqa %xmm6,80(%rcx)
+
+# qhasm: 0ab6 = *(int128 *)(0arg1p + 96)
+# asm 1: movdqa 96(<0arg1p=int64#5),>0ab6=int6464#7
+# asm 2: movdqa 96(<0arg1p=%r8),>0ab6=%xmm6
+movdqa 96(%r8),%xmm6
+
+# qhasm: 0t6 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t6=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t6=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t6 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t6=int6464#13
+# asm 2: mulpd 0(<op1=%rsi),<0t6=%xmm12
+mulpd 0(%rsi),%xmm12
+
+# qhasm: float6464 0r6 +=0t6
+# asm 1: addpd <0t6=int6464#13,<0r6=int6464#8
+# asm 2: addpd <0t6=%xmm12,<0r6=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: 0t7 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t7=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t7=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t7=int6464#13
+# asm 2: mulpd 16(<op1=%rsi),<0t7=%xmm12
+mulpd 16(%rsi),%xmm12
+
+# qhasm: float6464 0r7 +=0t7
+# asm 1: addpd <0t7=int6464#13,<0r7=int6464#9
+# asm 2: addpd <0t7=%xmm12,<0r7=%xmm8
+addpd %xmm12,%xmm8
+
+# qhasm: 0t8 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t8=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t8=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t8=int6464#13
+# asm 2: mulpd 32(<op1=%rsi),<0t8=%xmm12
+mulpd 32(%rsi),%xmm12
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#13,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm12,<0r8=%xmm9
+addpd %xmm12,%xmm9
+
+# qhasm: 0t9 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t9=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t9=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t9=int6464#13
+# asm 2: mulpd 48(<op1=%rsi),<0t9=%xmm12
+mulpd 48(%rsi),%xmm12
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#13,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm12,<0r9=%xmm10
+addpd %xmm12,%xmm10
+
+# qhasm: 0t10 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t10=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t10=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t10=int6464#13
+# asm 2: mulpd 64(<op1=%rsi),<0t10=%xmm12
+mulpd 64(%rsi),%xmm12
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#13,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm12,<0r10=%xmm11
+addpd %xmm12,%xmm11
+
+# qhasm: 0t11 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t11=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t11=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<0t11=int6464#13
+# asm 2: mulpd 80(<op1=%rsi),<0t11=%xmm12
+mulpd 80(%rsi),%xmm12
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#13,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm12,<0r11=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: 1t12 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>1t12=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>1t12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<1t12=int6464#13
+# asm 2: mulpd 96(<op1=%rsi),<1t12=%xmm12
+mulpd 96(%rsi),%xmm12
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#13,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm12,<0t12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: 1t13 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>1t13=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>1t13=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<1t13=int6464#13
+# asm 2: mulpd 112(<op1=%rsi),<1t13=%xmm12
+mulpd 112(%rsi),%xmm12
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#13,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm12,<0t13=%xmm2
+addpd %xmm12,%xmm2
+
+# qhasm: 1t14 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>1t14=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>1t14=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t14=int6464#13
+# asm 2: mulpd 128(<op1=%rsi),<1t14=%xmm12
+mulpd 128(%rsi),%xmm12
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#13,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm12,<0t14=%xmm3
+addpd %xmm12,%xmm3
+
+# qhasm: 1t15 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>1t15=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>1t15=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t15=int6464#13
+# asm 2: mulpd 144(<op1=%rsi),<1t15=%xmm12
+mulpd 144(%rsi),%xmm12
+
+# qhasm: float6464 0t15 +=1t15
+# asm 1: addpd <1t15=int6464#13,<0t15=int6464#5
+# asm 2: addpd <1t15=%xmm12,<0t15=%xmm4
+addpd %xmm12,%xmm4
+
+# qhasm: 1t16 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>1t16=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>1t16=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 1t16 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t16=int6464#13
+# asm 2: mulpd 160(<op1=%rsi),<1t16=%xmm12
+mulpd 160(%rsi),%xmm12
+
+# qhasm: float6464 0t16 +=1t16
+# asm 1: addpd <1t16=int6464#13,<0t16=int6464#6
+# asm 2: addpd <1t16=%xmm12,<0t16=%xmm5
+addpd %xmm12,%xmm5
+
+# qhasm: 1t17 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>1t17=int6464#7
+# asm 2: movdqa <0ab6=%xmm6,>1t17=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: float6464 1t17 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t17=int6464#7
+# asm 2: mulpd 176(<op1=%rsi),<1t17=%xmm6
+mulpd 176(%rsi),%xmm6
+
+# qhasm: 0t17 =1t17
+# asm 1: movdqa <1t17=int6464#7,>0t17=int6464#7
+# asm 2: movdqa <1t17=%xmm6,>0t17=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: *(int128 *)(1mysp + 96) = 0r6
+# asm 1: movdqa <0r6=int6464#8,96(<1mysp=int64#4)
+# asm 2: movdqa <0r6=%xmm7,96(<1mysp=%rcx)
+movdqa %xmm7,96(%rcx)
+
+# qhasm: 0ab7 = *(int128 *)(0arg1p + 112)
+# asm 1: movdqa 112(<0arg1p=int64#5),>0ab7=int6464#8
+# asm 2: movdqa 112(<0arg1p=%r8),>0ab7=%xmm7
+movdqa 112(%r8),%xmm7
+
+# qhasm: 0ab7six = 0ab7
+# asm 1: movdqa <0ab7=int6464#8,>0ab7six=int6464#13
+# asm 2: movdqa <0ab7=%xmm7,>0ab7six=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 0ab7six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab7six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab7six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t7 = 0ab7
+# asm 1: movdqa <0ab7=int6464#8,>0t7=int6464#14
+# asm 2: movdqa <0ab7=%xmm7,>0t7=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0t7 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t7=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t7=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r7 +=0t7
+# asm 1: addpd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addpd <0t7=%xmm13,<0r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 1t13 = 0ab7
+# asm 1: movdqa <0ab7=int6464#8,>1t13=int6464#8
+# asm 2: movdqa <0ab7=%xmm7,>1t13=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<1t13=int6464#8
+# asm 2: mulpd 96(<op1=%rsi),<1t13=%xmm7
+mulpd 96(%rsi),%xmm7
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#8,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm7,<0t13=%xmm2
+addpd %xmm7,%xmm2
+
+# qhasm: 0t8 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t8=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t8=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t8=int6464#8
+# asm 2: mulpd 16(<op1=%rsi),<0t8=%xmm7
+mulpd 16(%rsi),%xmm7
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#8,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm7,<0r8=%xmm9
+addpd %xmm7,%xmm9
+
+# qhasm: 0t9 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t9=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t9=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t9=int6464#8
+# asm 2: mulpd 32(<op1=%rsi),<0t9=%xmm7
+mulpd 32(%rsi),%xmm7
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#8,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm7,<0r9=%xmm10
+addpd %xmm7,%xmm10
+
+# qhasm: 0t10 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t10=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t10=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t10=int6464#8
+# asm 2: mulpd 48(<op1=%rsi),<0t10=%xmm7
+mulpd 48(%rsi),%xmm7
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#8,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm7,<0r10=%xmm11
+addpd %xmm7,%xmm11
+
+# qhasm: 0t11 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t11=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t11=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<0t11=int6464#8
+# asm 2: mulpd 64(<op1=%rsi),<0t11=%xmm7
+mulpd 64(%rsi),%xmm7
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#8,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm7,<0r11=%xmm0
+addpd %xmm7,%xmm0
+
+# qhasm: 1t12 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>1t12=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>1t12=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<1t12=int6464#8
+# asm 2: mulpd 80(<op1=%rsi),<1t12=%xmm7
+mulpd 80(%rsi),%xmm7
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#8,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm7,<0t12=%xmm1
+addpd %xmm7,%xmm1
+
+# qhasm: 1t14 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>1t14=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>1t14=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<1t14=int6464#8
+# asm 2: mulpd 112(<op1=%rsi),<1t14=%xmm7
+mulpd 112(%rsi),%xmm7
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#8,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm7,<0t14=%xmm3
+addpd %xmm7,%xmm3
+
+# qhasm: 1t15 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>1t15=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>1t15=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t15=int6464#8
+# asm 2: mulpd 128(<op1=%rsi),<1t15=%xmm7
+mulpd 128(%rsi),%xmm7
+
+# qhasm: float6464 0t15 +=1t15
+# asm 1: addpd <1t15=int6464#8,<0t15=int6464#5
+# asm 2: addpd <1t15=%xmm7,<0t15=%xmm4
+addpd %xmm7,%xmm4
+
+# qhasm: 1t16 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>1t16=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>1t16=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 1t16 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t16=int6464#8
+# asm 2: mulpd 144(<op1=%rsi),<1t16=%xmm7
+mulpd 144(%rsi),%xmm7
+
+# qhasm: float6464 0t16 +=1t16
+# asm 1: addpd <1t16=int6464#8,<0t16=int6464#6
+# asm 2: addpd <1t16=%xmm7,<0t16=%xmm5
+addpd %xmm7,%xmm5
+
+# qhasm: 1t17 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>1t17=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>1t17=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 1t17 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t17=int6464#8
+# asm 2: mulpd 160(<op1=%rsi),<1t17=%xmm7
+mulpd 160(%rsi),%xmm7
+
+# qhasm: float6464 0t17 +=1t17
+# asm 1: addpd <1t17=int6464#8,<0t17=int6464#7
+# asm 2: addpd <1t17=%xmm7,<0t17=%xmm6
+addpd %xmm7,%xmm6
+
+# qhasm: 1t18 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>1t18=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>1t18=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 1t18 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t18=int6464#8
+# asm 2: mulpd 176(<op1=%rsi),<1t18=%xmm7
+mulpd 176(%rsi),%xmm7
+
+# qhasm: 0t18 =1t18
+# asm 1: movdqa <1t18=int6464#8,>0t18=int6464#8
+# asm 2: movdqa <1t18=%xmm7,>0t18=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: *(int128 *)(1mysp + 112) = 0r7
+# asm 1: movdqa <0r7=int6464#9,112(<1mysp=int64#4)
+# asm 2: movdqa <0r7=%xmm8,112(<1mysp=%rcx)
+movdqa %xmm8,112(%rcx)
+
+# qhasm: 0ab8 = *(int128 *)(0arg1p + 128)
+# asm 1: movdqa 128(<0arg1p=int64#5),>0ab8=int6464#9
+# asm 2: movdqa 128(<0arg1p=%r8),>0ab8=%xmm8
+movdqa 128(%r8),%xmm8
+
+# qhasm: 0ab8six = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>0ab8six=int6464#13
+# asm 2: movdqa <0ab8=%xmm8,>0ab8six=%xmm12
+movdqa %xmm8,%xmm12
+
+# qhasm: float6464 0ab8six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab8six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab8six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t8 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>0t8=int6464#14
+# asm 2: movdqa <0ab8=%xmm8,>0t8=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0t8 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t8=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t8=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r8 +=0t8
+# asm 1: addpd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addpd <0t8=%xmm13,<0r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: 1t13 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>1t13=int6464#14
+# asm 2: movdqa <0ab8=%xmm8,>1t13=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<1t13=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<1t13=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#14,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm13,<0t13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 1t14 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>1t14=int6464#14
+# asm 2: movdqa <0ab8=%xmm8,>1t14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<1t14=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<1t14=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#14,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm13,<0t14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: 1t19 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>1t19=int6464#9
+# asm 2: movdqa <0ab8=%xmm8,>1t19=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: float6464 1t19 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t19=int6464#9
+# asm 2: mulpd 176(<op1=%rsi),<1t19=%xmm8
+mulpd 176(%rsi),%xmm8
+
+# qhasm: 0t19 =1t19
+# asm 1: movdqa <1t19=int6464#9,>0t19=int6464#9
+# asm 2: movdqa <1t19=%xmm8,>0t19=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: 0t9 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t9=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t9=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t9=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<0t9=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm13,<0r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t10=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<0t10=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm13,<0r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<0t11=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t12 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>1t12=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>1t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<1t12=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<1t12=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#14,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm13,<0t12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: 1t15 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>1t15=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>1t15=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<1t15=int6464#14
+# asm 2: mulpd 112(<op1=%rsi),<1t15=%xmm13
+mulpd 112(%rsi),%xmm13
+
+# qhasm: float6464 0t15 +=1t15
+# asm 1: addpd <1t15=int6464#14,<0t15=int6464#5
+# asm 2: addpd <1t15=%xmm13,<0t15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: 1t16 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>1t16=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>1t16=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t16 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t16=int6464#14
+# asm 2: mulpd 128(<op1=%rsi),<1t16=%xmm13
+mulpd 128(%rsi),%xmm13
+
+# qhasm: float6464 0t16 +=1t16
+# asm 1: addpd <1t16=int6464#14,<0t16=int6464#6
+# asm 2: addpd <1t16=%xmm13,<0t16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: 1t17 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>1t17=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>1t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t17 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t17=int6464#14
+# asm 2: mulpd 144(<op1=%rsi),<1t17=%xmm13
+mulpd 144(%rsi),%xmm13
+
+# qhasm: float6464 0t17 +=1t17
+# asm 1: addpd <1t17=int6464#14,<0t17=int6464#7
+# asm 2: addpd <1t17=%xmm13,<0t17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 1t18 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>1t18=int6464#13
+# asm 2: movdqa <0ab8six=%xmm12,>1t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t18 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t18=int6464#13
+# asm 2: mulpd 160(<op1=%rsi),<1t18=%xmm12
+mulpd 160(%rsi),%xmm12
+
+# qhasm: float6464 0t18 +=1t18
+# asm 1: addpd <1t18=int6464#13,<0t18=int6464#8
+# asm 2: addpd <1t18=%xmm12,<0t18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(1mysp + 128) = 0r8
+# asm 1: movdqa <0r8=int6464#10,128(<1mysp=int64#4)
+# asm 2: movdqa <0r8=%xmm9,128(<1mysp=%rcx)
+movdqa %xmm9,128(%rcx)
+
+# qhasm: 0ab9 = *(int128 *)(0arg1p + 144)
+# asm 1: movdqa 144(<0arg1p=int64#5),>0ab9=int6464#10
+# asm 2: movdqa 144(<0arg1p=%r8),>0ab9=%xmm9
+movdqa 144(%r8),%xmm9
+
+# qhasm: 0ab9six = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0ab9six=int6464#13
+# asm 2: movdqa <0ab9=%xmm9,>0ab9six=%xmm12
+movdqa %xmm9,%xmm12
+
+# qhasm: float6464 0ab9six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab9six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab9six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t9 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0t9=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>0t9=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0t9 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t9=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t9=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r9 +=0t9
+# asm 1: addpd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addpd <0t9=%xmm13,<0r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: 1t13 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>1t13=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>1t13=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<1t13=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<1t13=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#14,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm13,<0t13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 1t14 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>1t14=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>1t14=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<1t14=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<1t14=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#14,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm13,<0t14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: 1t15 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>1t15=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>1t15=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<1t15=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<1t15=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0t15 +=1t15
+# asm 1: addpd <1t15=int6464#14,<0t15=int6464#5
+# asm 2: addpd <1t15=%xmm13,<0t15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: 1t19 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>1t19=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>1t19=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 1t19 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t19=int6464#14
+# asm 2: mulpd 160(<op1=%rsi),<1t19=%xmm13
+mulpd 160(%rsi),%xmm13
+
+# qhasm: float6464 0t19 +=1t19
+# asm 1: addpd <1t19=int6464#14,<0t19=int6464#9
+# asm 2: addpd <1t19=%xmm13,<0t19=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 1t20 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>1t20=int6464#10
+# asm 2: movdqa <0ab9=%xmm9,>1t20=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: float6464 1t20 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t20=int6464#10
+# asm 2: mulpd 176(<op1=%rsi),<1t20=%xmm9
+mulpd 176(%rsi),%xmm9
+
+# qhasm: 0t20 =1t20
+# asm 1: movdqa <1t20=int6464#10,>0t20=int6464#10
+# asm 2: movdqa <1t20=%xmm9,>0t20=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: 0t10 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t10=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<0t10=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm13,<0r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<0t11=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t12 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>1t12=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>1t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<1t12=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<1t12=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#14,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm13,<0t12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: 1t16 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>1t16=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>1t16=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t16 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<1t16=int6464#14
+# asm 2: mulpd 112(<op1=%rsi),<1t16=%xmm13
+mulpd 112(%rsi),%xmm13
+
+# qhasm: float6464 0t16 +=1t16
+# asm 1: addpd <1t16=int6464#14,<0t16=int6464#6
+# asm 2: addpd <1t16=%xmm13,<0t16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: 1t17 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>1t17=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>1t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t17 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t17=int6464#14
+# asm 2: mulpd 128(<op1=%rsi),<1t17=%xmm13
+mulpd 128(%rsi),%xmm13
+
+# qhasm: float6464 0t17 +=1t17
+# asm 1: addpd <1t17=int6464#14,<0t17=int6464#7
+# asm 2: addpd <1t17=%xmm13,<0t17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 1t18 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>1t18=int6464#13
+# asm 2: movdqa <0ab9six=%xmm12,>1t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t18 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t18=int6464#13
+# asm 2: mulpd 144(<op1=%rsi),<1t18=%xmm12
+mulpd 144(%rsi),%xmm12
+
+# qhasm: float6464 0t18 +=1t18
+# asm 1: addpd <1t18=int6464#13,<0t18=int6464#8
+# asm 2: addpd <1t18=%xmm12,<0t18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(1mysp + 144) = 0r9
+# asm 1: movdqa <0r9=int6464#11,144(<1mysp=int64#4)
+# asm 2: movdqa <0r9=%xmm10,144(<1mysp=%rcx)
+movdqa %xmm10,144(%rcx)
+
+# qhasm: 0ab10 = *(int128 *)(0arg1p + 160)
+# asm 1: movdqa 160(<0arg1p=int64#5),>0ab10=int6464#11
+# asm 2: movdqa 160(<0arg1p=%r8),>0ab10=%xmm10
+movdqa 160(%r8),%xmm10
+
+# qhasm: 0ab10six = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0ab10six=int6464#13
+# asm 2: movdqa <0ab10=%xmm10,>0ab10six=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 0ab10six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab10six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab10six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t10 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t10=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t10=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t10 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t10=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t10=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r10 +=0t10
+# asm 1: addpd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addpd <0t10=%xmm13,<0r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: 1t13 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>1t13=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>1t13=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<1t13=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<1t13=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#14,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm13,<0t13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 1t14 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>1t14=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>1t14=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<1t14=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<1t14=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#14,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm13,<0t14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: 1t16 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>1t16=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>1t16=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 1t16 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<1t16=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<1t16=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0t16 +=1t16
+# asm 1: addpd <1t16=int6464#14,<0t16=int6464#6
+# asm 2: addpd <1t16=%xmm13,<0t16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: 1t15 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>1t15=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>1t15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<1t15=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<1t15=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0t15 +=1t15
+# asm 1: addpd <1t15=int6464#14,<0t15=int6464#5
+# asm 2: addpd <1t15=%xmm13,<0t15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: 1t19 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>1t19=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>1t19=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 1t19 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t19=int6464#14
+# asm 2: mulpd 144(<op1=%rsi),<1t19=%xmm13
+mulpd 144(%rsi),%xmm13
+
+# qhasm: float6464 0t19 +=1t19
+# asm 1: addpd <1t19=int6464#14,<0t19=int6464#9
+# asm 2: addpd <1t19=%xmm13,<0t19=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 1t20 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>1t20=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>1t20=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 1t20 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t20=int6464#14
+# asm 2: mulpd 160(<op1=%rsi),<1t20=%xmm13
+mulpd 160(%rsi),%xmm13
+
+# qhasm: float6464 0t20 +=1t20
+# asm 1: addpd <1t20=int6464#14,<0t20=int6464#10
+# asm 2: addpd <1t20=%xmm13,<0t20=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: 1t21 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>1t21=int6464#11
+# asm 2: movdqa <0ab10=%xmm10,>1t21=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: float6464 1t21 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t21=int6464#11
+# asm 2: mulpd 176(<op1=%rsi),<1t21=%xmm10
+mulpd 176(%rsi),%xmm10
+
+# qhasm: 0t21 =1t21
+# asm 1: movdqa <1t21=int6464#11,>0t21=int6464#11
+# asm 2: movdqa <1t21=%xmm10,>0t21=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: 0t11 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab10six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<0t11=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t12 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>1t12=int6464#14
+# asm 2: movdqa <0ab10six=%xmm12,>1t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<1t12=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<1t12=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#14,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm13,<0t12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: 1t17 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>1t17=int6464#14
+# asm 2: movdqa <0ab10six=%xmm12,>1t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t17 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<1t17=int6464#14
+# asm 2: mulpd 112(<op1=%rsi),<1t17=%xmm13
+mulpd 112(%rsi),%xmm13
+
+# qhasm: float6464 0t17 +=1t17
+# asm 1: addpd <1t17=int6464#14,<0t17=int6464#7
+# asm 2: addpd <1t17=%xmm13,<0t17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 1t18 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>1t18=int6464#13
+# asm 2: movdqa <0ab10six=%xmm12,>1t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t18 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t18=int6464#13
+# asm 2: mulpd 128(<op1=%rsi),<1t18=%xmm12
+mulpd 128(%rsi),%xmm12
+
+# qhasm: float6464 0t18 +=1t18
+# asm 1: addpd <1t18=int6464#13,<0t18=int6464#8
+# asm 2: addpd <1t18=%xmm12,<0t18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(1mysp + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#12,160(<1mysp=int64#4)
+# asm 2: movdqa <0r10=%xmm11,160(<1mysp=%rcx)
+movdqa %xmm11,160(%rcx)
+
+# qhasm: 0ab11 = *(int128 *)(0arg1p + 176)
+# asm 1: movdqa 176(<0arg1p=int64#5),>0ab11=int6464#12
+# asm 2: movdqa 176(<0arg1p=%r8),>0ab11=%xmm11
+movdqa 176(%r8),%xmm11
+
+# qhasm: 0ab11six = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0ab11six=int6464#13
+# asm 2: movdqa <0ab11=%xmm11,>0ab11six=%xmm12
+movdqa %xmm11,%xmm12
+
+# qhasm: float6464 0ab11six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0ab11six=int6464#13
+# asm 2: mulpd SIX_SIX,<0ab11six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: 0t11 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t11=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t11=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t11 *= *(int128 *)(op1 + 0)
+# asm 1: mulpd 0(<op1=int64#2),<0t11=int6464#14
+# asm 2: mulpd 0(<op1=%rsi),<0t11=%xmm13
+mulpd 0(%rsi),%xmm13
+
+# qhasm: float6464 0r11 +=0t11
+# asm 1: addpd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addpd <0t11=%xmm13,<0r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: 1t13 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t13=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t13=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t13 *= *(int128 *)(op1 + 32)
+# asm 1: mulpd 32(<op1=int64#2),<1t13=int6464#14
+# asm 2: mulpd 32(<op1=%rsi),<1t13=%xmm13
+mulpd 32(%rsi),%xmm13
+
+# qhasm: float6464 0t13 +=1t13
+# asm 1: addpd <1t13=int6464#14,<0t13=int6464#3
+# asm 2: addpd <1t13=%xmm13,<0t13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: 1t14 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t14=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t14=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t14 *= *(int128 *)(op1 + 48)
+# asm 1: mulpd 48(<op1=int64#2),<1t14=int6464#14
+# asm 2: mulpd 48(<op1=%rsi),<1t14=%xmm13
+mulpd 48(%rsi),%xmm13
+
+# qhasm: float6464 0t14 +=1t14
+# asm 1: addpd <1t14=int6464#14,<0t14=int6464#4
+# asm 2: addpd <1t14=%xmm13,<0t14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: 1t15 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t15=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t15=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t15 *= *(int128 *)(op1 + 64)
+# asm 1: mulpd 64(<op1=int64#2),<1t15=int6464#14
+# asm 2: mulpd 64(<op1=%rsi),<1t15=%xmm13
+mulpd 64(%rsi),%xmm13
+
+# qhasm: float6464 0t15 +=1t15
+# asm 1: addpd <1t15=int6464#14,<0t15=int6464#5
+# asm 2: addpd <1t15=%xmm13,<0t15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: 1t16 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t16=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t16=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t16 *= *(int128 *)(op1 + 80)
+# asm 1: mulpd 80(<op1=int64#2),<1t16=int6464#14
+# asm 2: mulpd 80(<op1=%rsi),<1t16=%xmm13
+mulpd 80(%rsi),%xmm13
+
+# qhasm: float6464 0t16 +=1t16
+# asm 1: addpd <1t16=int6464#14,<0t16=int6464#6
+# asm 2: addpd <1t16=%xmm13,<0t16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: 1t17 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t17=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t17=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t17 *= *(int128 *)(op1 + 96)
+# asm 1: mulpd 96(<op1=int64#2),<1t17=int6464#14
+# asm 2: mulpd 96(<op1=%rsi),<1t17=%xmm13
+mulpd 96(%rsi),%xmm13
+
+# qhasm: float6464 0t17 +=1t17
+# asm 1: addpd <1t17=int6464#14,<0t17=int6464#7
+# asm 2: addpd <1t17=%xmm13,<0t17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: 1t19 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t19=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t19=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t19 *= *(int128 *)(op1 + 128)
+# asm 1: mulpd 128(<op1=int64#2),<1t19=int6464#14
+# asm 2: mulpd 128(<op1=%rsi),<1t19=%xmm13
+mulpd 128(%rsi),%xmm13
+
+# qhasm: float6464 0t19 +=1t19
+# asm 1: addpd <1t19=int6464#14,<0t19=int6464#9
+# asm 2: addpd <1t19=%xmm13,<0t19=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: 1t20 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t20=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t20=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t20 *= *(int128 *)(op1 + 144)
+# asm 1: mulpd 144(<op1=int64#2),<1t20=int6464#14
+# asm 2: mulpd 144(<op1=%rsi),<1t20=%xmm13
+mulpd 144(%rsi),%xmm13
+
+# qhasm: float6464 0t20 +=1t20
+# asm 1: addpd <1t20=int6464#14,<0t20=int6464#10
+# asm 2: addpd <1t20=%xmm13,<0t20=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: 1t21 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t21=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>1t21=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 1t21 *= *(int128 *)(op1 + 160)
+# asm 1: mulpd 160(<op1=int64#2),<1t21=int6464#14
+# asm 2: mulpd 160(<op1=%rsi),<1t21=%xmm13
+mulpd 160(%rsi),%xmm13
+
+# qhasm: float6464 0t21 +=1t21
+# asm 1: addpd <1t21=int6464#14,<0t21=int6464#11
+# asm 2: addpd <1t21=%xmm13,<0t21=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: 1t22 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>1t22=int6464#12
+# asm 2: movdqa <0ab11=%xmm11,>1t22=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: float6464 1t22 *= *(int128 *)(op1 + 176)
+# asm 1: mulpd 176(<op1=int64#2),<1t22=int6464#12
+# asm 2: mulpd 176(<op1=%rsi),<1t22=%xmm11
+mulpd 176(%rsi),%xmm11
+
+# qhasm: 0t22 =1t22
+# asm 1: movdqa <1t22=int6464#12,>0t22=int6464#12
+# asm 2: movdqa <1t22=%xmm11,>0t22=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: 1t12 = 0ab11six
+# asm 1: movdqa <0ab11six=int6464#13,>1t12=int6464#14
+# asm 2: movdqa <0ab11six=%xmm12,>1t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 1t12 *= *(int128 *)(op1 + 16)
+# asm 1: mulpd 16(<op1=int64#2),<1t12=int6464#14
+# asm 2: mulpd 16(<op1=%rsi),<1t12=%xmm13
+mulpd 16(%rsi),%xmm13
+
+# qhasm: float6464 0t12 +=1t12
+# asm 1: addpd <1t12=int6464#14,<0t12=int6464#2
+# asm 2: addpd <1t12=%xmm13,<0t12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: 1t18 = 0ab11six
+# asm 1: movdqa <0ab11six=int6464#13,>1t18=int6464#13
+# asm 2: movdqa <0ab11six=%xmm12,>1t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 1t18 *= *(int128 *)(op1 + 112)
+# asm 1: mulpd 112(<op1=int64#2),<1t18=int6464#13
+# asm 2: mulpd 112(<op1=%rsi),<1t18=%xmm12
+mulpd 112(%rsi),%xmm12
+
+# qhasm: float6464 0t18 +=1t18
+# asm 1: addpd <1t18=int6464#13,<0t18=int6464#8
+# asm 2: addpd <1t18=%xmm12,<0t18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(1mysp + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#1,176(<1mysp=int64#4)
+# asm 2: movdqa <0r11=%xmm0,176(<1mysp=%rcx)
+movdqa %xmm0,176(%rcx)
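+
+# At this point the schoolbook multiplication of the two 12-coefficient
+# operands is complete: the low product coefficients 0r0..0r11 have been
+# stored at 1mysp + 0..176, and the high product terms 0t12..0t22 are still
+# held in registers (%xmm1..%xmm11) for the degree reduction that follows.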
+
+# qhasm: int6464 1r0
+
+# qhasm: int6464 1r1
+
+# qhasm: int6464 1r2
+
+# qhasm: int6464 1r3
+
+# qhasm: int6464 1r4
+
+# qhasm: int6464 1r5
+
+# qhasm: int6464 1r6
+
+# qhasm: int6464 1r7
+
+# qhasm: int6464 1r8
+
+# qhasm: int6464 1r9
+
+# qhasm: int6464 1r10
+
+# qhasm: int6464 1r11
+
+# qhasm: int6464 1t0
+
+# qhasm: int6464 1t1
+
+# qhasm: int6464 1t2
+
+# qhasm: int6464 1t3
+
+# qhasm: int6464 1t4
+
+# qhasm: int6464 1t5
+
+# qhasm: int6464 1t6
+
+# qhasm: int6464 1t7
+
+# qhasm: int6464 1t8
+
+# qhasm: int6464 1t9
+
+# qhasm: int6464 1t10
+
+# qhasm: int6464 1t11
+
+# qhasm: int6464 2t12
+
+# qhasm: int6464 2t13
+
+# qhasm: int6464 2t14
+
+# qhasm: int6464 2t15
+
+# qhasm: int6464 2t16
+
+# qhasm: int6464 2t17
+
+# qhasm: int6464 2t18
+
+# qhasm: int6464 2t19
+
+# qhasm: int6464 2t20
+
+# qhasm: int6464 2t21
+
+# qhasm: int6464 2t22
+
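+# The reduction below folds the high terms 0t12..0t22 back into the twelve
+# low coefficients, reloading each 1rN from 1mysp and handling the
+# coefficients in strides of three (0,3,6,9, then 1,4,7,10, then 2,5,8,11).
+# The small per-lane constants (TWO_TWO, THREE_THREE, ..., THIRTY_THIRTY)
+# are presumably the coefficient pairs coming from the reduction identities
+# of the degree-12 polynomial representation used in this implementation.
+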
+# qhasm: 1r0 = *(int128 *)(1mysp + 0)
+# asm 1: movdqa 0(<1mysp=int64#4),>1r0=int6464#1
+# asm 2: movdqa 0(<1mysp=%rcx),>1r0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: float6464 1r0 -= 0t12
+# asm 1: subpd <0t12=int6464#2,<1r0=int6464#1
+# asm 2: subpd <0t12=%xmm1,<1r0=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t15 = 0t15
+# asm 1: movdqa <0t15=int6464#5,>2t15=int6464#13
+# asm 2: movdqa <0t15=%xmm4,>2t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 2t15 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t15=int6464#13
+# asm 2: mulpd SIX_SIX,<2t15=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: float6464 1r0 += 2t15
+# asm 1: addpd <2t15=int6464#13,<1r0=int6464#1
+# asm 2: addpd <2t15=%xmm12,<1r0=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: 2t18 = 0t18
+# asm 1: movdqa <0t18=int6464#8,>2t18=int6464#13
+# asm 2: movdqa <0t18=%xmm7,>2t18=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 2t18 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<2t18=int6464#13
+# asm 2: mulpd TWO_TWO,<2t18=%xmm12
+mulpd TWO_TWO,%xmm12
+
+# qhasm: float6464 1r0 -= 2t18
+# asm 1: subpd <2t18=int6464#13,<1r0=int6464#1
+# asm 2: subpd <2t18=%xmm12,<1r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: 2t21 = 0t21
+# asm 1: movdqa <0t21=int6464#11,>2t21=int6464#13
+# asm 2: movdqa <0t21=%xmm10,>2t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 2t21 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t21=int6464#13
+# asm 2: mulpd SIX_SIX,<2t21=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: float6464 1r0 -= 2t21
+# asm 1: subpd <2t21=int6464#13,<1r0=int6464#1
+# asm 2: subpd <2t21=%xmm12,<1r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 0) = 1r0
+# asm 1: movdqa <1r0=int6464#1,0(<1mysp=int64#4)
+# asm 2: movdqa <1r0=%xmm0,0(<1mysp=%rcx)
+movdqa %xmm0,0(%rcx)
+
+# qhasm: 1r3 = *(int128 *)(1mysp + 48)
+# asm 1: movdqa 48(<1mysp=int64#4),>1r3=int6464#1
+# asm 2: movdqa 48(<1mysp=%rcx),>1r3=%xmm0
+movdqa 48(%rcx),%xmm0
+
+# qhasm: float6464 1r3 -= 0t12
+# asm 1: subpd <0t12=int6464#2,<1r3=int6464#1
+# asm 2: subpd <0t12=%xmm1,<1r3=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t15 = 0t15
+# asm 1: movdqa <0t15=int6464#5,>2t15=int6464#13
+# asm 2: movdqa <0t15=%xmm4,>2t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 2t15 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<2t15=int6464#13
+# asm 2: mulpd FIVE_FIVE,<2t15=%xmm12
+mulpd FIVE_FIVE,%xmm12
+
+# qhasm: float6464 1r3 += 2t15
+# asm 1: addpd <2t15=int6464#13,<1r3=int6464#1
+# asm 2: addpd <2t15=%xmm12,<1r3=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: float6464 1r3 -= 0t18
+# asm 1: subpd <0t18=int6464#8,<1r3=int6464#1
+# asm 2: subpd <0t18=%xmm7,<1r3=%xmm0
+subpd %xmm7,%xmm0
+
+# qhasm: 2t21 = 0t21
+# asm 1: movdqa <0t21=int6464#11,>2t21=int6464#13
+# asm 2: movdqa <0t21=%xmm10,>2t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 2t21 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<2t21=int6464#13
+# asm 2: mulpd EIGHT_EIGHT,<2t21=%xmm12
+mulpd EIGHT_EIGHT,%xmm12
+
+# qhasm: float6464 1r3 -= 2t21
+# asm 1: subpd <2t21=int6464#13,<1r3=int6464#1
+# asm 2: subpd <2t21=%xmm12,<1r3=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 48) = 1r3
+# asm 1: movdqa <1r3=int6464#1,48(<1mysp=int64#4)
+# asm 2: movdqa <1r3=%xmm0,48(<1mysp=%rcx)
+movdqa %xmm0,48(%rcx)
+
+# qhasm: 1r6 = *(int128 *)(1mysp + 96)
+# asm 1: movdqa 96(<1mysp=int64#4),>1r6=int6464#1
+# asm 2: movdqa 96(<1mysp=%rcx),>1r6=%xmm0
+movdqa 96(%rcx),%xmm0
+
+# qhasm: 2t12 = 0t12
+# asm 1: movdqa <0t12=int6464#2,>2t12=int6464#13
+# asm 2: movdqa <0t12=%xmm1,>2t12=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: float6464 2t12 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<2t12=int6464#13
+# asm 2: mulpd FOUR_FOUR,<2t12=%xmm12
+mulpd FOUR_FOUR,%xmm12
+
+# qhasm: float6464 1r6 -= 2t12
+# asm 1: subpd <2t12=int6464#13,<1r6=int6464#1
+# asm 2: subpd <2t12=%xmm12,<1r6=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: 2t15 = 0t15
+# asm 1: movdqa <0t15=int6464#5,>2t15=int6464#13
+# asm 2: movdqa <0t15=%xmm4,>2t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 2t15 *= EIGHTEEN_EIGHTEEN
+# asm 1: mulpd EIGHTEEN_EIGHTEEN,<2t15=int6464#13
+# asm 2: mulpd EIGHTEEN_EIGHTEEN,<2t15=%xmm12
+mulpd EIGHTEEN_EIGHTEEN,%xmm12
+
+# qhasm: float6464 1r6 += 2t15
+# asm 1: addpd <2t15=int6464#13,<1r6=int6464#1
+# asm 2: addpd <2t15=%xmm12,<1r6=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: 2t18 = 0t18
+# asm 1: movdqa <0t18=int6464#8,>2t18=int6464#13
+# asm 2: movdqa <0t18=%xmm7,>2t18=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 2t18 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<2t18=int6464#13
+# asm 2: mulpd THREE_THREE,<2t18=%xmm12
+mulpd THREE_THREE,%xmm12
+
+# qhasm: float6464 1r6 -= 2t18
+# asm 1: subpd <2t18=int6464#13,<1r6=int6464#1
+# asm 2: subpd <2t18=%xmm12,<1r6=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: 2t21 = 0t21
+# asm 1: movdqa <0t21=int6464#11,>2t21=int6464#13
+# asm 2: movdqa <0t21=%xmm10,>2t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 2t21 *= THIRTY_THIRTY
+# asm 1: mulpd THIRTY_THIRTY,<2t21=int6464#13
+# asm 2: mulpd THIRTY_THIRTY,<2t21=%xmm12
+mulpd THIRTY_THIRTY,%xmm12
+
+# qhasm: float6464 1r6 -= 2t21
+# asm 1: subpd <2t21=int6464#13,<1r6=int6464#1
+# asm 2: subpd <2t21=%xmm12,<1r6=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 96) = 1r6
+# asm 1: movdqa <1r6=int6464#1,96(<1mysp=int64#4)
+# asm 2: movdqa <1r6=%xmm0,96(<1mysp=%rcx)
+movdqa %xmm0,96(%rcx)
+
+# qhasm: 1r9 = *(int128 *)(1mysp + 144)
+# asm 1: movdqa 144(<1mysp=int64#4),>1r9=int6464#1
+# asm 2: movdqa 144(<1mysp=%rcx),>1r9=%xmm0
+movdqa 144(%rcx),%xmm0
+
+# qhasm: float6464 1r9 -= 0t12
+# asm 1: subpd <0t12=int6464#2,<1r9=int6464#1
+# asm 2: subpd <0t12=%xmm1,<1r9=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t15 = 0t15
+# asm 1: movdqa <0t15=int6464#5,>2t15=int6464#2
+# asm 2: movdqa <0t15=%xmm4,>2t15=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: float6464 2t15 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<2t15=int6464#2
+# asm 2: mulpd TWO_TWO,<2t15=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 1r9 += 2t15
+# asm 1: addpd <2t15=int6464#2,<1r9=int6464#1
+# asm 2: addpd <2t15=%xmm1,<1r9=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: float6464 1r9 += 0t18
+# asm 1: addpd <0t18=int6464#8,<1r9=int6464#1
+# asm 2: addpd <0t18=%xmm7,<1r9=%xmm0
+addpd %xmm7,%xmm0
+
+# qhasm: 2t21 = 0t21
+# asm 1: movdqa <0t21=int6464#11,>2t21=int6464#2
+# asm 2: movdqa <0t21=%xmm10,>2t21=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 2t21 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<2t21=int6464#2
+# asm 2: mulpd NINE_NINE,<2t21=%xmm1
+mulpd NINE_NINE,%xmm1
+
+# qhasm: float6464 1r9 -= 2t21
+# asm 1: subpd <2t21=int6464#2,<1r9=int6464#1
+# asm 2: subpd <2t21=%xmm1,<1r9=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 144) = 1r9
+# asm 1: movdqa <1r9=int6464#1,144(<1mysp=int64#4)
+# asm 2: movdqa <1r9=%xmm0,144(<1mysp=%rcx)
+movdqa %xmm0,144(%rcx)
+
+# qhasm: 1r1 = *(int128 *)(1mysp + 16)
+# asm 1: movdqa 16(<1mysp=int64#4),>1r1=int6464#1
+# asm 2: movdqa 16(<1mysp=%rcx),>1r1=%xmm0
+movdqa 16(%rcx),%xmm0
+
+# qhasm: float6464 1r1 -= 0t13
+# asm 1: subpd <0t13=int6464#3,<1r1=int6464#1
+# asm 2: subpd <0t13=%xmm2,<1r1=%xmm0
+subpd %xmm2,%xmm0
+
+# qhasm: float6464 1r1 += 0t16
+# asm 1: addpd <0t16=int6464#6,<1r1=int6464#1
+# asm 2: addpd <0t16=%xmm5,<1r1=%xmm0
+addpd %xmm5,%xmm0
+
+# qhasm: 2t19 = 0t19
+# asm 1: movdqa <0t19=int6464#9,>2t19=int6464#2
+# asm 2: movdqa <0t19=%xmm8,>2t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 2t19 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<2t19=int6464#2
+# asm 2: mulpd TWO_TWO,<2t19=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 1r1 -= 2t19
+# asm 1: subpd <2t19=int6464#2,<1r1=int6464#1
+# asm 2: subpd <2t19=%xmm1,<1r1=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: float6464 1r1 -= 0t22
+# asm 1: subpd <0t22=int6464#12,<1r1=int6464#1
+# asm 2: subpd <0t22=%xmm11,<1r1=%xmm0
+subpd %xmm11,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 16) = 1r1
+# asm 1: movdqa <1r1=int6464#1,16(<1mysp=int64#4)
+# asm 2: movdqa <1r1=%xmm0,16(<1mysp=%rcx)
+movdqa %xmm0,16(%rcx)
+
+# qhasm: 1r4 = *(int128 *)(1mysp + 64)
+# asm 1: movdqa 64(<1mysp=int64#4),>1r4=int6464#1
+# asm 2: movdqa 64(<1mysp=%rcx),>1r4=%xmm0
+movdqa 64(%rcx),%xmm0
+
+# qhasm: 2t13 = 0t13
+# asm 1: movdqa <0t13=int6464#3,>2t13=int6464#2
+# asm 2: movdqa <0t13=%xmm2,>2t13=%xmm1
+movdqa %xmm2,%xmm1
+
+# qhasm: float6464 2t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t13=int6464#2
+# asm 2: mulpd SIX_SIX,<2t13=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r4 -= 2t13
+# asm 1: subpd <2t13=int6464#2,<1r4=int6464#1
+# asm 2: subpd <2t13=%xmm1,<1r4=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t16 = 0t16
+# asm 1: movdqa <0t16=int6464#6,>2t16=int6464#2
+# asm 2: movdqa <0t16=%xmm5,>2t16=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 2t16 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<2t16=int6464#2
+# asm 2: mulpd FIVE_FIVE,<2t16=%xmm1
+mulpd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 1r4 += 2t16
+# asm 1: addpd <2t16=int6464#2,<1r4=int6464#1
+# asm 2: addpd <2t16=%xmm1,<1r4=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 2t19 = 0t19
+# asm 1: movdqa <0t19=int6464#9,>2t19=int6464#2
+# asm 2: movdqa <0t19=%xmm8,>2t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 2t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t19=int6464#2
+# asm 2: mulpd SIX_SIX,<2t19=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r4 -= 2t19
+# asm 1: subpd <2t19=int6464#2,<1r4=int6464#1
+# asm 2: subpd <2t19=%xmm1,<1r4=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t22 = 0t22
+# asm 1: movdqa <0t22=int6464#12,>2t22=int6464#2
+# asm 2: movdqa <0t22=%xmm11,>2t22=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 2t22 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<2t22=int6464#2
+# asm 2: mulpd EIGHT_EIGHT,<2t22=%xmm1
+mulpd EIGHT_EIGHT,%xmm1
+
+# qhasm: float6464 1r4 -= 2t22
+# asm 1: subpd <2t22=int6464#2,<1r4=int6464#1
+# asm 2: subpd <2t22=%xmm1,<1r4=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 64) = 1r4
+# asm 1: movdqa <1r4=int6464#1,64(<1mysp=int64#4)
+# asm 2: movdqa <1r4=%xmm0,64(<1mysp=%rcx)
+movdqa %xmm0,64(%rcx)
+
+# qhasm: 1r7 = *(int128 *)(1mysp + 112)
+# asm 1: movdqa 112(<1mysp=int64#4),>1r7=int6464#1
+# asm 2: movdqa 112(<1mysp=%rcx),>1r7=%xmm0
+movdqa 112(%rcx),%xmm0
+
+# qhasm: 2t13 = 0t13
+# asm 1: movdqa <0t13=int6464#3,>2t13=int6464#2
+# asm 2: movdqa <0t13=%xmm2,>2t13=%xmm1
+movdqa %xmm2,%xmm1
+
+# qhasm: float6464 2t13 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<2t13=int6464#2
+# asm 2: mulpd FOUR_FOUR,<2t13=%xmm1
+mulpd FOUR_FOUR,%xmm1
+
+# qhasm: float6464 1r7 -= 2t13
+# asm 1: subpd <2t13=int6464#2,<1r7=int6464#1
+# asm 2: subpd <2t13=%xmm1,<1r7=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t16 = 0t16
+# asm 1: movdqa <0t16=int6464#6,>2t16=int6464#2
+# asm 2: movdqa <0t16=%xmm5,>2t16=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 2t16 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<2t16=int6464#2
+# asm 2: mulpd THREE_THREE,<2t16=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 1r7 += 2t16
+# asm 1: addpd <2t16=int6464#2,<1r7=int6464#1
+# asm 2: addpd <2t16=%xmm1,<1r7=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 2t19 = 0t19
+# asm 1: movdqa <0t19=int6464#9,>2t19=int6464#2
+# asm 2: movdqa <0t19=%xmm8,>2t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 2t19 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<2t19=int6464#2
+# asm 2: mulpd THREE_THREE,<2t19=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 1r7 -= 2t19
+# asm 1: subpd <2t19=int6464#2,<1r7=int6464#1
+# asm 2: subpd <2t19=%xmm1,<1r7=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t22 = 0t22
+# asm 1: movdqa <0t22=int6464#12,>2t22=int6464#2
+# asm 2: movdqa <0t22=%xmm11,>2t22=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 2t22 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<2t22=int6464#2
+# asm 2: mulpd FIVE_FIVE,<2t22=%xmm1
+mulpd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 1r7 -= 2t22
+# asm 1: subpd <2t22=int6464#2,<1r7=int6464#1
+# asm 2: subpd <2t22=%xmm1,<1r7=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 112) = 1r7
+# asm 1: movdqa <1r7=int6464#1,112(<1mysp=int64#4)
+# asm 2: movdqa <1r7=%xmm0,112(<1mysp=%rcx)
+movdqa %xmm0,112(%rcx)
+
+# qhasm: 1r10 = *(int128 *)(1mysp + 160)
+# asm 1: movdqa 160(<1mysp=int64#4),>1r10=int6464#1
+# asm 2: movdqa 160(<1mysp=%rcx),>1r10=%xmm0
+movdqa 160(%rcx),%xmm0
+
+# qhasm: 2t13 = 0t13
+# asm 1: movdqa <0t13=int6464#3,>2t13=int6464#2
+# asm 2: movdqa <0t13=%xmm2,>2t13=%xmm1
+movdqa %xmm2,%xmm1
+
+# qhasm: float6464 2t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t13=int6464#2
+# asm 2: mulpd SIX_SIX,<2t13=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r10 -= 2t13
+# asm 1: subpd <2t13=int6464#2,<1r10=int6464#1
+# asm 2: subpd <2t13=%xmm1,<1r10=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t16 = 0t16
+# asm 1: movdqa <0t16=int6464#6,>2t16=int6464#2
+# asm 2: movdqa <0t16=%xmm5,>2t16=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 2t16 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<2t16=int6464#2
+# asm 2: mulpd TWO_TWO,<2t16=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 1r10 += 2t16
+# asm 1: addpd <2t16=int6464#2,<1r10=int6464#1
+# asm 2: addpd <2t16=%xmm1,<1r10=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 2t19 = 0t19
+# asm 1: movdqa <0t19=int6464#9,>2t19=int6464#2
+# asm 2: movdqa <0t19=%xmm8,>2t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 2t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t19=int6464#2
+# asm 2: mulpd SIX_SIX,<2t19=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r10 += 2t19
+# asm 1: addpd <2t19=int6464#2,<1r10=int6464#1
+# asm 2: addpd <2t19=%xmm1,<1r10=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 2t22 = 0t22
+# asm 1: movdqa <0t22=int6464#12,>2t22=int6464#2
+# asm 2: movdqa <0t22=%xmm11,>2t22=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 2t22 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<2t22=int6464#2
+# asm 2: mulpd NINE_NINE,<2t22=%xmm1
+mulpd NINE_NINE,%xmm1
+
+# qhasm: float6464 1r10 -= 2t22
+# asm 1: subpd <2t22=int6464#2,<1r10=int6464#1
+# asm 2: subpd <2t22=%xmm1,<1r10=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 160) = 1r10
+# asm 1: movdqa <1r10=int6464#1,160(<1mysp=int64#4)
+# asm 2: movdqa <1r10=%xmm0,160(<1mysp=%rcx)
+movdqa %xmm0,160(%rcx)
+
+# qhasm: 1r2 = *(int128 *)(1mysp + 32)
+# asm 1: movdqa 32(<1mysp=int64#4),>1r2=int6464#1
+# asm 2: movdqa 32(<1mysp=%rcx),>1r2=%xmm0
+movdqa 32(%rcx),%xmm0
+
+# qhasm: float6464 1r2 -= 0t14
+# asm 1: subpd <0t14=int6464#4,<1r2=int6464#1
+# asm 2: subpd <0t14=%xmm3,<1r2=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 1r2 += 0t17
+# asm 1: addpd <0t17=int6464#7,<1r2=int6464#1
+# asm 2: addpd <0t17=%xmm6,<1r2=%xmm0
+addpd %xmm6,%xmm0
+
+# qhasm: 2t20 = 0t20
+# asm 1: movdqa <0t20=int6464#10,>2t20=int6464#2
+# asm 2: movdqa <0t20=%xmm9,>2t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 2t20 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<2t20=int6464#2
+# asm 2: mulpd TWO_TWO,<2t20=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 1r2 -= 2t20
+# asm 1: subpd <2t20=int6464#2,<1r2=int6464#1
+# asm 2: subpd <2t20=%xmm1,<1r2=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 32) = 1r2
+# asm 1: movdqa <1r2=int6464#1,32(<1mysp=int64#4)
+# asm 2: movdqa <1r2=%xmm0,32(<1mysp=%rcx)
+movdqa %xmm0,32(%rcx)
+
+# qhasm: 1r5 = *(int128 *)(1mysp + 80)
+# asm 1: movdqa 80(<1mysp=int64#4),>1r5=int6464#1
+# asm 2: movdqa 80(<1mysp=%rcx),>1r5=%xmm0
+movdqa 80(%rcx),%xmm0
+
+# qhasm: 2t14 = 0t14
+# asm 1: movdqa <0t14=int6464#4,>2t14=int6464#2
+# asm 2: movdqa <0t14=%xmm3,>2t14=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 2t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t14=int6464#2
+# asm 2: mulpd SIX_SIX,<2t14=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r5 -= 2t14
+# asm 1: subpd <2t14=int6464#2,<1r5=int6464#1
+# asm 2: subpd <2t14=%xmm1,<1r5=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t17 = 0t17
+# asm 1: movdqa <0t17=int6464#7,>2t17=int6464#2
+# asm 2: movdqa <0t17=%xmm6,>2t17=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 2t17 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<2t17=int6464#2
+# asm 2: mulpd FIVE_FIVE,<2t17=%xmm1
+mulpd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 1r5 += 2t17
+# asm 1: addpd <2t17=int6464#2,<1r5=int6464#1
+# asm 2: addpd <2t17=%xmm1,<1r5=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 2t20 = 0t20
+# asm 1: movdqa <0t20=int6464#10,>2t20=int6464#2
+# asm 2: movdqa <0t20=%xmm9,>2t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 2t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t20=int6464#2
+# asm 2: mulpd SIX_SIX,<2t20=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r5 -= 2t20
+# asm 1: subpd <2t20=int6464#2,<1r5=int6464#1
+# asm 2: subpd <2t20=%xmm1,<1r5=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 80) = 1r5
+# asm 1: movdqa <1r5=int6464#1,80(<1mysp=int64#4)
+# asm 2: movdqa <1r5=%xmm0,80(<1mysp=%rcx)
+movdqa %xmm0,80(%rcx)
+
+# qhasm: 1r8 = *(int128 *)(1mysp + 128)
+# asm 1: movdqa 128(<1mysp=int64#4),>1r8=int6464#1
+# asm 2: movdqa 128(<1mysp=%rcx),>1r8=%xmm0
+movdqa 128(%rcx),%xmm0
+
+# qhasm: 2t14 = 0t14
+# asm 1: movdqa <0t14=int6464#4,>2t14=int6464#2
+# asm 2: movdqa <0t14=%xmm3,>2t14=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 2t14 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<2t14=int6464#2
+# asm 2: mulpd FOUR_FOUR,<2t14=%xmm1
+mulpd FOUR_FOUR,%xmm1
+
+# qhasm: float6464 1r8 -= 2t14
+# asm 1: subpd <2t14=int6464#2,<1r8=int6464#1
+# asm 2: subpd <2t14=%xmm1,<1r8=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t17 = 0t17
+# asm 1: movdqa <0t17=int6464#7,>2t17=int6464#2
+# asm 2: movdqa <0t17=%xmm6,>2t17=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 2t17 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<2t17=int6464#2
+# asm 2: mulpd THREE_THREE,<2t17=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 1r8 += 2t17
+# asm 1: addpd <2t17=int6464#2,<1r8=int6464#1
+# asm 2: addpd <2t17=%xmm1,<1r8=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 2t20 = 0t20
+# asm 1: movdqa <0t20=int6464#10,>2t20=int6464#2
+# asm 2: movdqa <0t20=%xmm9,>2t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 2t20 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<2t20=int6464#2
+# asm 2: mulpd THREE_THREE,<2t20=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 1r8 -= 2t20
+# asm 1: subpd <2t20=int6464#2,<1r8=int6464#1
+# asm 2: subpd <2t20=%xmm1,<1r8=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 128) = 1r8
+# asm 1: movdqa <1r8=int6464#1,128(<1mysp=int64#4)
+# asm 2: movdqa <1r8=%xmm0,128(<1mysp=%rcx)
+movdqa %xmm0,128(%rcx)
+
+# qhasm: 1r11 = *(int128 *)(1mysp + 176)
+# asm 1: movdqa 176(<1mysp=int64#4),>1r11=int6464#1
+# asm 2: movdqa 176(<1mysp=%rcx),>1r11=%xmm0
+movdqa 176(%rcx),%xmm0
+
+# qhasm: 2t14 = 0t14
+# asm 1: movdqa <0t14=int6464#4,>2t14=int6464#2
+# asm 2: movdqa <0t14=%xmm3,>2t14=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 2t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t14=int6464#2
+# asm 2: mulpd SIX_SIX,<2t14=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r11 -= 2t14
+# asm 1: subpd <2t14=int6464#2,<1r11=int6464#1
+# asm 2: subpd <2t14=%xmm1,<1r11=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 2t17 = 0t17
+# asm 1: movdqa <0t17=int6464#7,>2t17=int6464#2
+# asm 2: movdqa <0t17=%xmm6,>2t17=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 2t17 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<2t17=int6464#2
+# asm 2: mulpd TWO_TWO,<2t17=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 1r11 += 2t17
+# asm 1: addpd <2t17=int6464#2,<1r11=int6464#1
+# asm 2: addpd <2t17=%xmm1,<1r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 2t20 = 0t20
+# asm 1: movdqa <0t20=int6464#10,>2t20=int6464#2
+# asm 2: movdqa <0t20=%xmm9,>2t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 2t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<2t20=int6464#2
+# asm 2: mulpd SIX_SIX,<2t20=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 1r11 += 2t20
+# asm 1: addpd <2t20=int6464#2,<1r11=int6464#1
+# asm 2: addpd <2t20=%xmm1,<1r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(1mysp + 176) = 1r11
+# asm 1: movdqa <1r11=int6464#1,176(<1mysp=int64#4)
+# asm 2: movdqa <1r11=%xmm0,176(<1mysp=%rcx)
+movdqa %xmm0,176(%rcx)
+
+# qhasm: int6464 0round
+
+# qhasm: int6464 0carry
+
+# qhasm: int6464 2t6
+
+# qhasm: r0 = *(int128 *)(1mysp + 0)
+# asm 1: movdqa 0(<1mysp=int64#4),>r0=int6464#1
+# asm 2: movdqa 0(<1mysp=%rcx),>r0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: r1 = *(int128 *)(1mysp + 16)
+# asm 1: movdqa 16(<1mysp=int64#4),>r1=int6464#2
+# asm 2: movdqa 16(<1mysp=%rcx),>r1=%xmm1
+movdqa 16(%rcx),%xmm1
+
+# qhasm: r2 = *(int128 *)(1mysp + 32)
+# asm 1: movdqa 32(<1mysp=int64#4),>r2=int6464#3
+# asm 2: movdqa 32(<1mysp=%rcx),>r2=%xmm2
+movdqa 32(%rcx),%xmm2
+
+# qhasm: r3 = *(int128 *)(1mysp + 48)
+# asm 1: movdqa 48(<1mysp=int64#4),>r3=int6464#4
+# asm 2: movdqa 48(<1mysp=%rcx),>r3=%xmm3
+movdqa 48(%rcx),%xmm3
+
+# qhasm: r4 = *(int128 *)(1mysp + 64)
+# asm 1: movdqa 64(<1mysp=int64#4),>r4=int6464#5
+# asm 2: movdqa 64(<1mysp=%rcx),>r4=%xmm4
+movdqa 64(%rcx),%xmm4
+
+# qhasm: r5 = *(int128 *)(1mysp + 80)
+# asm 1: movdqa 80(<1mysp=int64#4),>r5=int6464#6
+# asm 2: movdqa 80(<1mysp=%rcx),>r5=%xmm5
+movdqa 80(%rcx),%xmm5
+
+# qhasm: r6 = *(int128 *)(1mysp + 96)
+# asm 1: movdqa 96(<1mysp=int64#4),>r6=int6464#7
+# asm 2: movdqa 96(<1mysp=%rcx),>r6=%xmm6
+movdqa 96(%rcx),%xmm6
+
+# qhasm: r7 = *(int128 *)(1mysp + 112)
+# asm 1: movdqa 112(<1mysp=int64#4),>r7=int6464#8
+# asm 2: movdqa 112(<1mysp=%rcx),>r7=%xmm7
+movdqa 112(%rcx),%xmm7
+
+# qhasm: r8 = *(int128 *)(1mysp + 128)
+# asm 1: movdqa 128(<1mysp=int64#4),>r8=int6464#9
+# asm 2: movdqa 128(<1mysp=%rcx),>r8=%xmm8
+movdqa 128(%rcx),%xmm8
+
+# qhasm: r9 = *(int128 *)(1mysp + 144)
+# asm 1: movdqa 144(<1mysp=int64#4),>r9=int6464#10
+# asm 2: movdqa 144(<1mysp=%rcx),>r9=%xmm9
+movdqa 144(%rcx),%xmm9
+
+# qhasm: r10 = *(int128 *)(1mysp + 160)
+# asm 1: movdqa 160(<1mysp=int64#4),>r10=int6464#11
+# asm 2: movdqa 160(<1mysp=%rcx),>r10=%xmm10
+movdqa 160(%rcx),%xmm10
+
+# qhasm: r11 = *(int128 *)(1mysp + 176)
+# asm 1: movdqa 176(<1mysp=int64#4),>r11=int6464#12
+# asm 2: movdqa 176(<1mysp=%rcx),>r11=%xmm11
+movdqa 176(%rcx),%xmm11
+
+# qhasm: 0round = ROUND_ROUND
+# asm 1: movdqa ROUND_ROUND,<0round=int6464#13
+# asm 2: movdqa ROUND_ROUND,<0round=%xmm12
+movdqa ROUND_ROUND,%xmm12
+
+# qhasm: 0carry = r1
+# asm 1: movdqa <r1=int6464#2,>0carry=int6464#14
+# asm 2: movdqa <r1=%xmm1,>0carry=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r2 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r2=int6464#3
+# asm 2: addpd <0carry=%xmm13,<r2=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r1 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r1=int6464#2
+# asm 2: subpd <0carry=%xmm13,<r1=%xmm1
+subpd %xmm13,%xmm1
+
+# qhasm: 0carry = r4
+# asm 1: movdqa <r4=int6464#5,>0carry=int6464#14
+# asm 2: movdqa <r4=%xmm4,>0carry=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r5 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r5=int6464#6
+# asm 2: addpd <0carry=%xmm13,<r5=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r4 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r4=int6464#5
+# asm 2: subpd <0carry=%xmm13,<r4=%xmm4
+subpd %xmm13,%xmm4
+
+# qhasm: 0carry = r7
+# asm 1: movdqa <r7=int6464#8,>0carry=int6464#14
+# asm 2: movdqa <r7=%xmm7,>0carry=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r8 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r8=int6464#9
+# asm 2: addpd <0carry=%xmm13,<r8=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r7 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r7=int6464#8
+# asm 2: subpd <0carry=%xmm13,<r7=%xmm7
+subpd %xmm13,%xmm7
+
+# qhasm: 0carry = r10
+# asm 1: movdqa <r10=int6464#11,>0carry=int6464#14
+# asm 2: movdqa <r10=%xmm10,>0carry=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r11 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r11=int6464#12
+# asm 2: addpd <0carry=%xmm13,<r11=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r10 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r10=int6464#11
+# asm 2: subpd <0carry=%xmm13,<r10=%xmm10
+subpd %xmm13,%xmm10
+
+# qhasm: 0carry = r2
+# asm 1: movdqa <r2=int6464#3,>0carry=int6464#14
+# asm 2: movdqa <r2=%xmm2,>0carry=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r3 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r3=int6464#4
+# asm 2: addpd <0carry=%xmm13,<r3=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r2 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r2=int6464#3
+# asm 2: subpd <0carry=%xmm13,<r2=%xmm2
+subpd %xmm13,%xmm2
+
+# qhasm: 0carry = r5
+# asm 1: movdqa <r5=int6464#6,>0carry=int6464#14
+# asm 2: movdqa <r5=%xmm5,>0carry=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r6 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r6=int6464#7
+# asm 2: addpd <0carry=%xmm13,<r6=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r5 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r5=int6464#6
+# asm 2: subpd <0carry=%xmm13,<r5=%xmm5
+subpd %xmm13,%xmm5
+
+# qhasm: 0carry = r8
+# asm 1: movdqa <r8=int6464#9,>0carry=int6464#14
+# asm 2: movdqa <r8=%xmm8,>0carry=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r9 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r9=int6464#10
+# asm 2: addpd <0carry=%xmm13,<r9=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r8 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r8=int6464#9
+# asm 2: subpd <0carry=%xmm13,<r8=%xmm8
+subpd %xmm13,%xmm8
+
+# qhasm: 0carry = r11
+# asm 1: movdqa <r11=int6464#12,>0carry=int6464#14
+# asm 2: movdqa <r11=%xmm11,>0carry=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r0 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r0=int6464#1
+# asm 2: subpd <0carry=%xmm13,<r0=%xmm0
+subpd %xmm13,%xmm0
+
+# qhasm: float6464 r3 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r3=int6464#4
+# asm 2: subpd <0carry=%xmm13,<r3=%xmm3
+subpd %xmm13,%xmm3
+
+# qhasm: 2t6 = 0carry
+# asm 1: movdqa <0carry=int6464#14,>2t6=int6464#15
+# asm 2: movdqa <0carry=%xmm13,>2t6=%xmm14
+movdqa %xmm13,%xmm14
+
+# qhasm: float6464 2t6 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<2t6=int6464#15
+# asm 2: mulpd FOUR_FOUR,<2t6=%xmm14
+mulpd FOUR_FOUR,%xmm14
+
+# qhasm: float6464 r6 -= 2t6
+# asm 1: subpd <2t6=int6464#15,<r6=int6464#7
+# asm 2: subpd <2t6=%xmm14,<r6=%xmm6
+subpd %xmm14,%xmm6
+
+# qhasm: float6464 r9 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r9=int6464#10
+# asm 2: subpd <0carry=%xmm13,<r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r11 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r11=int6464#12
+# asm 2: subpd <0carry=%xmm13,<r11=%xmm11
+subpd %xmm13,%xmm11
+
+# qhasm: 0carry = r0
+# asm 1: movdqa <r0=int6464#1,>0carry=int6464#14
+# asm 2: movdqa <r0=%xmm0,>0carry=%xmm13
+movdqa %xmm0,%xmm13
+
+# qhasm: float6464 0carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
+mulpd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r1 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r1=int6464#2
+# asm 2: addpd <0carry=%xmm13,<r1=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: float6464 0carry *= V6_V6
+# asm 1: mulpd V6_V6,<0carry=int6464#14
+# asm 2: mulpd V6_V6,<0carry=%xmm13
+mulpd V6_V6,%xmm13
+
+# qhasm: float6464 r0 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r0=int6464#1
+# asm 2: subpd <0carry=%xmm13,<r0=%xmm0
+subpd %xmm13,%xmm0
+
+# qhasm: 0carry = r3
+# asm 1: movdqa <r3=int6464#4,>0carry=int6464#14
+# asm 2: movdqa <r3=%xmm3,>0carry=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r4 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r4=int6464#5
+# asm 2: addpd <0carry=%xmm13,<r4=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r3 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r3=int6464#4
+# asm 2: subpd <0carry=%xmm13,<r3=%xmm3
+subpd %xmm13,%xmm3
+
+# qhasm: 0carry = r6
+# asm 1: movdqa <r6=int6464#7,>0carry=int6464#14
+# asm 2: movdqa <r6=%xmm6,>0carry=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 0carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
+mulpd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r7 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r7=int6464#8
+# asm 2: addpd <0carry=%xmm13,<r7=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: float6464 0carry *= V6_V6
+# asm 1: mulpd V6_V6,<0carry=int6464#14
+# asm 2: mulpd V6_V6,<0carry=%xmm13
+mulpd V6_V6,%xmm13
+
+# qhasm: float6464 r6 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r6=int6464#7
+# asm 2: subpd <0carry=%xmm13,<r6=%xmm6
+subpd %xmm13,%xmm6
+
+# qhasm: 0carry = r9
+# asm 1: movdqa <r9=int6464#10,>0carry=int6464#14
+# asm 2: movdqa <r9=%xmm9,>0carry=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r10 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r10=int6464#11
+# asm 2: addpd <0carry=%xmm13,<r10=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r9 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r9=int6464#10
+# asm 2: subpd <0carry=%xmm13,<r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: 0carry = r1
+# asm 1: movdqa <r1=int6464#2,>0carry=int6464#14
+# asm 2: movdqa <r1=%xmm1,>0carry=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r2 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r2=int6464#3
+# asm 2: addpd <0carry=%xmm13,<r2=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r1 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r1=int6464#2
+# asm 2: subpd <0carry=%xmm13,<r1=%xmm1
+subpd %xmm13,%xmm1
+
+# qhasm: 0carry = r4
+# asm 1: movdqa <r4=int6464#5,>0carry=int6464#14
+# asm 2: movdqa <r4=%xmm4,>0carry=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r5 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r5=int6464#6
+# asm 2: addpd <0carry=%xmm13,<r5=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r4 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r4=int6464#5
+# asm 2: subpd <0carry=%xmm13,<r4=%xmm4
+subpd %xmm13,%xmm4
+
+# qhasm: 0carry = r7
+# asm 1: movdqa <r7=int6464#8,>0carry=int6464#14
+# asm 2: movdqa <r7=%xmm7,>0carry=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r8 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r8=int6464#9
+# asm 2: addpd <0carry=%xmm13,<r8=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r7 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r7=int6464#8
+# asm 2: subpd <0carry=%xmm13,<r7=%xmm7
+subpd %xmm13,%xmm7
+
+# qhasm: 0carry = r10
+# asm 1: movdqa <r10=int6464#11,>0carry=int6464#14
+# asm 2: movdqa <r10=%xmm10,>0carry=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r11 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r11=int6464#12
+# asm 2: addpd <0carry=%xmm13,<r11=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r10 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r10=int6464#11
+# asm 2: subpd <0carry=%xmm13,<r10=%xmm10
+subpd %xmm13,%xmm10
+
+# qhasm: *(int128 *)(rop +   0) =  r0
+# asm 1: movdqa <r0=int6464#1,0(<rop=int64#1)
+# asm 2: movdqa <r0=%xmm0,0(<rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(rop +  16) =  r1
+# asm 1: movdqa <r1=int6464#2,16(<rop=int64#1)
+# asm 2: movdqa <r1=%xmm1,16(<rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(rop +  32) =  r2
+# asm 1: movdqa <r2=int6464#3,32(<rop=int64#1)
+# asm 2: movdqa <r2=%xmm2,32(<rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(rop +  48) =  r3
+# asm 1: movdqa <r3=int6464#4,48(<rop=int64#1)
+# asm 2: movdqa <r3=%xmm3,48(<rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(rop +  64) =  r4
+# asm 1: movdqa <r4=int6464#5,64(<rop=int64#1)
+# asm 2: movdqa <r4=%xmm4,64(<rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(rop +  80) =  r5
+# asm 1: movdqa <r5=int6464#6,80(<rop=int64#1)
+# asm 2: movdqa <r5=%xmm5,80(<rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(rop +  96) =  r6
+# asm 1: movdqa <r6=int6464#7,96(<rop=int64#1)
+# asm 2: movdqa <r6=%xmm6,96(<rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(rop + 112) =  r7
+# asm 1: movdqa <r7=int6464#8,112(<rop=int64#1)
+# asm 2: movdqa <r7=%xmm7,112(<rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(rop + 128) =  r8
+# asm 1: movdqa <r8=int6464#9,128(<rop=int64#1)
+# asm 2: movdqa <r8=%xmm8,128(<rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(rop + 144) =  r9
+# asm 1: movdqa <r9=int6464#10,144(<rop=int64#1)
+# asm 2: movdqa <r9=%xmm9,144(<rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(rop + 160) = r10
+# asm 1: movdqa <r10=int6464#11,160(<rop=int64#1)
+# asm 2: movdqa <r10=%xmm10,160(<rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(rop + 176) = r11
+# asm 1: movdqa <r11=int6464#12,176(<rop=int64#1)
+# asm 2: movdqa <r11=%xmm11,176(<rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
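
The routine above finishes with the coefficient reduction of its 12-coefficient result: for each coefficient a carry is obtained by multiplying the coefficient by the inverse of the radix (VINV_VINV, or V6INV_V6INV for the positions handled with the larger radix), rounding to the nearest integer by adding and then subtracting the large constant ROUND_ROUND, adding the carry into the next coefficient and subtracting carry times the radix from the current one. The carry out of r11 is folded back into r0, r3, r6 (times four) and r9 according to the reduction polynomial, exactly as the subpd/mulpd sequence above shows. Each xmm register holds two double-precision coefficients processed in parallel; the C fragment below is a minimal per-lane sketch of one such carry step. The names round_const, v and vinv are placeholders for the constants defined in consts.s, and this is not the library's API.

/* Hedged sketch of one carry step from the reduction above.
 * round_const, v and vinv stand in for ROUND_ROUND, V_V/V6_V6 and
 * VINV_VINV/V6INV_V6INV from consts.s. */
static double round_to_nearest(double x, double round_const)
{
    double t = x + round_const;  /* fractional bits are rounded away here */
    return t - round_const;      /* leaving round(x)                      */
}

/* Valid for 0 <= i < 11; the carry out of r[11] is instead folded back
 * into r[0], r[3], r[6] and r[9] with the weights seen in the assembly. */
static void carry_step(double r[12], int i,
                       double v, double vinv, double round_const)
{
    double c = round_to_nearest(r[i] * vinv, round_const); /* c = round(r[i]/v) */
    r[i + 1] += c;                                          /* propagate carry   */
    r[i]     -= c * v;                                      /* reduce r[i]       */
}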

+ 446 - 0
dclxvi-20130329/fp2e_mulxi.s

@@ -0,0 +1,446 @@
+# File:   dclxvi-20130329/fp2e_mulxi.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_mulxi_qhasm
+.text
+.p2align 5
+.globl _fp2e_mulxi_qhasm
+.globl fp2e_mulxi_qhasm
+_fp2e_mulxi_qhasm:
+fp2e_mulxi_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op
+
+# qhasm: input 0rop
+
+# qhasm: input 0op
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0op +   0)
+# asm 1: movdqa 0(<0op=int64#2),>0r0=int6464#1
+# asm 2: movdqa 0(<0op=%rsi),>0r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0op +  16)
+# asm 1: movdqa 16(<0op=int64#2),>0r1=int6464#2
+# asm 2: movdqa 16(<0op=%rsi),>0r1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0op +  32)
+# asm 1: movdqa 32(<0op=int64#2),>0r2=int6464#3
+# asm 2: movdqa 32(<0op=%rsi),>0r2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0op +  48)
+# asm 1: movdqa 48(<0op=int64#2),>0r3=int6464#4
+# asm 2: movdqa 48(<0op=%rsi),>0r3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0op +  64)
+# asm 1: movdqa 64(<0op=int64#2),>0r4=int6464#5
+# asm 2: movdqa 64(<0op=%rsi),>0r4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0op +  80)
+# asm 1: movdqa 80(<0op=int64#2),>0r5=int6464#6
+# asm 2: movdqa 80(<0op=%rsi),>0r5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0op +  96)
+# asm 1: movdqa 96(<0op=int64#2),>0r6=int6464#7
+# asm 2: movdqa 96(<0op=%rsi),>0r6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0op + 112)
+# asm 1: movdqa 112(<0op=int64#2),>0r7=int6464#8
+# asm 2: movdqa 112(<0op=%rsi),>0r7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0op + 128)
+# asm 1: movdqa 128(<0op=int64#2),>0r8=int6464#9
+# asm 2: movdqa 128(<0op=%rsi),>0r8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0op + 144)
+# asm 1: movdqa 144(<0op=int64#2),>0r9=int6464#10
+# asm 2: movdqa 144(<0op=%rsi),>0r9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0op + 160)
+# asm 1: movdqa 160(<0op=int64#2),>0r10=int6464#11
+# asm 2: movdqa 160(<0op=%rsi),>0r10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0op + 176)
+# asm 1: movdqa 176(<0op=int64#2),>0r11=int6464#12
+# asm 2: movdqa 176(<0op=%rsi),>0r11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: int6464 1t1
+
+# qhasm: int6464 1t2
+
+# qhasm: int6464 1t3
+
+# qhasm: int6464 0t4
+
+# qhasm: int6464 0t5
+
+# qhasm: int6464 0t6
+
+# qhasm: int6464 0t7
+
+# qhasm: int6464 0t8
+
+# qhasm: int6464 0t9
+
+# qhasm: int6464 0t10
+
+# qhasm: int6464 0t11
+
+# qhasm: 1t0 = 0r0
+# asm 1: movdqa <0r0=int6464#1,>1t0=int6464#13
+# asm 2: movdqa <0r0=%xmm0,>1t0=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: float6464 0r0 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r0=int6464#1
+# asm 2: mulpd THREE_MINUSONE,<0r0=%xmm0
+mulpd THREE_MINUSONE,%xmm0
+
+# qhasm: float6464 1t0 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<1t0=int6464#13
+# asm 2: mulpd ONE_THREE,<1t0=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r0[0] += 0r0[1];0r0[1] = 1t0[0] + 1t0[1]
+# asm 1: haddpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: haddpd <1t0=%xmm12,<0r0=%xmm0
+haddpd %xmm12,%xmm0
+
+# qhasm: 1t1 = 0r1
+# asm 1: movdqa <0r1=int6464#2,>1t1=int6464#13
+# asm 2: movdqa <0r1=%xmm1,>1t1=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: float6464 0r1 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r1=int6464#2
+# asm 2: mulpd THREE_MINUSONE,<0r1=%xmm1
+mulpd THREE_MINUSONE,%xmm1
+
+# qhasm: float6464 1t1 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<1t1=int6464#13
+# asm 2: mulpd ONE_THREE,<1t1=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r1[0] += 0r1[1];0r1[1] = 1t1[0] + 1t1[1]
+# asm 1: haddpd <1t1=int6464#13,<0r1=int6464#2
+# asm 2: haddpd <1t1=%xmm12,<0r1=%xmm1
+haddpd %xmm12,%xmm1
+
+# qhasm: 1t2 = 0r2
+# asm 1: movdqa <0r2=int6464#3,>1t2=int6464#13
+# asm 2: movdqa <0r2=%xmm2,>1t2=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: float6464 0r2 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r2=int6464#3
+# asm 2: mulpd THREE_MINUSONE,<0r2=%xmm2
+mulpd THREE_MINUSONE,%xmm2
+
+# qhasm: float6464 1t2 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<1t2=int6464#13
+# asm 2: mulpd ONE_THREE,<1t2=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r2[0] += 0r2[1];0r2[1] = 1t2[0] + 1t2[1]
+# asm 1: haddpd <1t2=int6464#13,<0r2=int6464#3
+# asm 2: haddpd <1t2=%xmm12,<0r2=%xmm2
+haddpd %xmm12,%xmm2
+
+# qhasm: 1t3 = 0r3
+# asm 1: movdqa <0r3=int6464#4,>1t3=int6464#13
+# asm 2: movdqa <0r3=%xmm3,>1t3=%xmm12
+movdqa %xmm3,%xmm12
+
+# qhasm: float6464 0r3 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r3=int6464#4
+# asm 2: mulpd THREE_MINUSONE,<0r3=%xmm3
+mulpd THREE_MINUSONE,%xmm3
+
+# qhasm: float6464 1t3 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<1t3=int6464#13
+# asm 2: mulpd ONE_THREE,<1t3=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r3[0] += 0r3[1];0r3[1] = 1t3[0] + 1t3[1]
+# asm 1: haddpd <1t3=int6464#13,<0r3=int6464#4
+# asm 2: haddpd <1t3=%xmm12,<0r3=%xmm3
+haddpd %xmm12,%xmm3
+
+# qhasm: 0t4 = 0r4
+# asm 1: movdqa <0r4=int6464#5,>0t4=int6464#13
+# asm 2: movdqa <0r4=%xmm4,>0t4=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 0r4 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r4=int6464#5
+# asm 2: mulpd THREE_MINUSONE,<0r4=%xmm4
+mulpd THREE_MINUSONE,%xmm4
+
+# qhasm: float6464 0t4 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t4=int6464#13
+# asm 2: mulpd ONE_THREE,<0t4=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r4[0] += 0r4[1];0r4[1] = 0t4[0] + 0t4[1]
+# asm 1: haddpd <0t4=int6464#13,<0r4=int6464#5
+# asm 2: haddpd <0t4=%xmm12,<0r4=%xmm4
+haddpd %xmm12,%xmm4
+
+# qhasm: 0t5 = 0r5
+# asm 1: movdqa <0r5=int6464#6,>0t5=int6464#13
+# asm 2: movdqa <0r5=%xmm5,>0t5=%xmm12
+movdqa %xmm5,%xmm12
+
+# qhasm: float6464 0r5 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r5=int6464#6
+# asm 2: mulpd THREE_MINUSONE,<0r5=%xmm5
+mulpd THREE_MINUSONE,%xmm5
+
+# qhasm: float6464 0t5 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t5=int6464#13
+# asm 2: mulpd ONE_THREE,<0t5=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r5[0] += 0r5[1];0r5[1] = 0t5[0] + 0t5[1]
+# asm 1: haddpd <0t5=int6464#13,<0r5=int6464#6
+# asm 2: haddpd <0t5=%xmm12,<0r5=%xmm5
+haddpd %xmm12,%xmm5
+
+# qhasm: 0t6 = 0r6
+# asm 1: movdqa <0r6=int6464#7,>0t6=int6464#13
+# asm 2: movdqa <0r6=%xmm6,>0t6=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0r6 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r6=int6464#7
+# asm 2: mulpd THREE_MINUSONE,<0r6=%xmm6
+mulpd THREE_MINUSONE,%xmm6
+
+# qhasm: float6464 0t6 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t6=int6464#13
+# asm 2: mulpd ONE_THREE,<0t6=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r6[0] += 0r6[1];0r6[1] = 0t6[0] + 0t6[1]
+# asm 1: haddpd <0t6=int6464#13,<0r6=int6464#7
+# asm 2: haddpd <0t6=%xmm12,<0r6=%xmm6
+haddpd %xmm12,%xmm6
+
+# qhasm: 0t7 = 0r7
+# asm 1: movdqa <0r7=int6464#8,>0t7=int6464#13
+# asm 2: movdqa <0r7=%xmm7,>0t7=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 0r7 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r7=int6464#8
+# asm 2: mulpd THREE_MINUSONE,<0r7=%xmm7
+mulpd THREE_MINUSONE,%xmm7
+
+# qhasm: float6464 0t7 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t7=int6464#13
+# asm 2: mulpd ONE_THREE,<0t7=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r7[0] += 0r7[1];0r7[1] = 0t7[0] + 0t7[1]
+# asm 1: haddpd <0t7=int6464#13,<0r7=int6464#8
+# asm 2: haddpd <0t7=%xmm12,<0r7=%xmm7
+haddpd %xmm12,%xmm7
+
+# qhasm: 0t8 = 0r8
+# asm 1: movdqa <0r8=int6464#9,>0t8=int6464#13
+# asm 2: movdqa <0r8=%xmm8,>0t8=%xmm12
+movdqa %xmm8,%xmm12
+
+# qhasm: float6464 0r8 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r8=int6464#9
+# asm 2: mulpd THREE_MINUSONE,<0r8=%xmm8
+mulpd THREE_MINUSONE,%xmm8
+
+# qhasm: float6464 0t8 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t8=int6464#13
+# asm 2: mulpd ONE_THREE,<0t8=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r8[0] += 0r8[1];0r8[1] = 0t8[0] + 0t8[1]
+# asm 1: haddpd <0t8=int6464#13,<0r8=int6464#9
+# asm 2: haddpd <0t8=%xmm12,<0r8=%xmm8
+haddpd %xmm12,%xmm8
+
+# qhasm: 0t9 = 0r9
+# asm 1: movdqa <0r9=int6464#10,>0t9=int6464#13
+# asm 2: movdqa <0r9=%xmm9,>0t9=%xmm12
+movdqa %xmm9,%xmm12
+
+# qhasm: float6464 0r9 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r9=int6464#10
+# asm 2: mulpd THREE_MINUSONE,<0r9=%xmm9
+mulpd THREE_MINUSONE,%xmm9
+
+# qhasm: float6464 0t9 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t9=int6464#13
+# asm 2: mulpd ONE_THREE,<0t9=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r9[0] += 0r9[1];0r9[1] = 0t9[0] + 0t9[1]
+# asm 1: haddpd <0t9=int6464#13,<0r9=int6464#10
+# asm 2: haddpd <0t9=%xmm12,<0r9=%xmm9
+haddpd %xmm12,%xmm9
+
+# qhasm: 0t10 = 0r10
+# asm 1: movdqa <0r10=int6464#11,>0t10=int6464#13
+# asm 2: movdqa <0r10=%xmm10,>0t10=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 0r10 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r10=int6464#11
+# asm 2: mulpd THREE_MINUSONE,<0r10=%xmm10
+mulpd THREE_MINUSONE,%xmm10
+
+# qhasm: float6464 0t10 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t10=int6464#13
+# asm 2: mulpd ONE_THREE,<0t10=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r10[0] += 0r10[1];0r10[1] = 0t10[0] + 0t10[1]
+# asm 1: haddpd <0t10=int6464#13,<0r10=int6464#11
+# asm 2: haddpd <0t10=%xmm12,<0r10=%xmm10
+haddpd %xmm12,%xmm10
+
+# qhasm: 0t11 = 0r11
+# asm 1: movdqa <0r11=int6464#12,>0t11=int6464#13
+# asm 2: movdqa <0r11=%xmm11,>0t11=%xmm12
+movdqa %xmm11,%xmm12
+
+# qhasm: float6464 0r11 *= THREE_MINUSONE
+# asm 1: mulpd THREE_MINUSONE,<0r11=int6464#12
+# asm 2: mulpd THREE_MINUSONE,<0r11=%xmm11
+mulpd THREE_MINUSONE,%xmm11
+
+# qhasm: float6464 0t11 *= ONE_THREE
+# asm 1: mulpd ONE_THREE,<0t11=int6464#13
+# asm 2: mulpd ONE_THREE,<0t11=%xmm12
+mulpd ONE_THREE,%xmm12
+
+# qhasm: float6464 0r11[0] += 0r11[1];0r11[1] = 0t11[0] + 0t11[1]
+# asm 1: haddpd <0t11=int6464#13,<0r11=int6464#12
+# asm 2: haddpd <0t11=%xmm12,<0r11=%xmm11
+haddpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
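
fp2e_mulxi.s above multiplies an Fp2 element by the constant xi used to construct the degree-6 extension. Each xmm register holds one coefficient pair; multiplying the pair by THREE_MINUSONE and ONE_THREE and combining the two products with haddpd turns an input pair (x, y) into (3x - y, x + 3y), which is multiplication of x + y*i by xi = 3 + i in Fp[i] with i^2 = -1, assuming slot 0 holds the real coefficient (with the opposite layout the two constants simply swap roles). The fragment below is a minimal per-pair sketch under that assumption, not the library's fp2e interface.

/* Per-pair effect of fp2e_mulxi (sketch): a coefficient pair (x, y)
 * is mapped to (3x - y, x + 3y), i.e. (x + y*i) * (3 + i). */
static void mulxi_pair(double *x, double *y)
{
    double re = 3.0 * (*x) - (*y);   /* new real part:      3x - y */
    double im = (*x) + 3.0 * (*y);   /* new imaginary part: x + 3y */
    *x = re;
    *y = im;
}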

+ 249 - 0
dclxvi-20130329/fp2e_neg.s

@@ -0,0 +1,249 @@
+# File:   dclxvi-20130329/fp2e_neg.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_neg_qhasm
+.text
+.p2align 5
+.globl _fp2e_neg_qhasm
+.globl fp2e_neg_qhasm
+_fp2e_neg_qhasm:
+fp2e_neg_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op
+
+# qhasm: input 0rop
+
+# qhasm: input 0op
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0op +   0)
+# asm 1: movdqa 0(<0op=int64#2),>0r0=int6464#1
+# asm 2: movdqa 0(<0op=%rsi),>0r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0op +  16)
+# asm 1: movdqa 16(<0op=int64#2),>0r1=int6464#2
+# asm 2: movdqa 16(<0op=%rsi),>0r1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0op +  32)
+# asm 1: movdqa 32(<0op=int64#2),>0r2=int6464#3
+# asm 2: movdqa 32(<0op=%rsi),>0r2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0op +  48)
+# asm 1: movdqa 48(<0op=int64#2),>0r3=int6464#4
+# asm 2: movdqa 48(<0op=%rsi),>0r3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0op +  64)
+# asm 1: movdqa 64(<0op=int64#2),>0r4=int6464#5
+# asm 2: movdqa 64(<0op=%rsi),>0r4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0op +  80)
+# asm 1: movdqa 80(<0op=int64#2),>0r5=int6464#6
+# asm 2: movdqa 80(<0op=%rsi),>0r5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0op +  96)
+# asm 1: movdqa 96(<0op=int64#2),>0r6=int6464#7
+# asm 2: movdqa 96(<0op=%rsi),>0r6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0op + 112)
+# asm 1: movdqa 112(<0op=int64#2),>0r7=int6464#8
+# asm 2: movdqa 112(<0op=%rsi),>0r7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0op + 128)
+# asm 1: movdqa 128(<0op=int64#2),>0r8=int6464#9
+# asm 2: movdqa 128(<0op=%rsi),>0r8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0op + 144)
+# asm 1: movdqa 144(<0op=int64#2),>0r9=int6464#10
+# asm 2: movdqa 144(<0op=%rsi),>0r9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0op + 160)
+# asm 1: movdqa 160(<0op=int64#2),>0r10=int6464#11
+# asm 2: movdqa 160(<0op=%rsi),>0r10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0op + 176)
+# asm 1: movdqa 176(<0op=int64#2),>0r11=int6464#12
+# asm 2: movdqa 176(<0op=%rsi),>0r11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: 1t0 = MONE_MONE
+# asm 1: movdqa MONE_MONE,<1t0=int6464#13
+# asm 2: movdqa MONE_MONE,<1t0=%xmm12
+movdqa MONE_MONE,%xmm12
+
+# qhasm: float6464 0r0  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: mulpd <1t0=%xmm12,<0r0=%xmm0
+mulpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r1=int6464#2
+# asm 2: mulpd <1t0=%xmm12,<0r1=%xmm1
+mulpd %xmm12,%xmm1
+
+# qhasm: float6464 0r2  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r2=int6464#3
+# asm 2: mulpd <1t0=%xmm12,<0r2=%xmm2
+mulpd %xmm12,%xmm2
+
+# qhasm: float6464 0r3  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r3=int6464#4
+# asm 2: mulpd <1t0=%xmm12,<0r3=%xmm3
+mulpd %xmm12,%xmm3
+
+# qhasm: float6464 0r4  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: mulpd <1t0=%xmm12,<0r4=%xmm4
+mulpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r5=int6464#6
+# asm 2: mulpd <1t0=%xmm12,<0r5=%xmm5
+mulpd %xmm12,%xmm5
+
+# qhasm: float6464 0r6  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r6=int6464#7
+# asm 2: mulpd <1t0=%xmm12,<0r6=%xmm6
+mulpd %xmm12,%xmm6
+
+# qhasm: float6464 0r7  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r7=int6464#8
+# asm 2: mulpd <1t0=%xmm12,<0r7=%xmm7
+mulpd %xmm12,%xmm7
+
+# qhasm: float6464 0r8  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: mulpd <1t0=%xmm12,<0r8=%xmm8
+mulpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r9=int6464#10
+# asm 2: mulpd <1t0=%xmm12,<0r9=%xmm9
+mulpd %xmm12,%xmm9
+
+# qhasm: float6464 0r10 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r10=int6464#11
+# asm 2: mulpd <1t0=%xmm12,<0r10=%xmm10
+mulpd %xmm12,%xmm10
+
+# qhasm: float6464 0r11 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r11=int6464#12
+# asm 2: mulpd <1t0=%xmm12,<0r11=%xmm11
+mulpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
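
fp2e_neg.s negates an Fp2 element coefficient-wise: all twelve 16-byte vectors (24 doubles) are multiplied by the constant pair MONE_MONE = (-1, -1) and written to rop. The fp2e_neg2.s listing that follows is the in-place variant, reading and writing through the single rop argument. A plain C sketch of both, assuming the 24-double coefficient layout these listings load and store:

/* Sketch of fp2e_neg (out-of-place) and fp2e_neg2 (in-place): negate all
 * 24 double coefficients; the assembly does this as 12 mulpd's by (-1,-1). */
static void fp2e_neg_sketch(double rop[24], const double op[24])
{
    for (int j = 0; j < 24; j++)
        rop[j] = -op[j];
}

static void fp2e_neg2_sketch(double rop[24])
{
    for (int j = 0; j < 24; j++)
        rop[j] = -rop[j];
}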

+ 245 - 0
dclxvi-20130329/fp2e_neg2.s

@@ -0,0 +1,245 @@
+# File:   dclxvi-20130329/fp2e_neg2.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_neg2_qhasm
+.text
+.p2align 5
+.globl _fp2e_neg2_qhasm
+.globl fp2e_neg2_qhasm
+_fp2e_neg2_qhasm:
+fp2e_neg2_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: input 0rop
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0rop +   0)
+# asm 1: movdqa 0(<0rop=int64#1),>0r0=int6464#1
+# asm 2: movdqa 0(<0rop=%rdi),>0r0=%xmm0
+movdqa 0(%rdi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0rop +  16)
+# asm 1: movdqa 16(<0rop=int64#1),>0r1=int6464#2
+# asm 2: movdqa 16(<0rop=%rdi),>0r1=%xmm1
+movdqa 16(%rdi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0rop +  32)
+# asm 1: movdqa 32(<0rop=int64#1),>0r2=int6464#3
+# asm 2: movdqa 32(<0rop=%rdi),>0r2=%xmm2
+movdqa 32(%rdi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0rop +  48)
+# asm 1: movdqa 48(<0rop=int64#1),>0r3=int6464#4
+# asm 2: movdqa 48(<0rop=%rdi),>0r3=%xmm3
+movdqa 48(%rdi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0rop +  64)
+# asm 1: movdqa 64(<0rop=int64#1),>0r4=int6464#5
+# asm 2: movdqa 64(<0rop=%rdi),>0r4=%xmm4
+movdqa 64(%rdi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0rop +  80)
+# asm 1: movdqa 80(<0rop=int64#1),>0r5=int6464#6
+# asm 2: movdqa 80(<0rop=%rdi),>0r5=%xmm5
+movdqa 80(%rdi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0rop +  96)
+# asm 1: movdqa 96(<0rop=int64#1),>0r6=int6464#7
+# asm 2: movdqa 96(<0rop=%rdi),>0r6=%xmm6
+movdqa 96(%rdi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0rop + 112)
+# asm 1: movdqa 112(<0rop=int64#1),>0r7=int6464#8
+# asm 2: movdqa 112(<0rop=%rdi),>0r7=%xmm7
+movdqa 112(%rdi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0rop + 128)
+# asm 1: movdqa 128(<0rop=int64#1),>0r8=int6464#9
+# asm 2: movdqa 128(<0rop=%rdi),>0r8=%xmm8
+movdqa 128(%rdi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0rop + 144)
+# asm 1: movdqa 144(<0rop=int64#1),>0r9=int6464#10
+# asm 2: movdqa 144(<0rop=%rdi),>0r9=%xmm9
+movdqa 144(%rdi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0rop + 160)
+# asm 1: movdqa 160(<0rop=int64#1),>0r10=int6464#11
+# asm 2: movdqa 160(<0rop=%rdi),>0r10=%xmm10
+movdqa 160(%rdi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0rop + 176)
+# asm 1: movdqa 176(<0rop=int64#1),>0r11=int6464#12
+# asm 2: movdqa 176(<0rop=%rdi),>0r11=%xmm11
+movdqa 176(%rdi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: 1t0 = MONE_MONE
+# asm 1: movdqa MONE_MONE,<1t0=int6464#13
+# asm 2: movdqa MONE_MONE,<1t0=%xmm12
+movdqa MONE_MONE,%xmm12
+
+# qhasm: float6464 0r0  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: mulpd <1t0=%xmm12,<0r0=%xmm0
+mulpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r1=int6464#2
+# asm 2: mulpd <1t0=%xmm12,<0r1=%xmm1
+mulpd %xmm12,%xmm1
+
+# qhasm: float6464 0r2  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r2=int6464#3
+# asm 2: mulpd <1t0=%xmm12,<0r2=%xmm2
+mulpd %xmm12,%xmm2
+
+# qhasm: float6464 0r3  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r3=int6464#4
+# asm 2: mulpd <1t0=%xmm12,<0r3=%xmm3
+mulpd %xmm12,%xmm3
+
+# qhasm: float6464 0r4  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: mulpd <1t0=%xmm12,<0r4=%xmm4
+mulpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r5=int6464#6
+# asm 2: mulpd <1t0=%xmm12,<0r5=%xmm5
+mulpd %xmm12,%xmm5
+
+# qhasm: float6464 0r6  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r6=int6464#7
+# asm 2: mulpd <1t0=%xmm12,<0r6=%xmm6
+mulpd %xmm12,%xmm6
+
+# qhasm: float6464 0r7  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r7=int6464#8
+# asm 2: mulpd <1t0=%xmm12,<0r7=%xmm7
+mulpd %xmm12,%xmm7
+
+# qhasm: float6464 0r8  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: mulpd <1t0=%xmm12,<0r8=%xmm8
+mulpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r9=int6464#10
+# asm 2: mulpd <1t0=%xmm12,<0r9=%xmm9
+mulpd %xmm12,%xmm9
+
+# qhasm: float6464 0r10 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r10=int6464#11
+# asm 2: mulpd <1t0=%xmm12,<0r10=%xmm10
+mulpd %xmm12,%xmm10
+
+# qhasm: float6464 0r11 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r11=int6464#12
+# asm 2: mulpd <1t0=%xmm12,<0r11=%xmm11
+mulpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
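
The next listing, fp2e_parallel_coeffmul.s, opens an operand-scanning (schoolbook) product of two 12-coefficient operands into the r0..r22 intermediate result declared at its top: the products of op1's coefficient 0 with all twelve coefficients of op2 initialise r0..r11, and the following coefficients of op1 are accumulated on top, with some cross terms pre-multiplied by SIX_SIX to account for the mixed-radix coefficient representation. The C fragment below sketches only the basic operand-scanning pattern; the SIX_SIX rescaling and the final reduction, which are specific to this representation, are omitted.

/* Operand-scanning multiplication of two 12-coefficient operands into a
 * 23-coefficient intermediate result, the pattern the listing below follows.
 * Mixed-radix rescaling and reduction are intentionally left out. */
static void coeffmul_sketch(double r[23], const double a[12], const double b[12])
{
    for (int k = 0; k < 23; k++)
        r[k] = 0.0;
    for (int i = 0; i < 12; i++)
        for (int j = 0; j < 12; j++)
            r[i + j] += a[i] * b[j];
}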

+ 3846 - 0
dclxvi-20130329/fp2e_parallel_coeffmul.s

@@ -0,0 +1,3846 @@
+# File:   dclxvi-20130329/fp2e_parallel_coeffmul.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_parallel_coeffmul_qhasm
+.text
+.p2align 5
+.globl _fp2e_parallel_coeffmul_qhasm
+.globl fp2e_parallel_coeffmul_qhasm
+_fp2e_parallel_coeffmul_qhasm:
+fp2e_parallel_coeffmul_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $768,%r11
+sub %r11,%rsp
+
+# qhasm: int64 rop
+
+# qhasm: int64 op1
+
+# qhasm: int64 op2
+
+# qhasm: input rop
+
+# qhasm: input op1
+
+# qhasm: input op2
+
+# qhasm: stack6144 playground
+
+# qhasm: int64 rp
+
+# qhasm: rp = &playground
+# asm 1: leaq <playground=stack6144#1,>rp=int64#4
+# asm 2: leaq <playground=0(%rsp),>rp=%rcx
+leaq 0(%rsp),%rcx
+
+# qhasm: int64 c0
+
+# qhasm: caller c0
+
+# qhasm: stack64 stack_c0
+
+# qhasm: int64 c1
+
+# qhasm: caller c1
+
+# qhasm: stack64 stack_c1
+
+# qhasm: int64 c2
+
+# qhasm: caller c2
+
+# qhasm: stack64 stack_c2
+
+# qhasm: int64 c3
+
+# qhasm: caller c3
+
+# qhasm: stack64 stack_c3
+
+# qhasm: int64 c4
+
+# qhasm: caller c4
+
+# qhasm: stack64 stack_c4
+
+# qhasm: int64 c5
+
+# qhasm: caller c5
+
+# qhasm: stack64 stack_c5
+
+# qhasm: int64 c6
+
+# qhasm: caller c6
+
+# qhasm: stack64 stack_c6
+
+# qhasm: int64 c7
+
+# qhasm: caller c7
+
+# qhasm: stack64 stack_c7
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 r12
+
+# qhasm: int6464 r13
+
+# qhasm: int6464 r14
+
+# qhasm: int6464 r15
+
+# qhasm: int6464 r16
+
+# qhasm: int6464 r17
+
+# qhasm: int6464 r18
+
+# qhasm: int6464 r19
+
+# qhasm: int6464 r20
+
+# qhasm: int6464 r21
+
+# qhasm: int6464 r22
+
+# qhasm: int6464 yoff
+
+# qhasm: int6464 t0
+
+# qhasm: int6464 t1
+
+# qhasm: int6464 t2
+
+# qhasm: int6464 t3
+
+# qhasm: int6464 t4
+
+# qhasm: int6464 t5
+
+# qhasm: int6464 t6
+
+# qhasm: int6464 t7
+
+# qhasm: int6464 t8
+
+# qhasm: int6464 t9
+
+# qhasm: int6464 t10
+
+# qhasm: int6464 t11
+
+# qhasm: int6464 t12
+
+# qhasm: int6464 t13
+
+# qhasm: int6464 t14
+
+# qhasm: int6464 t15
+
+# qhasm: int6464 t16
+
+# qhasm: int6464 t17
+
+# qhasm: int6464 t18
+
+# qhasm: int6464 t19
+
+# qhasm: int6464 t20
+
+# qhasm: int6464 t21
+
+# qhasm: int6464 t22
+
+# qhasm: int6464 ab1
+
+# qhasm: int6464 ab7
+
+# qhasm: int6464 ab1six
+
+# qhasm: int6464 ab2six
+
+# qhasm: int6464 ab3six
+
+# qhasm: int6464 ab4six
+
+# qhasm: int6464 ab5six
+
+# qhasm: int6464 ab6six
+
+# qhasm: int6464 ab7six
+
+# qhasm: int6464 ab8six
+
+# qhasm: int6464 ab9six
+
+# qhasm: int6464 ab10six
+
+# qhasm: int6464 ab11six
+
+# qhasm: int6464 sixsix
+
+# qhasm: int6464 b11
+
+# qhasm: sixsix = SIX_SIX
+# asm 1: movdqa SIX_SIX,<sixsix=int6464#1
+# asm 2: movdqa SIX_SIX,<sixsix=%xmm0
+movdqa SIX_SIX,%xmm0
+
+# qhasm: b11 = *(int128 *)(op2 + 176)
+# asm 1: movdqa 176(<op2=int64#3),>b11=int6464#2
+# asm 2: movdqa 176(<op2=%rdx),>b11=%xmm1
+movdqa 176(%rdx),%xmm1
+
+# qhasm: r11 = *(int128 *)(op1 + 0)
+# asm 1: movdqa 0(<op1=int64#2),>r11=int6464#3
+# asm 2: movdqa 0(<op1=%rsi),>r11=%xmm2
+movdqa 0(%rsi),%xmm2
+
+# qhasm: r0 = r11
+# asm 1: movdqa <r11=int6464#3,>r0=int6464#4
+# asm 2: movdqa <r11=%xmm2,>r0=%xmm3
+movdqa %xmm2,%xmm3
+
+# qhasm: float6464 r0 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<r0=int6464#4
+# asm 2: mulpd 0(<op2=%rdx),<r0=%xmm3
+mulpd 0(%rdx),%xmm3
+
+# qhasm: r1 = r11
+# asm 1: movdqa <r11=int6464#3,>r1=int6464#5
+# asm 2: movdqa <r11=%xmm2,>r1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: float6464 r1 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<r1=int6464#5
+# asm 2: mulpd 16(<op2=%rdx),<r1=%xmm4
+mulpd 16(%rdx),%xmm4
+
+# qhasm: r2 = r11
+# asm 1: movdqa <r11=int6464#3,>r2=int6464#6
+# asm 2: movdqa <r11=%xmm2,>r2=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm: float6464 r2 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<r2=int6464#6
+# asm 2: mulpd 32(<op2=%rdx),<r2=%xmm5
+mulpd 32(%rdx),%xmm5
+
+# qhasm: r3 = r11
+# asm 1: movdqa <r11=int6464#3,>r3=int6464#7
+# asm 2: movdqa <r11=%xmm2,>r3=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: float6464 r3 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<r3=int6464#7
+# asm 2: mulpd 48(<op2=%rdx),<r3=%xmm6
+mulpd 48(%rdx),%xmm6
+
+# qhasm: r4 = r11
+# asm 1: movdqa <r11=int6464#3,>r4=int6464#8
+# asm 2: movdqa <r11=%xmm2,>r4=%xmm7
+movdqa %xmm2,%xmm7
+
+# qhasm: float6464 r4 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<r4=int6464#8
+# asm 2: mulpd 64(<op2=%rdx),<r4=%xmm7
+mulpd 64(%rdx),%xmm7
+
+# qhasm: r5 = r11
+# asm 1: movdqa <r11=int6464#3,>r5=int6464#9
+# asm 2: movdqa <r11=%xmm2,>r5=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm: float6464 r5 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<r5=int6464#9
+# asm 2: mulpd 80(<op2=%rdx),<r5=%xmm8
+mulpd 80(%rdx),%xmm8
+
+# qhasm: r6 = r11
+# asm 1: movdqa <r11=int6464#3,>r6=int6464#10
+# asm 2: movdqa <r11=%xmm2,>r6=%xmm9
+movdqa %xmm2,%xmm9
+
+# qhasm: float6464 r6 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<r6=int6464#10
+# asm 2: mulpd 96(<op2=%rdx),<r6=%xmm9
+mulpd 96(%rdx),%xmm9
+
+# qhasm: r7 = r11
+# asm 1: movdqa <r11=int6464#3,>r7=int6464#11
+# asm 2: movdqa <r11=%xmm2,>r7=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm: float6464 r7 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<r7=int6464#11
+# asm 2: mulpd 112(<op2=%rdx),<r7=%xmm10
+mulpd 112(%rdx),%xmm10
+
+# qhasm: r8 = r11
+# asm 1: movdqa <r11=int6464#3,>r8=int6464#12
+# asm 2: movdqa <r11=%xmm2,>r8=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm: float6464 r8 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<r8=int6464#12
+# asm 2: mulpd 128(<op2=%rdx),<r8=%xmm11
+mulpd 128(%rdx),%xmm11
+
+# qhasm: r9 = r11
+# asm 1: movdqa <r11=int6464#3,>r9=int6464#13
+# asm 2: movdqa <r11=%xmm2,>r9=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: float6464 r9 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<r9=int6464#13
+# asm 2: mulpd 144(<op2=%rdx),<r9=%xmm12
+mulpd 144(%rdx),%xmm12
+
+# qhasm: r10 = r11
+# asm 1: movdqa <r11=int6464#3,>r10=int6464#14
+# asm 2: movdqa <r11=%xmm2,>r10=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 r10 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<r10=int6464#14
+# asm 2: mulpd 160(<op2=%rdx),<r10=%xmm13
+mulpd 160(%rdx),%xmm13
+
+# qhasm: float6464 r11 *= b11
+# asm 1: mulpd <b11=int6464#2,<r11=int6464#3
+# asm 2: mulpd <b11=%xmm1,<r11=%xmm2
+mulpd %xmm1,%xmm2
+
+# qhasm: *(int128 *)(rp + 0) = r0
+# asm 1: movdqa <r0=int6464#4,0(<rp=int64#4)
+# asm 2: movdqa <r0=%xmm3,0(<rp=%rcx)
+movdqa %xmm3,0(%rcx)
+
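+# Note: the block above computed r0..r11 = a0 * b0..b11 (a0 being the first
+# coefficient of op1) and wrote the finished r0 out to rp. The rest of the
+# routine repeats this shape for each remaining coefficient of op1: the
+# coefficient is loaded, some of its partial products use it directly while
+# others use a copy pre-scaled by sixsix, the products are accumulated into
+# the running sums, and the lowest completed result coefficient is stored to
+# rp so its xmm register can be reused in the next round.
+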
+# qhasm: ab1 = *(int128 *)(op1 + 16)
+# asm 1: movdqa 16(<op1=int64#2),>ab1=int6464#4
+# asm 2: movdqa 16(<op1=%rsi),>ab1=%xmm3
+movdqa 16(%rsi),%xmm3
+
+# qhasm: r12 = ab1
+# asm 1: movdqa <ab1=int6464#4,>r12=int6464#15
+# asm 2: movdqa <ab1=%xmm3,>r12=%xmm14
+movdqa %xmm3,%xmm14
+
+# qhasm: float6464 r12 *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<r12=int6464#15
+# asm 2: mulpd <sixsix=%xmm0,<r12=%xmm14
+mulpd %xmm0,%xmm14
+
+# qhasm: t1 = ab1
+# asm 1: movdqa <ab1=int6464#4,>t1=int6464#16
+# asm 2: movdqa <ab1=%xmm3,>t1=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t1 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t1=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t1=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r1 +=t1
+# asm 1: addpd <t1=int6464#16,<r1=int6464#5
+# asm 2: addpd <t1=%xmm15,<r1=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: float6464 ab1 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<ab1=int6464#4
+# asm 2: mulpd 96(<op2=%rdx),<ab1=%xmm3
+mulpd 96(%rdx),%xmm3
+
+# qhasm: float6464 r7 +=ab1
+# asm 1: addpd <ab1=int6464#4,<r7=int6464#11
+# asm 2: addpd <ab1=%xmm3,<r7=%xmm10
+addpd %xmm3,%xmm10
+
+# qhasm: t2 = r12
+# asm 1: movdqa <r12=int6464#15,>t2=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t2=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t2 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t2=int6464#4
+# asm 2: mulpd 16(<op2=%rdx),<t2=%xmm3
+mulpd 16(%rdx),%xmm3
+
+# qhasm: float6464 r2 +=t2
+# asm 1: addpd <t2=int6464#4,<r2=int6464#6
+# asm 2: addpd <t2=%xmm3,<r2=%xmm5
+addpd %xmm3,%xmm5
+
+# qhasm: t3 = r12
+# asm 1: movdqa <r12=int6464#15,>t3=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t3=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t3 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t3=int6464#4
+# asm 2: mulpd 32(<op2=%rdx),<t3=%xmm3
+mulpd 32(%rdx),%xmm3
+
+# qhasm: float6464 r3 +=t3
+# asm 1: addpd <t3=int6464#4,<r3=int6464#7
+# asm 2: addpd <t3=%xmm3,<r3=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: t4 = r12
+# asm 1: movdqa <r12=int6464#15,>t4=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t4=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t4 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t4=int6464#4
+# asm 2: mulpd 48(<op2=%rdx),<t4=%xmm3
+mulpd 48(%rdx),%xmm3
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#4,<r4=int6464#8
+# asm 2: addpd <t4=%xmm3,<r4=%xmm7
+addpd %xmm3,%xmm7
+
+# qhasm: t5 = r12
+# asm 1: movdqa <r12=int6464#15,>t5=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t5=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t5 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t5=int6464#4
+# asm 2: mulpd 64(<op2=%rdx),<t5=%xmm3
+mulpd 64(%rdx),%xmm3
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#4,<r5=int6464#9
+# asm 2: addpd <t5=%xmm3,<r5=%xmm8
+addpd %xmm3,%xmm8
+
+# qhasm: t6 = r12
+# asm 1: movdqa <r12=int6464#15,>t6=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t6=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t6 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t6=int6464#4
+# asm 2: mulpd 80(<op2=%rdx),<t6=%xmm3
+mulpd 80(%rdx),%xmm3
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#4,<r6=int6464#10
+# asm 2: addpd <t6=%xmm3,<r6=%xmm9
+addpd %xmm3,%xmm9
+
+# qhasm: t8 = r12
+# asm 1: movdqa <r12=int6464#15,>t8=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t8=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t8=int6464#4
+# asm 2: mulpd 112(<op2=%rdx),<t8=%xmm3
+mulpd 112(%rdx),%xmm3
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#4,<r8=int6464#12
+# asm 2: addpd <t8=%xmm3,<r8=%xmm11
+addpd %xmm3,%xmm11
+
+# qhasm: t9 = r12
+# asm 1: movdqa <r12=int6464#15,>t9=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t9=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t9=int6464#4
+# asm 2: mulpd 128(<op2=%rdx),<t9=%xmm3
+mulpd 128(%rdx),%xmm3
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#4,<r9=int6464#13
+# asm 2: addpd <t9=%xmm3,<r9=%xmm12
+addpd %xmm3,%xmm12
+
+# qhasm: t10 = r12
+# asm 1: movdqa <r12=int6464#15,>t10=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t10=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t10=int6464#4
+# asm 2: mulpd 144(<op2=%rdx),<t10=%xmm3
+mulpd 144(%rdx),%xmm3
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#4,<r10=int6464#14
+# asm 2: addpd <t10=%xmm3,<r10=%xmm13
+addpd %xmm3,%xmm13
+
+# qhasm: t11 = r12
+# asm 1: movdqa <r12=int6464#15,>t11=int6464#4
+# asm 2: movdqa <r12=%xmm14,>t11=%xmm3
+movdqa %xmm14,%xmm3
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t11=int6464#4
+# asm 2: mulpd 160(<op2=%rdx),<t11=%xmm3
+mulpd 160(%rdx),%xmm3
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#4,<r11=int6464#3
+# asm 2: addpd <t11=%xmm3,<r11=%xmm2
+addpd %xmm3,%xmm2
+
+# qhasm: float6464 r12 *= b11
+# asm 1: mulpd <b11=int6464#2,<r12=int6464#15
+# asm 2: mulpd <b11=%xmm1,<r12=%xmm14
+mulpd %xmm1,%xmm14
+
+# qhasm: *(int128 *)(rp + 16) = r1
+# asm 1: movdqa <r1=int6464#5,16(<rp=int64#4)
+# asm 2: movdqa <r1=%xmm4,16(<rp=%rcx)
+movdqa %xmm4,16(%rcx)
+
+# qhasm: r13 = *(int128 *)(op1 + 32)
+# asm 1: movdqa 32(<op1=int64#2),>r13=int6464#4
+# asm 2: movdqa 32(<op1=%rsi),>r13=%xmm3
+movdqa 32(%rsi),%xmm3
+
+# qhasm: ab2six = r13
+# asm 1: movdqa <r13=int6464#4,>ab2six=int6464#5
+# asm 2: movdqa <r13=%xmm3,>ab2six=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm: float6464 ab2six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab2six=int6464#5
+# asm 2: mulpd <sixsix=%xmm0,<ab2six=%xmm4
+mulpd %xmm0,%xmm4
+
+# qhasm: t2 = r13
+# asm 1: movdqa <r13=int6464#4,>t2=int6464#16
+# asm 2: movdqa <r13=%xmm3,>t2=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t2 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t2=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t2=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r2 +=t2
+# asm 1: addpd <t2=int6464#16,<r2=int6464#6
+# asm 2: addpd <t2=%xmm15,<r2=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: t7 = r13
+# asm 1: movdqa <r13=int6464#4,>t7=int6464#16
+# asm 2: movdqa <r13=%xmm3,>t7=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t7=int6464#16
+# asm 2: mulpd 80(<op2=%rdx),<t7=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#11
+# asm 2: addpd <t7=%xmm15,<r7=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t8 = r13
+# asm 1: movdqa <r13=int6464#4,>t8=int6464#16
+# asm 2: movdqa <r13=%xmm3,>t8=%xmm15
+movdqa %xmm3,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t8=int6464#16
+# asm 2: mulpd 96(<op2=%rdx),<t8=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#12
+# asm 2: addpd <t8=%xmm15,<r8=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: float6464 r13 *= b11
+# asm 1: mulpd <b11=int6464#2,<r13=int6464#4
+# asm 2: mulpd <b11=%xmm1,<r13=%xmm3
+mulpd %xmm1,%xmm3
+
+# qhasm: t3 = ab2six
+# asm 1: movdqa <ab2six=int6464#5,>t3=int6464#16
+# asm 2: movdqa <ab2six=%xmm4,>t3=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t3 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t3=int6464#16
+# asm 2: mulpd 16(<op2=%rdx),<t3=%xmm15
+mulpd 16(%rdx),%xmm15
+
+# qhasm: float6464 r3 +=t3
+# asm 1: addpd <t3=int6464#16,<r3=int6464#7
+# asm 2: addpd <t3=%xmm15,<r3=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: t4 = ab2six
+# asm 1: movdqa <ab2six=int6464#5,>t4=int6464#16
+# asm 2: movdqa <ab2six=%xmm4,>t4=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t4 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t4=int6464#16
+# asm 2: mulpd 32(<op2=%rdx),<t4=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#16,<r4=int6464#8
+# asm 2: addpd <t4=%xmm15,<r4=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: t5 = ab2six
+# asm 1: movdqa <ab2six=int6464#5,>t5=int6464#16
+# asm 2: movdqa <ab2six=%xmm4,>t5=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t5 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t5=int6464#16
+# asm 2: mulpd 48(<op2=%rdx),<t5=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#16,<r5=int6464#9
+# asm 2: addpd <t5=%xmm15,<r5=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: t6 = ab2six
+# asm 1: movdqa <ab2six=int6464#5,>t6=int6464#16
+# asm 2: movdqa <ab2six=%xmm4,>t6=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t6 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t6=int6464#16
+# asm 2: mulpd 64(<op2=%rdx),<t6=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#16,<r6=int6464#10
+# asm 2: addpd <t6=%xmm15,<r6=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t9 = ab2six
+# asm 1: movdqa <ab2six=int6464#5,>t9=int6464#16
+# asm 2: movdqa <ab2six=%xmm4,>t9=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t9=int6464#16
+# asm 2: mulpd 112(<op2=%rdx),<t9=%xmm15
+mulpd 112(%rdx),%xmm15
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#13
+# asm 2: addpd <t9=%xmm15,<r9=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t10 = ab2six
+# asm 1: movdqa <ab2six=int6464#5,>t10=int6464#16
+# asm 2: movdqa <ab2six=%xmm4,>t10=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t10=int6464#16
+# asm 2: mulpd 128(<op2=%rdx),<t10=%xmm15
+mulpd 128(%rdx),%xmm15
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#14
+# asm 2: addpd <t10=%xmm15,<r10=%xmm13
+addpd %xmm15,%xmm13
+
+# qhasm: t11 = ab2six
+# asm 1: movdqa <ab2six=int6464#5,>t11=int6464#16
+# asm 2: movdqa <ab2six=%xmm4,>t11=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t11=int6464#16
+# asm 2: mulpd 144(<op2=%rdx),<t11=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#3
+# asm 2: addpd <t11=%xmm15,<r11=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: float6464 ab2six *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<ab2six=int6464#5
+# asm 2: mulpd 160(<op2=%rdx),<ab2six=%xmm4
+mulpd 160(%rdx),%xmm4
+
+# qhasm: float6464 r12 += ab2six
+# asm 1: addpd <ab2six=int6464#5,<r12=int6464#15
+# asm 2: addpd <ab2six=%xmm4,<r12=%xmm14
+addpd %xmm4,%xmm14
+
+# qhasm: *(int128 *)(rp + 32) = r2
+# asm 1: movdqa <r2=int6464#6,32(<rp=int64#4)
+# asm 2: movdqa <r2=%xmm5,32(<rp=%rcx)
+movdqa %xmm5,32(%rcx)
+
+# qhasm: r14 = *(int128 *)(op1 + 48)
+# asm 1: movdqa 48(<op1=int64#2),>r14=int6464#5
+# asm 2: movdqa 48(<op1=%rsi),>r14=%xmm4
+movdqa 48(%rsi),%xmm4
+
+# qhasm: ab3six = r14
+# asm 1: movdqa <r14=int6464#5,>ab3six=int6464#6
+# asm 2: movdqa <r14=%xmm4,>ab3six=%xmm5
+movdqa %xmm4,%xmm5
+
+# qhasm: float6464 ab3six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab3six=int6464#6
+# asm 2: mulpd <sixsix=%xmm0,<ab3six=%xmm5
+mulpd %xmm0,%xmm5
+
+# qhasm: t3 = r14
+# asm 1: movdqa <r14=int6464#5,>t3=int6464#16
+# asm 2: movdqa <r14=%xmm4,>t3=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t3 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t3=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t3=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r3 +=t3
+# asm 1: addpd <t3=int6464#16,<r3=int6464#7
+# asm 2: addpd <t3=%xmm15,<r3=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: t7 = r14
+# asm 1: movdqa <r14=int6464#5,>t7=int6464#16
+# asm 2: movdqa <r14=%xmm4,>t7=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t7=int6464#16
+# asm 2: mulpd 64(<op2=%rdx),<t7=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#11
+# asm 2: addpd <t7=%xmm15,<r7=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t8 = r14
+# asm 1: movdqa <r14=int6464#5,>t8=int6464#16
+# asm 2: movdqa <r14=%xmm4,>t8=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t8=int6464#16
+# asm 2: mulpd 80(<op2=%rdx),<t8=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#12
+# asm 2: addpd <t8=%xmm15,<r8=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t9 = r14
+# asm 1: movdqa <r14=int6464#5,>t9=int6464#16
+# asm 2: movdqa <r14=%xmm4,>t9=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t9=int6464#16
+# asm 2: mulpd 96(<op2=%rdx),<t9=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#13
+# asm 2: addpd <t9=%xmm15,<r9=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t13 = r14
+# asm 1: movdqa <r14=int6464#5,>t13=int6464#16
+# asm 2: movdqa <r14=%xmm4,>t13=%xmm15
+movdqa %xmm4,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t13=int6464#16
+# asm 2: mulpd 160(<op2=%rdx),<t13=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#4
+# asm 2: addpd <t13=%xmm15,<r13=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: float6464 r14 *= b11
+# asm 1: mulpd <b11=int6464#2,<r14=int6464#5
+# asm 2: mulpd <b11=%xmm1,<r14=%xmm4
+mulpd %xmm1,%xmm4
+
+# qhasm: t4 = ab3six
+# asm 1: movdqa <ab3six=int6464#6,>t4=int6464#16
+# asm 2: movdqa <ab3six=%xmm5,>t4=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t4 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t4=int6464#16
+# asm 2: mulpd 16(<op2=%rdx),<t4=%xmm15
+mulpd 16(%rdx),%xmm15
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#16,<r4=int6464#8
+# asm 2: addpd <t4=%xmm15,<r4=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: t5 = ab3six
+# asm 1: movdqa <ab3six=int6464#6,>t5=int6464#16
+# asm 2: movdqa <ab3six=%xmm5,>t5=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t5 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t5=int6464#16
+# asm 2: mulpd 32(<op2=%rdx),<t5=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#16,<r5=int6464#9
+# asm 2: addpd <t5=%xmm15,<r5=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: t6 = ab3six
+# asm 1: movdqa <ab3six=int6464#6,>t6=int6464#16
+# asm 2: movdqa <ab3six=%xmm5,>t6=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t6 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t6=int6464#16
+# asm 2: mulpd 48(<op2=%rdx),<t6=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#16,<r6=int6464#10
+# asm 2: addpd <t6=%xmm15,<r6=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t10 = ab3six
+# asm 1: movdqa <ab3six=int6464#6,>t10=int6464#16
+# asm 2: movdqa <ab3six=%xmm5,>t10=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t10=int6464#16
+# asm 2: mulpd 112(<op2=%rdx),<t10=%xmm15
+mulpd 112(%rdx),%xmm15
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#14
+# asm 2: addpd <t10=%xmm15,<r10=%xmm13
+addpd %xmm15,%xmm13
+
+# qhasm: t11 = ab3six
+# asm 1: movdqa <ab3six=int6464#6,>t11=int6464#16
+# asm 2: movdqa <ab3six=%xmm5,>t11=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t11=int6464#16
+# asm 2: mulpd 128(<op2=%rdx),<t11=%xmm15
+mulpd 128(%rdx),%xmm15
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#3
+# asm 2: addpd <t11=%xmm15,<r11=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: float6464 ab3six *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<ab3six=int6464#6
+# asm 2: mulpd 144(<op2=%rdx),<ab3six=%xmm5
+mulpd 144(%rdx),%xmm5
+
+# qhasm: float6464 r12 += ab3six
+# asm 1: addpd <ab3six=int6464#6,<r12=int6464#15
+# asm 2: addpd <ab3six=%xmm5,<r12=%xmm14
+addpd %xmm5,%xmm14
+
+# qhasm: *(int128 *)(rp + 48) = r3
+# asm 1: movdqa <r3=int6464#7,48(<rp=int64#4)
+# asm 2: movdqa <r3=%xmm6,48(<rp=%rcx)
+movdqa %xmm6,48(%rcx)
+
+# qhasm: r15 = *(int128 *)(op1 + 64)
+# asm 1: movdqa 64(<op1=int64#2),>r15=int6464#6
+# asm 2: movdqa 64(<op1=%rsi),>r15=%xmm5
+movdqa 64(%rsi),%xmm5
+
+# qhasm: ab4six = r15
+# asm 1: movdqa <r15=int6464#6,>ab4six=int6464#7
+# asm 2: movdqa <r15=%xmm5,>ab4six=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: float6464 ab4six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab4six=int6464#7
+# asm 2: mulpd <sixsix=%xmm0,<ab4six=%xmm6
+mulpd %xmm0,%xmm6
+
+# qhasm: t4 = r15
+# asm 1: movdqa <r15=int6464#6,>t4=int6464#16
+# asm 2: movdqa <r15=%xmm5,>t4=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t4 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t4=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t4=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#16,<r4=int6464#8
+# asm 2: addpd <t4=%xmm15,<r4=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: t7 = r15
+# asm 1: movdqa <r15=int6464#6,>t7=int6464#16
+# asm 2: movdqa <r15=%xmm5,>t7=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t7=int6464#16
+# asm 2: mulpd 48(<op2=%rdx),<t7=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#11
+# asm 2: addpd <t7=%xmm15,<r7=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t8 = r15
+# asm 1: movdqa <r15=int6464#6,>t8=int6464#16
+# asm 2: movdqa <r15=%xmm5,>t8=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t8=int6464#16
+# asm 2: mulpd 64(<op2=%rdx),<t8=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#12
+# asm 2: addpd <t8=%xmm15,<r8=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t9 = r15
+# asm 1: movdqa <r15=int6464#6,>t9=int6464#16
+# asm 2: movdqa <r15=%xmm5,>t9=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t9=int6464#16
+# asm 2: mulpd 80(<op2=%rdx),<t9=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#13
+# asm 2: addpd <t9=%xmm15,<r9=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t10 = r15
+# asm 1: movdqa <r15=int6464#6,>t10=int6464#16
+# asm 2: movdqa <r15=%xmm5,>t10=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t10=int6464#16
+# asm 2: mulpd 96(<op2=%rdx),<t10=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#14
+# asm 2: addpd <t10=%xmm15,<r10=%xmm13
+addpd %xmm15,%xmm13
+
+# qhasm: t13 = r15
+# asm 1: movdqa <r15=int6464#6,>t13=int6464#16
+# asm 2: movdqa <r15=%xmm5,>t13=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t13=int6464#16
+# asm 2: mulpd 144(<op2=%rdx),<t13=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#4
+# asm 2: addpd <t13=%xmm15,<r13=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t14 = r15
+# asm 1: movdqa <r15=int6464#6,>t14=int6464#16
+# asm 2: movdqa <r15=%xmm5,>t14=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t14=int6464#16
+# asm 2: mulpd 160(<op2=%rdx),<t14=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#5
+# asm 2: addpd <t14=%xmm15,<r14=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: float6464 r15 *= b11
+# asm 1: mulpd <b11=int6464#2,<r15=int6464#6
+# asm 2: mulpd <b11=%xmm1,<r15=%xmm5
+mulpd %xmm1,%xmm5
+
+# qhasm: t5 = ab4six
+# asm 1: movdqa <ab4six=int6464#7,>t5=int6464#16
+# asm 2: movdqa <ab4six=%xmm6,>t5=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t5 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t5=int6464#16
+# asm 2: mulpd 16(<op2=%rdx),<t5=%xmm15
+mulpd 16(%rdx),%xmm15
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#16,<r5=int6464#9
+# asm 2: addpd <t5=%xmm15,<r5=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: t6 = ab4six
+# asm 1: movdqa <ab4six=int6464#7,>t6=int6464#16
+# asm 2: movdqa <ab4six=%xmm6,>t6=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t6 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t6=int6464#16
+# asm 2: mulpd 32(<op2=%rdx),<t6=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#16,<r6=int6464#10
+# asm 2: addpd <t6=%xmm15,<r6=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: t11 = ab4six
+# asm 1: movdqa <ab4six=int6464#7,>t11=int6464#16
+# asm 2: movdqa <ab4six=%xmm6,>t11=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t11=int6464#16
+# asm 2: mulpd 112(<op2=%rdx),<t11=%xmm15
+mulpd 112(%rdx),%xmm15
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#3
+# asm 2: addpd <t11=%xmm15,<r11=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: float6464 ab4six *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<ab4six=int6464#7
+# asm 2: mulpd 128(<op2=%rdx),<ab4six=%xmm6
+mulpd 128(%rdx),%xmm6
+
+# qhasm: float6464 r12 += ab4six
+# asm 1: addpd <ab4six=int6464#7,<r12=int6464#15
+# asm 2: addpd <ab4six=%xmm6,<r12=%xmm14
+addpd %xmm6,%xmm14
+
+# qhasm: *(int128 *)(rp + 64) = r4
+# asm 1: movdqa <r4=int6464#8,64(<rp=int64#4)
+# asm 2: movdqa <r4=%xmm7,64(<rp=%rcx)
+movdqa %xmm7,64(%rcx)
+
+# qhasm: r16 = *(int128 *)(op1 + 80)
+# asm 1: movdqa 80(<op1=int64#2),>r16=int6464#7
+# asm 2: movdqa 80(<op1=%rsi),>r16=%xmm6
+movdqa 80(%rsi),%xmm6
+
+# qhasm: ab5six = r16
+# asm 1: movdqa <r16=int6464#7,>ab5six=int6464#8
+# asm 2: movdqa <r16=%xmm6,>ab5six=%xmm7
+movdqa %xmm6,%xmm7
+
+# qhasm: float6464 ab5six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab5six=int6464#8
+# asm 2: mulpd <sixsix=%xmm0,<ab5six=%xmm7
+mulpd %xmm0,%xmm7
+
+# qhasm: t5 = r16
+# asm 1: movdqa <r16=int6464#7,>t5=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t5=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t5 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t5=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t5=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#16,<r5=int6464#9
+# asm 2: addpd <t5=%xmm15,<r5=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: t7 = r16
+# asm 1: movdqa <r16=int6464#7,>t7=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t7=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t7=int6464#16
+# asm 2: mulpd 32(<op2=%rdx),<t7=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#11
+# asm 2: addpd <t7=%xmm15,<r7=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: t8 = r16
+# asm 1: movdqa <r16=int6464#7,>t8=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t8=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t8=int6464#16
+# asm 2: mulpd 48(<op2=%rdx),<t8=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#12
+# asm 2: addpd <t8=%xmm15,<r8=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t9 = r16
+# asm 1: movdqa <r16=int6464#7,>t9=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t9=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t9=int6464#16
+# asm 2: mulpd 64(<op2=%rdx),<t9=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#13
+# asm 2: addpd <t9=%xmm15,<r9=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t10 = r16
+# asm 1: movdqa <r16=int6464#7,>t10=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t10=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t10=int6464#16
+# asm 2: mulpd 80(<op2=%rdx),<t10=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#14
+# asm 2: addpd <t10=%xmm15,<r10=%xmm13
+addpd %xmm15,%xmm13
+
+# qhasm: t11 = r16
+# asm 1: movdqa <r16=int6464#7,>t11=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t11=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t11=int6464#16
+# asm 2: mulpd 96(<op2=%rdx),<t11=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#3
+# asm 2: addpd <t11=%xmm15,<r11=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t13 = r16
+# asm 1: movdqa <r16=int6464#7,>t13=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t13=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t13=int6464#16
+# asm 2: mulpd 128(<op2=%rdx),<t13=%xmm15
+mulpd 128(%rdx),%xmm15
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#4
+# asm 2: addpd <t13=%xmm15,<r13=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t14 = r16
+# asm 1: movdqa <r16=int6464#7,>t14=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t14=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t14=int6464#16
+# asm 2: mulpd 144(<op2=%rdx),<t14=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#5
+# asm 2: addpd <t14=%xmm15,<r14=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: t15 = r16
+# asm 1: movdqa <r16=int6464#7,>t15=int6464#16
+# asm 2: movdqa <r16=%xmm6,>t15=%xmm15
+movdqa %xmm6,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t15=int6464#16
+# asm 2: mulpd 160(<op2=%rdx),<t15=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#6
+# asm 2: addpd <t15=%xmm15,<r15=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: float6464 r16 *= b11
+# asm 1: mulpd <b11=int6464#2,<r16=int6464#7
+# asm 2: mulpd <b11=%xmm1,<r16=%xmm6
+mulpd %xmm1,%xmm6
+
+# qhasm: t6 = ab5six
+# asm 1: movdqa <ab5six=int6464#8,>t6=int6464#16
+# asm 2: movdqa <ab5six=%xmm7,>t6=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm: float6464 t6 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t6=int6464#16
+# asm 2: mulpd 16(<op2=%rdx),<t6=%xmm15
+mulpd 16(%rdx),%xmm15
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#16,<r6=int6464#10
+# asm 2: addpd <t6=%xmm15,<r6=%xmm9
+addpd %xmm15,%xmm9
+
+# qhasm: float6464 ab5six *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<ab5six=int6464#8
+# asm 2: mulpd 112(<op2=%rdx),<ab5six=%xmm7
+mulpd 112(%rdx),%xmm7
+
+# qhasm: float6464 r12 += ab5six
+# asm 1: addpd <ab5six=int6464#8,<r12=int6464#15
+# asm 2: addpd <ab5six=%xmm7,<r12=%xmm14
+addpd %xmm7,%xmm14
+
+# qhasm: *(int128 *)(rp + 80) = r5
+# asm 1: movdqa <r5=int6464#9,80(<rp=int64#4)
+# asm 2: movdqa <r5=%xmm8,80(<rp=%rcx)
+movdqa %xmm8,80(%rcx)
+
+# qhasm: r17 = *(int128 *)(op1 + 96)
+# asm 1: movdqa 96(<op1=int64#2),>r17=int6464#8
+# asm 2: movdqa 96(<op1=%rsi),>r17=%xmm7
+movdqa 96(%rsi),%xmm7
+
+# qhasm: t6 = r17
+# asm 1: movdqa <r17=int6464#8,>t6=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t6=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t6 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t6=int6464#9
+# asm 2: mulpd 0(<op2=%rdx),<t6=%xmm8
+mulpd 0(%rdx),%xmm8
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#9,<r6=int6464#10
+# asm 2: addpd <t6=%xmm8,<r6=%xmm9
+addpd %xmm8,%xmm9
+
+# qhasm: t7 = r17
+# asm 1: movdqa <r17=int6464#8,>t7=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t7=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t7 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t7=int6464#9
+# asm 2: mulpd 16(<op2=%rdx),<t7=%xmm8
+mulpd 16(%rdx),%xmm8
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#9,<r7=int6464#11
+# asm 2: addpd <t7=%xmm8,<r7=%xmm10
+addpd %xmm8,%xmm10
+
+# qhasm: t8 = r17
+# asm 1: movdqa <r17=int6464#8,>t8=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t8=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t8=int6464#9
+# asm 2: mulpd 32(<op2=%rdx),<t8=%xmm8
+mulpd 32(%rdx),%xmm8
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#9,<r8=int6464#12
+# asm 2: addpd <t8=%xmm8,<r8=%xmm11
+addpd %xmm8,%xmm11
+
+# qhasm: t9 = r17
+# asm 1: movdqa <r17=int6464#8,>t9=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t9=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t9=int6464#9
+# asm 2: mulpd 48(<op2=%rdx),<t9=%xmm8
+mulpd 48(%rdx),%xmm8
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#9,<r9=int6464#13
+# asm 2: addpd <t9=%xmm8,<r9=%xmm12
+addpd %xmm8,%xmm12
+
+# qhasm: t10 = r17
+# asm 1: movdqa <r17=int6464#8,>t10=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t10=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t10=int6464#9
+# asm 2: mulpd 64(<op2=%rdx),<t10=%xmm8
+mulpd 64(%rdx),%xmm8
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#9,<r10=int6464#14
+# asm 2: addpd <t10=%xmm8,<r10=%xmm13
+addpd %xmm8,%xmm13
+
+# qhasm: t11 = r17
+# asm 1: movdqa <r17=int6464#8,>t11=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t11=int6464#9
+# asm 2: mulpd 80(<op2=%rdx),<t11=%xmm8
+mulpd 80(%rdx),%xmm8
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#9,<r11=int6464#3
+# asm 2: addpd <t11=%xmm8,<r11=%xmm2
+addpd %xmm8,%xmm2
+
+# qhasm: t12 = r17
+# asm 1: movdqa <r17=int6464#8,>t12=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t12 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t12=int6464#9
+# asm 2: mulpd 96(<op2=%rdx),<t12=%xmm8
+mulpd 96(%rdx),%xmm8
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#9,<r12=int6464#15
+# asm 2: addpd <t12=%xmm8,<r12=%xmm14
+addpd %xmm8,%xmm14
+
+# qhasm: t13 = r17
+# asm 1: movdqa <r17=int6464#8,>t13=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t13=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t13=int6464#9
+# asm 2: mulpd 112(<op2=%rdx),<t13=%xmm8
+mulpd 112(%rdx),%xmm8
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#9,<r13=int6464#4
+# asm 2: addpd <t13=%xmm8,<r13=%xmm3
+addpd %xmm8,%xmm3
+
+# qhasm: t14 = r17
+# asm 1: movdqa <r17=int6464#8,>t14=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t14=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t14=int6464#9
+# asm 2: mulpd 128(<op2=%rdx),<t14=%xmm8
+mulpd 128(%rdx),%xmm8
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#9,<r14=int6464#5
+# asm 2: addpd <t14=%xmm8,<r14=%xmm4
+addpd %xmm8,%xmm4
+
+# qhasm: t15 = r17
+# asm 1: movdqa <r17=int6464#8,>t15=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t15=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t15 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t15=int6464#9
+# asm 2: mulpd 144(<op2=%rdx),<t15=%xmm8
+mulpd 144(%rdx),%xmm8
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#9,<r15=int6464#6
+# asm 2: addpd <t15=%xmm8,<r15=%xmm5
+addpd %xmm8,%xmm5
+
+# qhasm: t16 = r17
+# asm 1: movdqa <r17=int6464#8,>t16=int6464#9
+# asm 2: movdqa <r17=%xmm7,>t16=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm: float6464 t16 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t16=int6464#9
+# asm 2: mulpd 160(<op2=%rdx),<t16=%xmm8
+mulpd 160(%rdx),%xmm8
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#9,<r16=int6464#7
+# asm 2: addpd <t16=%xmm8,<r16=%xmm6
+addpd %xmm8,%xmm6
+
+# qhasm: float6464 r17 *= b11
+# asm 1: mulpd <b11=int6464#2,<r17=int6464#8
+# asm 2: mulpd <b11=%xmm1,<r17=%xmm7
+mulpd %xmm1,%xmm7
+
+# qhasm: *(int128 *)(rp + 96) = r6
+# asm 1: movdqa <r6=int6464#10,96(<rp=int64#4)
+# asm 2: movdqa <r6=%xmm9,96(<rp=%rcx)
+movdqa %xmm9,96(%rcx)
+
+# qhasm: ab7 = *(int128 *)(op1 + 112)
+# asm 1: movdqa 112(<op1=int64#2),>ab7=int6464#9
+# asm 2: movdqa 112(<op1=%rsi),>ab7=%xmm8
+movdqa 112(%rsi),%xmm8
+
+# qhasm: r18 = ab7
+# asm 1: movdqa <ab7=int6464#9,>r18=int6464#10
+# asm 2: movdqa <ab7=%xmm8,>r18=%xmm9
+movdqa %xmm8,%xmm9
+
+# qhasm: float6464 r18 *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<r18=int6464#10
+# asm 2: mulpd <sixsix=%xmm0,<r18=%xmm9
+mulpd %xmm0,%xmm9
+
+# qhasm: t7 = ab7
+# asm 1: movdqa <ab7=int6464#9,>t7=int6464#16
+# asm 2: movdqa <ab7=%xmm8,>t7=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 t7 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t7=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t7=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#16,<r7=int6464#11
+# asm 2: addpd <t7=%xmm15,<r7=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: float6464 ab7 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<ab7=int6464#9
+# asm 2: mulpd 96(<op2=%rdx),<ab7=%xmm8
+mulpd 96(%rdx),%xmm8
+
+# qhasm: float6464 r13 +=ab7
+# asm 1: addpd <ab7=int6464#9,<r13=int6464#4
+# asm 2: addpd <ab7=%xmm8,<r13=%xmm3
+addpd %xmm8,%xmm3
+
+# qhasm: t8 = r18
+# asm 1: movdqa <r18=int6464#10,>t8=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t8=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t8=int6464#9
+# asm 2: mulpd 16(<op2=%rdx),<t8=%xmm8
+mulpd 16(%rdx),%xmm8
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#9,<r8=int6464#12
+# asm 2: addpd <t8=%xmm8,<r8=%xmm11
+addpd %xmm8,%xmm11
+
+# qhasm: t9 = r18
+# asm 1: movdqa <r18=int6464#10,>t9=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t9=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t9=int6464#9
+# asm 2: mulpd 32(<op2=%rdx),<t9=%xmm8
+mulpd 32(%rdx),%xmm8
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#9,<r9=int6464#13
+# asm 2: addpd <t9=%xmm8,<r9=%xmm12
+addpd %xmm8,%xmm12
+
+# qhasm: t10 = r18
+# asm 1: movdqa <r18=int6464#10,>t10=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t10=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t10=int6464#9
+# asm 2: mulpd 48(<op2=%rdx),<t10=%xmm8
+mulpd 48(%rdx),%xmm8
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#9,<r10=int6464#14
+# asm 2: addpd <t10=%xmm8,<r10=%xmm13
+addpd %xmm8,%xmm13
+
+# qhasm: t11 = r18
+# asm 1: movdqa <r18=int6464#10,>t11=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t11=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t11=int6464#9
+# asm 2: mulpd 64(<op2=%rdx),<t11=%xmm8
+mulpd 64(%rdx),%xmm8
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#9,<r11=int6464#3
+# asm 2: addpd <t11=%xmm8,<r11=%xmm2
+addpd %xmm8,%xmm2
+
+# qhasm: t12 = r18
+# asm 1: movdqa <r18=int6464#10,>t12=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t12=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t12 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t12=int6464#9
+# asm 2: mulpd 80(<op2=%rdx),<t12=%xmm8
+mulpd 80(%rdx),%xmm8
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#9,<r12=int6464#15
+# asm 2: addpd <t12=%xmm8,<r12=%xmm14
+addpd %xmm8,%xmm14
+
+# qhasm: t14 = r18
+# asm 1: movdqa <r18=int6464#10,>t14=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t14=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t14=int6464#9
+# asm 2: mulpd 112(<op2=%rdx),<t14=%xmm8
+mulpd 112(%rdx),%xmm8
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#9,<r14=int6464#5
+# asm 2: addpd <t14=%xmm8,<r14=%xmm4
+addpd %xmm8,%xmm4
+
+# qhasm: t15 = r18
+# asm 1: movdqa <r18=int6464#10,>t15=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t15=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t15 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t15=int6464#9
+# asm 2: mulpd 128(<op2=%rdx),<t15=%xmm8
+mulpd 128(%rdx),%xmm8
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#9,<r15=int6464#6
+# asm 2: addpd <t15=%xmm8,<r15=%xmm5
+addpd %xmm8,%xmm5
+
+# qhasm: t16 = r18
+# asm 1: movdqa <r18=int6464#10,>t16=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t16=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t16 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t16=int6464#9
+# asm 2: mulpd 144(<op2=%rdx),<t16=%xmm8
+mulpd 144(%rdx),%xmm8
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#9,<r16=int6464#7
+# asm 2: addpd <t16=%xmm8,<r16=%xmm6
+addpd %xmm8,%xmm6
+
+# qhasm: t17 = r18
+# asm 1: movdqa <r18=int6464#10,>t17=int6464#9
+# asm 2: movdqa <r18=%xmm9,>t17=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t17 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t17=int6464#9
+# asm 2: mulpd 160(<op2=%rdx),<t17=%xmm8
+mulpd 160(%rdx),%xmm8
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#9,<r17=int6464#8
+# asm 2: addpd <t17=%xmm8,<r17=%xmm7
+addpd %xmm8,%xmm7
+
+# qhasm: float6464 r18 *= b11
+# asm 1: mulpd <b11=int6464#2,<r18=int6464#10
+# asm 2: mulpd <b11=%xmm1,<r18=%xmm9
+mulpd %xmm1,%xmm9
+
+# qhasm: *(int128 *)(rp + 112) = r7
+# asm 1: movdqa <r7=int6464#11,112(<rp=int64#4)
+# asm 2: movdqa <r7=%xmm10,112(<rp=%rcx)
+movdqa %xmm10,112(%rcx)
+
+# qhasm: r19 = *(int128 *)(op1 + 128)
+# asm 1: movdqa 128(<op1=int64#2),>r19=int6464#9
+# asm 2: movdqa 128(<op1=%rsi),>r19=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: ab8six = r19
+# asm 1: movdqa <r19=int6464#9,>ab8six=int6464#11
+# asm 2: movdqa <r19=%xmm8,>ab8six=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm: float6464 ab8six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab8six=int6464#11
+# asm 2: mulpd <sixsix=%xmm0,<ab8six=%xmm10
+mulpd %xmm0,%xmm10
+
+# qhasm: t8 = r19
+# asm 1: movdqa <r19=int6464#9,>t8=int6464#16
+# asm 2: movdqa <r19=%xmm8,>t8=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 t8 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t8=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t8=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#16,<r8=int6464#12
+# asm 2: addpd <t8=%xmm15,<r8=%xmm11
+addpd %xmm15,%xmm11
+
+# qhasm: t13 = r19
+# asm 1: movdqa <r19=int6464#9,>t13=int6464#16
+# asm 2: movdqa <r19=%xmm8,>t13=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t13=int6464#16
+# asm 2: mulpd 80(<op2=%rdx),<t13=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#4
+# asm 2: addpd <t13=%xmm15,<r13=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t14 = r19
+# asm 1: movdqa <r19=int6464#9,>t14=int6464#16
+# asm 2: movdqa <r19=%xmm8,>t14=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t14=int6464#16
+# asm 2: mulpd 96(<op2=%rdx),<t14=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#5
+# asm 2: addpd <t14=%xmm15,<r14=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: float6464 r19 *= b11
+# asm 1: mulpd <b11=int6464#2,<r19=int6464#9
+# asm 2: mulpd <b11=%xmm1,<r19=%xmm8
+mulpd %xmm1,%xmm8
+
+# qhasm: t9 = ab8six
+# asm 1: movdqa <ab8six=int6464#11,>t9=int6464#16
+# asm 2: movdqa <ab8six=%xmm10,>t9=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t9=int6464#16
+# asm 2: mulpd 16(<op2=%rdx),<t9=%xmm15
+mulpd 16(%rdx),%xmm15
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#13
+# asm 2: addpd <t9=%xmm15,<r9=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t10 = ab8six
+# asm 1: movdqa <ab8six=int6464#11,>t10=int6464#16
+# asm 2: movdqa <ab8six=%xmm10,>t10=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t10=int6464#16
+# asm 2: mulpd 32(<op2=%rdx),<t10=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#14
+# asm 2: addpd <t10=%xmm15,<r10=%xmm13
+addpd %xmm15,%xmm13
+
+# qhasm: t11 = ab8six
+# asm 1: movdqa <ab8six=int6464#11,>t11=int6464#16
+# asm 2: movdqa <ab8six=%xmm10,>t11=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t11=int6464#16
+# asm 2: mulpd 48(<op2=%rdx),<t11=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#3
+# asm 2: addpd <t11=%xmm15,<r11=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t12 = ab8six
+# asm 1: movdqa <ab8six=int6464#11,>t12=int6464#16
+# asm 2: movdqa <ab8six=%xmm10,>t12=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t12 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t12=int6464#16
+# asm 2: mulpd 64(<op2=%rdx),<t12=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#16,<r12=int6464#15
+# asm 2: addpd <t12=%xmm15,<r12=%xmm14
+addpd %xmm15,%xmm14
+
+# qhasm: t15 = ab8six
+# asm 1: movdqa <ab8six=int6464#11,>t15=int6464#16
+# asm 2: movdqa <ab8six=%xmm10,>t15=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t15=int6464#16
+# asm 2: mulpd 112(<op2=%rdx),<t15=%xmm15
+mulpd 112(%rdx),%xmm15
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#6
+# asm 2: addpd <t15=%xmm15,<r15=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: t16 = ab8six
+# asm 1: movdqa <ab8six=int6464#11,>t16=int6464#16
+# asm 2: movdqa <ab8six=%xmm10,>t16=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t16 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t16=int6464#16
+# asm 2: mulpd 128(<op2=%rdx),<t16=%xmm15
+mulpd 128(%rdx),%xmm15
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#16,<r16=int6464#7
+# asm 2: addpd <t16=%xmm15,<r16=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: t17 = ab8six
+# asm 1: movdqa <ab8six=int6464#11,>t17=int6464#16
+# asm 2: movdqa <ab8six=%xmm10,>t17=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t17 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t17=int6464#16
+# asm 2: mulpd 144(<op2=%rdx),<t17=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#16,<r17=int6464#8
+# asm 2: addpd <t17=%xmm15,<r17=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: float6464 ab8six *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<ab8six=int6464#11
+# asm 2: mulpd 160(<op2=%rdx),<ab8six=%xmm10
+mulpd 160(%rdx),%xmm10
+
+# qhasm: float6464 r18 += ab8six
+# asm 1: addpd <ab8six=int6464#11,<r18=int6464#10
+# asm 2: addpd <ab8six=%xmm10,<r18=%xmm9
+addpd %xmm10,%xmm9
+
+# qhasm: *(int128 *)(rp + 128) = r8
+# asm 1: movdqa <r8=int6464#12,128(<rp=int64#4)
+# asm 2: movdqa <r8=%xmm11,128(<rp=%rcx)
+movdqa %xmm11,128(%rcx)
+
+# qhasm: r20 = *(int128 *)(op1 + 144)
+# asm 1: movdqa 144(<op1=int64#2),>r20=int6464#11
+# asm 2: movdqa 144(<op1=%rsi),>r20=%xmm10
+movdqa 144(%rsi),%xmm10
+
+# qhasm: ab9six = r20
+# asm 1: movdqa <r20=int6464#11,>ab9six=int6464#12
+# asm 2: movdqa <r20=%xmm10,>ab9six=%xmm11
+movdqa %xmm10,%xmm11
+
+# qhasm: float6464 ab9six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab9six=int6464#12
+# asm 2: mulpd <sixsix=%xmm0,<ab9six=%xmm11
+mulpd %xmm0,%xmm11
+
+# qhasm: t9 = r20
+# asm 1: movdqa <r20=int6464#11,>t9=int6464#16
+# asm 2: movdqa <r20=%xmm10,>t9=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t9 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t9=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t9=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#16,<r9=int6464#13
+# asm 2: addpd <t9=%xmm15,<r9=%xmm12
+addpd %xmm15,%xmm12
+
+# qhasm: t13 = r20
+# asm 1: movdqa <r20=int6464#11,>t13=int6464#16
+# asm 2: movdqa <r20=%xmm10,>t13=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t13=int6464#16
+# asm 2: mulpd 64(<op2=%rdx),<t13=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#4
+# asm 2: addpd <t13=%xmm15,<r13=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t14 = r20
+# asm 1: movdqa <r20=int6464#11,>t14=int6464#16
+# asm 2: movdqa <r20=%xmm10,>t14=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t14=int6464#16
+# asm 2: mulpd 80(<op2=%rdx),<t14=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#5
+# asm 2: addpd <t14=%xmm15,<r14=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: t15 = r20
+# asm 1: movdqa <r20=int6464#11,>t15=int6464#16
+# asm 2: movdqa <r20=%xmm10,>t15=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t15=int6464#16
+# asm 2: mulpd 96(<op2=%rdx),<t15=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#6
+# asm 2: addpd <t15=%xmm15,<r15=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: t19 = r20
+# asm 1: movdqa <r20=int6464#11,>t19=int6464#16
+# asm 2: movdqa <r20=%xmm10,>t19=%xmm15
+movdqa %xmm10,%xmm15
+
+# qhasm: float6464 t19 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t19=int6464#16
+# asm 2: mulpd 160(<op2=%rdx),<t19=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r19 +=t19
+# asm 1: addpd <t19=int6464#16,<r19=int6464#9
+# asm 2: addpd <t19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: float6464 r20 *= b11
+# asm 1: mulpd <b11=int6464#2,<r20=int6464#11
+# asm 2: mulpd <b11=%xmm1,<r20=%xmm10
+mulpd %xmm1,%xmm10
+
+# qhasm: t10 = ab9six
+# asm 1: movdqa <ab9six=int6464#12,>t10=int6464#16
+# asm 2: movdqa <ab9six=%xmm11,>t10=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t10=int6464#16
+# asm 2: mulpd 16(<op2=%rdx),<t10=%xmm15
+mulpd 16(%rdx),%xmm15
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#14
+# asm 2: addpd <t10=%xmm15,<r10=%xmm13
+addpd %xmm15,%xmm13
+
+# qhasm: t11 = ab9six
+# asm 1: movdqa <ab9six=int6464#12,>t11=int6464#16
+# asm 2: movdqa <ab9six=%xmm11,>t11=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t11=int6464#16
+# asm 2: mulpd 32(<op2=%rdx),<t11=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#3
+# asm 2: addpd <t11=%xmm15,<r11=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t12 = ab9six
+# asm 1: movdqa <ab9six=int6464#12,>t12=int6464#16
+# asm 2: movdqa <ab9six=%xmm11,>t12=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t12 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t12=int6464#16
+# asm 2: mulpd 48(<op2=%rdx),<t12=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#16,<r12=int6464#15
+# asm 2: addpd <t12=%xmm15,<r12=%xmm14
+addpd %xmm15,%xmm14
+
+# qhasm: t16 = ab9six
+# asm 1: movdqa <ab9six=int6464#12,>t16=int6464#16
+# asm 2: movdqa <ab9six=%xmm11,>t16=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t16 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t16=int6464#16
+# asm 2: mulpd 112(<op2=%rdx),<t16=%xmm15
+mulpd 112(%rdx),%xmm15
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#16,<r16=int6464#7
+# asm 2: addpd <t16=%xmm15,<r16=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: t17 = ab9six
+# asm 1: movdqa <ab9six=int6464#12,>t17=int6464#16
+# asm 2: movdqa <ab9six=%xmm11,>t17=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t17 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t17=int6464#16
+# asm 2: mulpd 128(<op2=%rdx),<t17=%xmm15
+mulpd 128(%rdx),%xmm15
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#16,<r17=int6464#8
+# asm 2: addpd <t17=%xmm15,<r17=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: float6464 ab9six *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<ab9six=int6464#12
+# asm 2: mulpd 144(<op2=%rdx),<ab9six=%xmm11
+mulpd 144(%rdx),%xmm11
+
+# qhasm: float6464 r18 +=ab9six
+# asm 1: addpd <ab9six=int6464#12,<r18=int6464#10
+# asm 2: addpd <ab9six=%xmm11,<r18=%xmm9
+addpd %xmm11,%xmm9
+
+# qhasm: *(int128 *)(rp + 144) = r9
+# asm 1: movdqa <r9=int6464#13,144(<rp=int64#4)
+# asm 2: movdqa <r9=%xmm12,144(<rp=%rcx)
+movdqa %xmm12,144(%rcx)
+
+# qhasm: r21 = *(int128 *)(op1 + 160)
+# asm 1: movdqa 160(<op1=int64#2),>r21=int6464#12
+# asm 2: movdqa 160(<op1=%rsi),>r21=%xmm11
+movdqa 160(%rsi),%xmm11
+
+# qhasm: ab10six = r21
+# asm 1: movdqa <r21=int6464#12,>ab10six=int6464#13
+# asm 2: movdqa <r21=%xmm11,>ab10six=%xmm12
+movdqa %xmm11,%xmm12
+
+# qhasm: float6464 ab10six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab10six=int6464#13
+# asm 2: mulpd <sixsix=%xmm0,<ab10six=%xmm12
+mulpd %xmm0,%xmm12
+
+# qhasm: t10 = r21
+# asm 1: movdqa <r21=int6464#12,>t10=int6464#16
+# asm 2: movdqa <r21=%xmm11,>t10=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t10 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t10=int6464#16
+# asm 2: mulpd 0(<op2=%rdx),<t10=%xmm15
+mulpd 0(%rdx),%xmm15
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#16,<r10=int6464#14
+# asm 2: addpd <t10=%xmm15,<r10=%xmm13
+addpd %xmm15,%xmm13
+
+# qhasm: t13 = r21
+# asm 1: movdqa <r21=int6464#12,>t13=int6464#16
+# asm 2: movdqa <r21=%xmm11,>t13=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t13=int6464#16
+# asm 2: mulpd 48(<op2=%rdx),<t13=%xmm15
+mulpd 48(%rdx),%xmm15
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#16,<r13=int6464#4
+# asm 2: addpd <t13=%xmm15,<r13=%xmm3
+addpd %xmm15,%xmm3
+
+# qhasm: t14 = r21
+# asm 1: movdqa <r21=int6464#12,>t14=int6464#16
+# asm 2: movdqa <r21=%xmm11,>t14=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t14=int6464#16
+# asm 2: mulpd 64(<op2=%rdx),<t14=%xmm15
+mulpd 64(%rdx),%xmm15
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#16,<r14=int6464#5
+# asm 2: addpd <t14=%xmm15,<r14=%xmm4
+addpd %xmm15,%xmm4
+
+# qhasm: t16 = r21
+# asm 1: movdqa <r21=int6464#12,>t16=int6464#16
+# asm 2: movdqa <r21=%xmm11,>t16=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t16 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t16=int6464#16
+# asm 2: mulpd 96(<op2=%rdx),<t16=%xmm15
+mulpd 96(%rdx),%xmm15
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#16,<r16=int6464#7
+# asm 2: addpd <t16=%xmm15,<r16=%xmm6
+addpd %xmm15,%xmm6
+
+# qhasm: t15 = r21
+# asm 1: movdqa <r21=int6464#12,>t15=int6464#16
+# asm 2: movdqa <r21=%xmm11,>t15=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t15 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t15=int6464#16
+# asm 2: mulpd 80(<op2=%rdx),<t15=%xmm15
+mulpd 80(%rdx),%xmm15
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#16,<r15=int6464#6
+# asm 2: addpd <t15=%xmm15,<r15=%xmm5
+addpd %xmm15,%xmm5
+
+# qhasm: t19 = r21
+# asm 1: movdqa <r21=int6464#12,>t19=int6464#16
+# asm 2: movdqa <r21=%xmm11,>t19=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t19 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t19=int6464#16
+# asm 2: mulpd 144(<op2=%rdx),<t19=%xmm15
+mulpd 144(%rdx),%xmm15
+
+# qhasm: float6464 r19 +=t19
+# asm 1: addpd <t19=int6464#16,<r19=int6464#9
+# asm 2: addpd <t19=%xmm15,<r19=%xmm8
+addpd %xmm15,%xmm8
+
+# qhasm: t20 = r21
+# asm 1: movdqa <r21=int6464#12,>t20=int6464#16
+# asm 2: movdqa <r21=%xmm11,>t20=%xmm15
+movdqa %xmm11,%xmm15
+
+# qhasm: float6464 t20 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t20=int6464#16
+# asm 2: mulpd 160(<op2=%rdx),<t20=%xmm15
+mulpd 160(%rdx),%xmm15
+
+# qhasm: float6464 r20 +=t20
+# asm 1: addpd <t20=int6464#16,<r20=int6464#11
+# asm 2: addpd <t20=%xmm15,<r20=%xmm10
+addpd %xmm15,%xmm10
+
+# qhasm: float6464 r21 *= b11
+# asm 1: mulpd <b11=int6464#2,<r21=int6464#12
+# asm 2: mulpd <b11=%xmm1,<r21=%xmm11
+mulpd %xmm1,%xmm11
+
+# qhasm: t11 = ab10six
+# asm 1: movdqa <ab10six=int6464#13,>t11=int6464#16
+# asm 2: movdqa <ab10six=%xmm12,>t11=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t11=int6464#16
+# asm 2: mulpd 16(<op2=%rdx),<t11=%xmm15
+mulpd 16(%rdx),%xmm15
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#16,<r11=int6464#3
+# asm 2: addpd <t11=%xmm15,<r11=%xmm2
+addpd %xmm15,%xmm2
+
+# qhasm: t12 = ab10six
+# asm 1: movdqa <ab10six=int6464#13,>t12=int6464#16
+# asm 2: movdqa <ab10six=%xmm12,>t12=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 t12 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t12=int6464#16
+# asm 2: mulpd 32(<op2=%rdx),<t12=%xmm15
+mulpd 32(%rdx),%xmm15
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#16,<r12=int6464#15
+# asm 2: addpd <t12=%xmm15,<r12=%xmm14
+addpd %xmm15,%xmm14
+
+# qhasm: t17 = ab10six
+# asm 1: movdqa <ab10six=int6464#13,>t17=int6464#16
+# asm 2: movdqa <ab10six=%xmm12,>t17=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm: float6464 t17 *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<t17=int6464#16
+# asm 2: mulpd 112(<op2=%rdx),<t17=%xmm15
+mulpd 112(%rdx),%xmm15
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#16,<r17=int6464#8
+# asm 2: addpd <t17=%xmm15,<r17=%xmm7
+addpd %xmm15,%xmm7
+
+# qhasm: float6464 ab10six *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<ab10six=int6464#13
+# asm 2: mulpd 128(<op2=%rdx),<ab10six=%xmm12
+mulpd 128(%rdx),%xmm12
+
+# qhasm: float6464 r18 +=ab10six
+# asm 1: addpd <ab10six=int6464#13,<r18=int6464#10
+# asm 2: addpd <ab10six=%xmm12,<r18=%xmm9
+addpd %xmm12,%xmm9
+
+# qhasm: *(int128 *)(rp + 160) = r10
+# asm 1: movdqa <r10=int6464#14,160(<rp=int64#4)
+# asm 2: movdqa <r10=%xmm13,160(<rp=%rcx)
+movdqa %xmm13,160(%rcx)
+
+# qhasm: r22 = *(int128 *)(op1 + 176)
+# asm 1: movdqa 176(<op1=int64#2),>r22=int6464#13
+# asm 2: movdqa 176(<op1=%rsi),>r22=%xmm12
+movdqa 176(%rsi),%xmm12
+
+# qhasm: ab11six = r22
+# asm 1: movdqa <r22=int6464#13,>ab11six=int6464#14
+# asm 2: movdqa <r22=%xmm12,>ab11six=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 ab11six *= sixsix
+# asm 1: mulpd <sixsix=int6464#1,<ab11six=int6464#14
+# asm 2: mulpd <sixsix=%xmm0,<ab11six=%xmm13
+mulpd %xmm0,%xmm13
+
+# qhasm: t11 = r22
+# asm 1: movdqa <r22=int6464#13,>t11=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t11=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t11 *= *(int128 *)(op2 + 0)
+# asm 1: mulpd 0(<op2=int64#3),<t11=int6464#1
+# asm 2: mulpd 0(<op2=%rdx),<t11=%xmm0
+mulpd 0(%rdx),%xmm0
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#1,<r11=int6464#3
+# asm 2: addpd <t11=%xmm0,<r11=%xmm2
+addpd %xmm0,%xmm2
+
+# qhasm: t13 = r22
+# asm 1: movdqa <r22=int6464#13,>t13=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t13=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t13 *= *(int128 *)(op2 + 32)
+# asm 1: mulpd 32(<op2=int64#3),<t13=int6464#1
+# asm 2: mulpd 32(<op2=%rdx),<t13=%xmm0
+mulpd 32(%rdx),%xmm0
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#1,<r13=int6464#4
+# asm 2: addpd <t13=%xmm0,<r13=%xmm3
+addpd %xmm0,%xmm3
+
+# qhasm: t14 = r22
+# asm 1: movdqa <r22=int6464#13,>t14=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t14=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t14 *= *(int128 *)(op2 + 48)
+# asm 1: mulpd 48(<op2=int64#3),<t14=int6464#1
+# asm 2: mulpd 48(<op2=%rdx),<t14=%xmm0
+mulpd 48(%rdx),%xmm0
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#1,<r14=int6464#5
+# asm 2: addpd <t14=%xmm0,<r14=%xmm4
+addpd %xmm0,%xmm4
+
+# qhasm: t15 = r22
+# asm 1: movdqa <r22=int6464#13,>t15=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t15=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t15 *= *(int128 *)(op2 + 64)
+# asm 1: mulpd 64(<op2=int64#3),<t15=int6464#1
+# asm 2: mulpd 64(<op2=%rdx),<t15=%xmm0
+mulpd 64(%rdx),%xmm0
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#1,<r15=int6464#6
+# asm 2: addpd <t15=%xmm0,<r15=%xmm5
+addpd %xmm0,%xmm5
+
+# qhasm: t16 = r22
+# asm 1: movdqa <r22=int6464#13,>t16=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t16=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t16 *= *(int128 *)(op2 + 80)
+# asm 1: mulpd 80(<op2=int64#3),<t16=int6464#1
+# asm 2: mulpd 80(<op2=%rdx),<t16=%xmm0
+mulpd 80(%rdx),%xmm0
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#1,<r16=int6464#7
+# asm 2: addpd <t16=%xmm0,<r16=%xmm6
+addpd %xmm0,%xmm6
+
+# qhasm: t17 = r22
+# asm 1: movdqa <r22=int6464#13,>t17=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t17=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t17 *= *(int128 *)(op2 + 96)
+# asm 1: mulpd 96(<op2=int64#3),<t17=int6464#1
+# asm 2: mulpd 96(<op2=%rdx),<t17=%xmm0
+mulpd 96(%rdx),%xmm0
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#1,<r17=int6464#8
+# asm 2: addpd <t17=%xmm0,<r17=%xmm7
+addpd %xmm0,%xmm7
+
+# qhasm: t19 = r22
+# asm 1: movdqa <r22=int6464#13,>t19=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t19=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t19 *= *(int128 *)(op2 + 128)
+# asm 1: mulpd 128(<op2=int64#3),<t19=int6464#1
+# asm 2: mulpd 128(<op2=%rdx),<t19=%xmm0
+mulpd 128(%rdx),%xmm0
+
+# qhasm: float6464 r19 +=t19
+# asm 1: addpd <t19=int6464#1,<r19=int6464#9
+# asm 2: addpd <t19=%xmm0,<r19=%xmm8
+addpd %xmm0,%xmm8
+
+# qhasm: t20 = r22
+# asm 1: movdqa <r22=int6464#13,>t20=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t20=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t20 *= *(int128 *)(op2 + 144)
+# asm 1: mulpd 144(<op2=int64#3),<t20=int6464#1
+# asm 2: mulpd 144(<op2=%rdx),<t20=%xmm0
+mulpd 144(%rdx),%xmm0
+
+# qhasm: float6464 r20 +=t20
+# asm 1: addpd <t20=int6464#1,<r20=int6464#11
+# asm 2: addpd <t20=%xmm0,<r20=%xmm10
+addpd %xmm0,%xmm10
+
+# qhasm: t21 = r22
+# asm 1: movdqa <r22=int6464#13,>t21=int6464#1
+# asm 2: movdqa <r22=%xmm12,>t21=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 t21 *= *(int128 *)(op2 + 160)
+# asm 1: mulpd 160(<op2=int64#3),<t21=int6464#1
+# asm 2: mulpd 160(<op2=%rdx),<t21=%xmm0
+mulpd 160(%rdx),%xmm0
+
+# qhasm: float6464 r21 +=t21
+# asm 1: addpd <t21=int6464#1,<r21=int6464#12
+# asm 2: addpd <t21=%xmm0,<r21=%xmm11
+addpd %xmm0,%xmm11
+
+# qhasm: float6464 r22 *= b11
+# asm 1: mulpd <b11=int6464#2,<r22=int6464#13
+# asm 2: mulpd <b11=%xmm1,<r22=%xmm12
+mulpd %xmm1,%xmm12
+
+# qhasm: t12 = ab11six
+# asm 1: movdqa <ab11six=int6464#14,>t12=int6464#1
+# asm 2: movdqa <ab11six=%xmm13,>t12=%xmm0
+movdqa %xmm13,%xmm0
+
+# qhasm: float6464 t12 *= *(int128 *)(op2 + 16)
+# asm 1: mulpd 16(<op2=int64#3),<t12=int6464#1
+# asm 2: mulpd 16(<op2=%rdx),<t12=%xmm0
+mulpd 16(%rdx),%xmm0
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#1,<r12=int6464#15
+# asm 2: addpd <t12=%xmm0,<r12=%xmm14
+addpd %xmm0,%xmm14
+
+# qhasm: float6464 ab11six *= *(int128 *)(op2 + 112)
+# asm 1: mulpd 112(<op2=int64#3),<ab11six=int6464#14
+# asm 2: mulpd 112(<op2=%rdx),<ab11six=%xmm13
+mulpd 112(%rdx),%xmm13
+
+# qhasm: float6464 r18 +=ab11six
+# asm 1: addpd <ab11six=int6464#14,<r18=int6464#10
+# asm 2: addpd <ab11six=%xmm13,<r18=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: *(int128 *)(rp + 176) = r11
+# asm 1: movdqa <r11=int6464#3,176(<rp=int64#4)
+# asm 2: movdqa <r11=%xmm2,176(<rp=%rcx)
+movdqa %xmm2,176(%rcx)
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: int6464 0t4
+
+# qhasm: int6464 0t5
+
+# qhasm: int6464 0t6
+
+# qhasm: int6464 0t7
+
+# qhasm: int6464 0t8
+
+# qhasm: int6464 0t9
+
+# qhasm: int6464 0t10
+
+# qhasm: int6464 0t11
+
+# qhasm: int6464 0t12
+
+# qhasm: int6464 0t13
+
+# qhasm: int6464 0t14
+
+# qhasm: int6464 0t15
+
+# qhasm: int6464 0t16
+
+# qhasm: int6464 0t17
+
+# qhasm: int6464 0t18
+
+# qhasm: int6464 0t19
+
+# qhasm: int6464 0t20
+
+# qhasm: int6464 0t21
+
+# qhasm: int6464 0t22
+
+# qhasm: 0r0 = *(int128 *)(rp + 0)
+# asm 1: movdqa 0(<rp=int64#4),>0r0=int6464#1
+# asm 2: movdqa 0(<rp=%rcx),>0r0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: float6464 0r0 -= r12
+# asm 1: subpd <r12=int6464#15,<0r0=int6464#1
+# asm 2: subpd <r12=%xmm14,<0r0=%xmm0
+subpd %xmm14,%xmm0
+
+# qhasm: 0t15 = r15
+# asm 1: movdqa <r15=int6464#6,>0t15=int6464#2
+# asm 2: movdqa <r15=%xmm5,>0t15=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 0t15 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t15=int6464#2
+# asm 2: mulpd SIX_SIX,<0t15=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r0 += 0t15
+# asm 1: addpd <0t15=int6464#2,<0r0=int6464#1
+# asm 2: addpd <0t15=%xmm1,<0r0=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t18 = r18
+# asm 1: movdqa <r18=int6464#10,>0t18=int6464#2
+# asm 2: movdqa <r18=%xmm9,>0t18=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 0t18 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<0t18=int6464#2
+# asm 2: mulpd TWO_TWO,<0t18=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r0 -= 0t18
+# asm 1: subpd <0t18=int6464#2,<0r0=int6464#1
+# asm 2: subpd <0t18=%xmm1,<0r0=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t21 = r21
+# asm 1: movdqa <r21=int6464#12,>0t21=int6464#2
+# asm 2: movdqa <r21=%xmm11,>0t21=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 0t21 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t21=int6464#2
+# asm 2: mulpd SIX_SIX,<0t21=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r0 -= 0t21
+# asm 1: subpd <0t21=int6464#2,<0r0=int6464#1
+# asm 2: subpd <0t21=%xmm1,<0r0=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 0) = 0r0
+# asm 1: movdqa <0r0=int6464#1,0(<rp=int64#4)
+# asm 2: movdqa <0r0=%xmm0,0(<rp=%rcx)
+movdqa %xmm0,0(%rcx)
+
+# qhasm: 0r3 = *(int128 *)(rp + 48)
+# asm 1: movdqa 48(<rp=int64#4),>0r3=int6464#1
+# asm 2: movdqa 48(<rp=%rcx),>0r3=%xmm0
+movdqa 48(%rcx),%xmm0
+
+# qhasm: float6464 0r3 -= r12
+# asm 1: subpd <r12=int6464#15,<0r3=int6464#1
+# asm 2: subpd <r12=%xmm14,<0r3=%xmm0
+subpd %xmm14,%xmm0
+
+# qhasm: 0t15 = r15
+# asm 1: movdqa <r15=int6464#6,>0t15=int6464#2
+# asm 2: movdqa <r15=%xmm5,>0t15=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 0t15 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<0t15=int6464#2
+# asm 2: mulpd FIVE_FIVE,<0t15=%xmm1
+mulpd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 0r3 += 0t15
+# asm 1: addpd <0t15=int6464#2,<0r3=int6464#1
+# asm 2: addpd <0t15=%xmm1,<0r3=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: float6464 0r3 -= r18
+# asm 1: subpd <r18=int6464#10,<0r3=int6464#1
+# asm 2: subpd <r18=%xmm9,<0r3=%xmm0
+subpd %xmm9,%xmm0
+
+# qhasm: 0t21 = r21
+# asm 1: movdqa <r21=int6464#12,>0t21=int6464#2
+# asm 2: movdqa <r21=%xmm11,>0t21=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 0t21 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<0t21=int6464#2
+# asm 2: mulpd EIGHT_EIGHT,<0t21=%xmm1
+mulpd EIGHT_EIGHT,%xmm1
+
+# qhasm: float6464 0r3 -= 0t21
+# asm 1: subpd <0t21=int6464#2,<0r3=int6464#1
+# asm 2: subpd <0t21=%xmm1,<0r3=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 48) = 0r3
+# asm 1: movdqa <0r3=int6464#1,48(<rp=int64#4)
+# asm 2: movdqa <0r3=%xmm0,48(<rp=%rcx)
+movdqa %xmm0,48(%rcx)
+
+# qhasm: 0r6 = *(int128 *)(rp + 96)
+# asm 1: movdqa 96(<rp=int64#4),>0r6=int6464#1
+# asm 2: movdqa 96(<rp=%rcx),>0r6=%xmm0
+movdqa 96(%rcx),%xmm0
+
+# qhasm: 0t12 = r12
+# asm 1: movdqa <r12=int6464#15,>0t12=int6464#2
+# asm 2: movdqa <r12=%xmm14,>0t12=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: float6464 0t12 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<0t12=int6464#2
+# asm 2: mulpd FOUR_FOUR,<0t12=%xmm1
+mulpd FOUR_FOUR,%xmm1
+
+# qhasm: float6464 0r6 -= 0t12
+# asm 1: subpd <0t12=int6464#2,<0r6=int6464#1
+# asm 2: subpd <0t12=%xmm1,<0r6=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t15 = r15
+# asm 1: movdqa <r15=int6464#6,>0t15=int6464#2
+# asm 2: movdqa <r15=%xmm5,>0t15=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 0t15 *= EIGHTEEN_EIGHTEEN
+# asm 1: mulpd EIGHTEEN_EIGHTEEN,<0t15=int6464#2
+# asm 2: mulpd EIGHTEEN_EIGHTEEN,<0t15=%xmm1
+mulpd EIGHTEEN_EIGHTEEN,%xmm1
+
+# qhasm: float6464 0r6 += 0t15
+# asm 1: addpd <0t15=int6464#2,<0r6=int6464#1
+# asm 2: addpd <0t15=%xmm1,<0r6=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t18 = r18
+# asm 1: movdqa <r18=int6464#10,>0t18=int6464#2
+# asm 2: movdqa <r18=%xmm9,>0t18=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 0t18 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<0t18=int6464#2
+# asm 2: mulpd THREE_THREE,<0t18=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r6 -= 0t18
+# asm 1: subpd <0t18=int6464#2,<0r6=int6464#1
+# asm 2: subpd <0t18=%xmm1,<0r6=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t21 = r21
+# asm 1: movdqa <r21=int6464#12,>0t21=int6464#2
+# asm 2: movdqa <r21=%xmm11,>0t21=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 0t21 *= THIRTY_THIRTY
+# asm 1: mulpd THIRTY_THIRTY,<0t21=int6464#2
+# asm 2: mulpd THIRTY_THIRTY,<0t21=%xmm1
+mulpd THIRTY_THIRTY,%xmm1
+
+# qhasm: float6464 0r6 -= 0t21
+# asm 1: subpd <0t21=int6464#2,<0r6=int6464#1
+# asm 2: subpd <0t21=%xmm1,<0r6=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 96) = 0r6
+# asm 1: movdqa <0r6=int6464#1,96(<rp=int64#4)
+# asm 2: movdqa <0r6=%xmm0,96(<rp=%rcx)
+movdqa %xmm0,96(%rcx)
+
+# qhasm: 0r9 = *(int128 *)(rp + 144)
+# asm 1: movdqa 144(<rp=int64#4),>0r9=int6464#1
+# asm 2: movdqa 144(<rp=%rcx),>0r9=%xmm0
+movdqa 144(%rcx),%xmm0
+
+# qhasm: float6464 0r9 -= r12
+# asm 1: subpd <r12=int6464#15,<0r9=int6464#1
+# asm 2: subpd <r12=%xmm14,<0r9=%xmm0
+subpd %xmm14,%xmm0
+
+# qhasm: 0t15 = r15
+# asm 1: movdqa <r15=int6464#6,>0t15=int6464#2
+# asm 2: movdqa <r15=%xmm5,>0t15=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 0t15 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<0t15=int6464#2
+# asm 2: mulpd TWO_TWO,<0t15=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r9 += 0t15
+# asm 1: addpd <0t15=int6464#2,<0r9=int6464#1
+# asm 2: addpd <0t15=%xmm1,<0r9=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: float6464 0r9 += r18
+# asm 1: addpd <r18=int6464#10,<0r9=int6464#1
+# asm 2: addpd <r18=%xmm9,<0r9=%xmm0
+addpd %xmm9,%xmm0
+
+# qhasm: 0t21 = r21
+# asm 1: movdqa <r21=int6464#12,>0t21=int6464#2
+# asm 2: movdqa <r21=%xmm11,>0t21=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 0t21 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<0t21=int6464#2
+# asm 2: mulpd NINE_NINE,<0t21=%xmm1
+mulpd NINE_NINE,%xmm1
+
+# qhasm: float6464 0r9 -= 0t21
+# asm 1: subpd <0t21=int6464#2,<0r9=int6464#1
+# asm 2: subpd <0t21=%xmm1,<0r9=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 144) = 0r9
+# asm 1: movdqa <0r9=int6464#1,144(<rp=int64#4)
+# asm 2: movdqa <0r9=%xmm0,144(<rp=%rcx)
+movdqa %xmm0,144(%rcx)
+
+# qhasm: 0r1 = *(int128 *)(rp + 16)
+# asm 1: movdqa 16(<rp=int64#4),>0r1=int6464#1
+# asm 2: movdqa 16(<rp=%rcx),>0r1=%xmm0
+movdqa 16(%rcx),%xmm0
+
+# qhasm: float6464 0r1 -= r13
+# asm 1: subpd <r13=int6464#4,<0r1=int6464#1
+# asm 2: subpd <r13=%xmm3,<0r1=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 0r1 += r16
+# asm 1: addpd <r16=int6464#7,<0r1=int6464#1
+# asm 2: addpd <r16=%xmm6,<0r1=%xmm0
+addpd %xmm6,%xmm0
+
+# qhasm: 0t19 = r19
+# asm 1: movdqa <r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<0t19=int6464#2
+# asm 2: mulpd TWO_TWO,<0t19=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r1 -= 0t19
+# asm 1: subpd <0t19=int6464#2,<0r1=int6464#1
+# asm 2: subpd <0t19=%xmm1,<0r1=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: float6464 0r1 -= r22
+# asm 1: subpd <r22=int6464#13,<0r1=int6464#1
+# asm 2: subpd <r22=%xmm12,<0r1=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: *(int128 *)(rp + 16) = 0r1
+# asm 1: movdqa <0r1=int6464#1,16(<rp=int64#4)
+# asm 2: movdqa <0r1=%xmm0,16(<rp=%rcx)
+movdqa %xmm0,16(%rcx)
+
+# qhasm: 0r4 = *(int128 *)(rp + 64)
+# asm 1: movdqa 64(<rp=int64#4),>0r4=int6464#1
+# asm 2: movdqa 64(<rp=%rcx),>0r4=%xmm0
+movdqa 64(%rcx),%xmm0
+
+# qhasm: 0t13 = r13
+# asm 1: movdqa <r13=int6464#4,>0t13=int6464#2
+# asm 2: movdqa <r13=%xmm3,>0t13=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 0t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t13=int6464#2
+# asm 2: mulpd SIX_SIX,<0t13=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r4 -= 0t13
+# asm 1: subpd <0t13=int6464#2,<0r4=int6464#1
+# asm 2: subpd <0t13=%xmm1,<0r4=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t16 = r16
+# asm 1: movdqa <r16=int6464#7,>0t16=int6464#2
+# asm 2: movdqa <r16=%xmm6,>0t16=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 0t16 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<0t16=int6464#2
+# asm 2: mulpd FIVE_FIVE,<0t16=%xmm1
+mulpd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 0r4 += 0t16
+# asm 1: addpd <0t16=int6464#2,<0r4=int6464#1
+# asm 2: addpd <0t16=%xmm1,<0r4=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t19 = r19
+# asm 1: movdqa <r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t19=int6464#2
+# asm 2: mulpd SIX_SIX,<0t19=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r4 -= 0t19
+# asm 1: subpd <0t19=int6464#2,<0r4=int6464#1
+# asm 2: subpd <0t19=%xmm1,<0r4=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t22 = r22
+# asm 1: movdqa <r22=int6464#13,>0t22=int6464#2
+# asm 2: movdqa <r22=%xmm12,>0t22=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t22 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<0t22=int6464#2
+# asm 2: mulpd EIGHT_EIGHT,<0t22=%xmm1
+mulpd EIGHT_EIGHT,%xmm1
+
+# qhasm: float6464 0r4 -= 0t22
+# asm 1: subpd <0t22=int6464#2,<0r4=int6464#1
+# asm 2: subpd <0t22=%xmm1,<0r4=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 64) = 0r4
+# asm 1: movdqa <0r4=int6464#1,64(<rp=int64#4)
+# asm 2: movdqa <0r4=%xmm0,64(<rp=%rcx)
+movdqa %xmm0,64(%rcx)
+
+# qhasm: 0r7 = *(int128 *)(rp + 112)
+# asm 1: movdqa 112(<rp=int64#4),>0r7=int6464#1
+# asm 2: movdqa 112(<rp=%rcx),>0r7=%xmm0
+movdqa 112(%rcx),%xmm0
+
+# qhasm: 0t13 = r13
+# asm 1: movdqa <r13=int6464#4,>0t13=int6464#2
+# asm 2: movdqa <r13=%xmm3,>0t13=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 0t13 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<0t13=int6464#2
+# asm 2: mulpd FOUR_FOUR,<0t13=%xmm1
+mulpd FOUR_FOUR,%xmm1
+
+# qhasm: float6464 0r7 -= 0t13
+# asm 1: subpd <0t13=int6464#2,<0r7=int6464#1
+# asm 2: subpd <0t13=%xmm1,<0r7=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t16 = r16
+# asm 1: movdqa <r16=int6464#7,>0t16=int6464#2
+# asm 2: movdqa <r16=%xmm6,>0t16=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 0t16 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<0t16=int6464#2
+# asm 2: mulpd THREE_THREE,<0t16=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r7 += 0t16
+# asm 1: addpd <0t16=int6464#2,<0r7=int6464#1
+# asm 2: addpd <0t16=%xmm1,<0r7=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t19 = r19
+# asm 1: movdqa <r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<0t19=int6464#2
+# asm 2: mulpd THREE_THREE,<0t19=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r7 -= 0t19
+# asm 1: subpd <0t19=int6464#2,<0r7=int6464#1
+# asm 2: subpd <0t19=%xmm1,<0r7=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t22 = r22
+# asm 1: movdqa <r22=int6464#13,>0t22=int6464#2
+# asm 2: movdqa <r22=%xmm12,>0t22=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t22 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<0t22=int6464#2
+# asm 2: mulpd FIVE_FIVE,<0t22=%xmm1
+mulpd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 0r7 -= 0t22
+# asm 1: subpd <0t22=int6464#2,<0r7=int6464#1
+# asm 2: subpd <0t22=%xmm1,<0r7=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 112) = 0r7
+# asm 1: movdqa <0r7=int6464#1,112(<rp=int64#4)
+# asm 2: movdqa <0r7=%xmm0,112(<rp=%rcx)
+movdqa %xmm0,112(%rcx)
+
+# qhasm: 0r10 = *(int128 *)(rp + 160)
+# asm 1: movdqa 160(<rp=int64#4),>0r10=int6464#1
+# asm 2: movdqa 160(<rp=%rcx),>0r10=%xmm0
+movdqa 160(%rcx),%xmm0
+
+# qhasm: 0t13 = r13
+# asm 1: movdqa <r13=int6464#4,>0t13=int6464#2
+# asm 2: movdqa <r13=%xmm3,>0t13=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 0t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t13=int6464#2
+# asm 2: mulpd SIX_SIX,<0t13=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r10 -= 0t13
+# asm 1: subpd <0t13=int6464#2,<0r10=int6464#1
+# asm 2: subpd <0t13=%xmm1,<0r10=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t16 = r16
+# asm 1: movdqa <r16=int6464#7,>0t16=int6464#2
+# asm 2: movdqa <r16=%xmm6,>0t16=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 0t16 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<0t16=int6464#2
+# asm 2: mulpd TWO_TWO,<0t16=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r10 += 0t16
+# asm 1: addpd <0t16=int6464#2,<0r10=int6464#1
+# asm 2: addpd <0t16=%xmm1,<0r10=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t19 = r19
+# asm 1: movdqa <r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t19=int6464#2
+# asm 2: mulpd SIX_SIX,<0t19=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r10 += 0t19
+# asm 1: addpd <0t19=int6464#2,<0r10=int6464#1
+# asm 2: addpd <0t19=%xmm1,<0r10=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t22 = r22
+# asm 1: movdqa <r22=int6464#13,>0t22=int6464#2
+# asm 2: movdqa <r22=%xmm12,>0t22=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t22 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<0t22=int6464#2
+# asm 2: mulpd NINE_NINE,<0t22=%xmm1
+mulpd NINE_NINE,%xmm1
+
+# qhasm: float6464 0r10 -= 0t22
+# asm 1: subpd <0t22=int6464#2,<0r10=int6464#1
+# asm 2: subpd <0t22=%xmm1,<0r10=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#1,160(<rp=int64#4)
+# asm 2: movdqa <0r10=%xmm0,160(<rp=%rcx)
+movdqa %xmm0,160(%rcx)
+
+# qhasm: 0r2 = *(int128 *)(rp + 32)
+# asm 1: movdqa 32(<rp=int64#4),>0r2=int6464#1
+# asm 2: movdqa 32(<rp=%rcx),>0r2=%xmm0
+movdqa 32(%rcx),%xmm0
+
+# qhasm: float6464 0r2 -= r14
+# asm 1: subpd <r14=int6464#5,<0r2=int6464#1
+# asm 2: subpd <r14=%xmm4,<0r2=%xmm0
+subpd %xmm4,%xmm0
+
+# qhasm: float6464 0r2 += r17
+# asm 1: addpd <r17=int6464#8,<0r2=int6464#1
+# asm 2: addpd <r17=%xmm7,<0r2=%xmm0
+addpd %xmm7,%xmm0
+
+# qhasm: 0t20 = r20
+# asm 1: movdqa <r20=int6464#11,>0t20=int6464#2
+# asm 2: movdqa <r20=%xmm10,>0t20=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 0t20 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<0t20=int6464#2
+# asm 2: mulpd TWO_TWO,<0t20=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r2 -= 0t20
+# asm 1: subpd <0t20=int6464#2,<0r2=int6464#1
+# asm 2: subpd <0t20=%xmm1,<0r2=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 32) = 0r2
+# asm 1: movdqa <0r2=int6464#1,32(<rp=int64#4)
+# asm 2: movdqa <0r2=%xmm0,32(<rp=%rcx)
+movdqa %xmm0,32(%rcx)
+
+# qhasm: 0r5 = *(int128 *)(rp + 80)
+# asm 1: movdqa 80(<rp=int64#4),>0r5=int6464#1
+# asm 2: movdqa 80(<rp=%rcx),>0r5=%xmm0
+movdqa 80(%rcx),%xmm0
+
+# qhasm: 0t14 = r14
+# asm 1: movdqa <r14=int6464#5,>0t14=int6464#2
+# asm 2: movdqa <r14=%xmm4,>0t14=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: float6464 0t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t14=int6464#2
+# asm 2: mulpd SIX_SIX,<0t14=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r5 -= 0t14
+# asm 1: subpd <0t14=int6464#2,<0r5=int6464#1
+# asm 2: subpd <0t14=%xmm1,<0r5=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t17 = r17
+# asm 1: movdqa <r17=int6464#8,>0t17=int6464#2
+# asm 2: movdqa <r17=%xmm7,>0t17=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: float6464 0t17 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<0t17=int6464#2
+# asm 2: mulpd FIVE_FIVE,<0t17=%xmm1
+mulpd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 0r5 += 0t17
+# asm 1: addpd <0t17=int6464#2,<0r5=int6464#1
+# asm 2: addpd <0t17=%xmm1,<0r5=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t20 = r20
+# asm 1: movdqa <r20=int6464#11,>0t20=int6464#2
+# asm 2: movdqa <r20=%xmm10,>0t20=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 0t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t20=int6464#2
+# asm 2: mulpd SIX_SIX,<0t20=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r5 -= 0t20
+# asm 1: subpd <0t20=int6464#2,<0r5=int6464#1
+# asm 2: subpd <0t20=%xmm1,<0r5=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 80) = 0r5
+# asm 1: movdqa <0r5=int6464#1,80(<rp=int64#4)
+# asm 2: movdqa <0r5=%xmm0,80(<rp=%rcx)
+movdqa %xmm0,80(%rcx)
+
+# qhasm: 0r8 = *(int128 *)(rp + 128)
+# asm 1: movdqa 128(<rp=int64#4),>0r8=int6464#1
+# asm 2: movdqa 128(<rp=%rcx),>0r8=%xmm0
+movdqa 128(%rcx),%xmm0
+
+# qhasm: 0t14 = r14
+# asm 1: movdqa <r14=int6464#5,>0t14=int6464#2
+# asm 2: movdqa <r14=%xmm4,>0t14=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: float6464 0t14 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<0t14=int6464#2
+# asm 2: mulpd FOUR_FOUR,<0t14=%xmm1
+mulpd FOUR_FOUR,%xmm1
+
+# qhasm: float6464 0r8 -= 0t14
+# asm 1: subpd <0t14=int6464#2,<0r8=int6464#1
+# asm 2: subpd <0t14=%xmm1,<0r8=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t17 = r17
+# asm 1: movdqa <r17=int6464#8,>0t17=int6464#2
+# asm 2: movdqa <r17=%xmm7,>0t17=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: float6464 0t17 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<0t17=int6464#2
+# asm 2: mulpd THREE_THREE,<0t17=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r8 += 0t17
+# asm 1: addpd <0t17=int6464#2,<0r8=int6464#1
+# asm 2: addpd <0t17=%xmm1,<0r8=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t20 = r20
+# asm 1: movdqa <r20=int6464#11,>0t20=int6464#2
+# asm 2: movdqa <r20=%xmm10,>0t20=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 0t20 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<0t20=int6464#2
+# asm 2: mulpd THREE_THREE,<0t20=%xmm1
+mulpd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r8 -= 0t20
+# asm 1: subpd <0t20=int6464#2,<0r8=int6464#1
+# asm 2: subpd <0t20=%xmm1,<0r8=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 128) = 0r8
+# asm 1: movdqa <0r8=int6464#1,128(<rp=int64#4)
+# asm 2: movdqa <0r8=%xmm0,128(<rp=%rcx)
+movdqa %xmm0,128(%rcx)
+
+# qhasm: 0r11 = *(int128 *)(rp + 176)
+# asm 1: movdqa 176(<rp=int64#4),>0r11=int6464#1
+# asm 2: movdqa 176(<rp=%rcx),>0r11=%xmm0
+movdqa 176(%rcx),%xmm0
+
+# qhasm: 0t14 = r14
+# asm 1: movdqa <r14=int6464#5,>0t14=int6464#2
+# asm 2: movdqa <r14=%xmm4,>0t14=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: float6464 0t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t14=int6464#2
+# asm 2: mulpd SIX_SIX,<0t14=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r11 -= 0t14
+# asm 1: subpd <0t14=int6464#2,<0r11=int6464#1
+# asm 2: subpd <0t14=%xmm1,<0r11=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: 0t17 = r17
+# asm 1: movdqa <r17=int6464#8,>0t17=int6464#2
+# asm 2: movdqa <r17=%xmm7,>0t17=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: float6464 0t17 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<0t17=int6464#2
+# asm 2: mulpd TWO_TWO,<0t17=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r11 += 0t17
+# asm 1: addpd <0t17=int6464#2,<0r11=int6464#1
+# asm 2: addpd <0t17=%xmm1,<0r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: 0t20 = r20
+# asm 1: movdqa <r20=int6464#11,>0t20=int6464#2
+# asm 2: movdqa <r20=%xmm10,>0t20=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 0t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t20=int6464#2
+# asm 2: mulpd SIX_SIX,<0t20=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r11 += 0t20
+# asm 1: addpd <0t20=int6464#2,<0r11=int6464#1
+# asm 2: addpd <0t20=%xmm1,<0r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(rp + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#1,176(<rp=int64#4)
+# asm 2: movdqa <0r11=%xmm0,176(<rp=%rcx)
+movdqa %xmm0,176(%rcx)
+
+# qhasm: int6464 0round
+
+# qhasm: int6464 0carry
+
+# qhasm: int6464 1t6
+
+# qhasm: r0 = *(int128 *)(rp + 0)
+# asm 1: movdqa 0(<rp=int64#4),>r0=int6464#1
+# asm 2: movdqa 0(<rp=%rcx),>r0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: r1 = *(int128 *)(rp + 16)
+# asm 1: movdqa 16(<rp=int64#4),>r1=int6464#2
+# asm 2: movdqa 16(<rp=%rcx),>r1=%xmm1
+movdqa 16(%rcx),%xmm1
+
+# qhasm: r2 = *(int128 *)(rp + 32)
+# asm 1: movdqa 32(<rp=int64#4),>r2=int6464#3
+# asm 2: movdqa 32(<rp=%rcx),>r2=%xmm2
+movdqa 32(%rcx),%xmm2
+
+# qhasm: r3 = *(int128 *)(rp + 48)
+# asm 1: movdqa 48(<rp=int64#4),>r3=int6464#4
+# asm 2: movdqa 48(<rp=%rcx),>r3=%xmm3
+movdqa 48(%rcx),%xmm3
+
+# qhasm: r4 = *(int128 *)(rp + 64)
+# asm 1: movdqa 64(<rp=int64#4),>r4=int6464#5
+# asm 2: movdqa 64(<rp=%rcx),>r4=%xmm4
+movdqa 64(%rcx),%xmm4
+
+# qhasm: r5 = *(int128 *)(rp + 80)
+# asm 1: movdqa 80(<rp=int64#4),>r5=int6464#6
+# asm 2: movdqa 80(<rp=%rcx),>r5=%xmm5
+movdqa 80(%rcx),%xmm5
+
+# qhasm: r6 = *(int128 *)(rp + 96)
+# asm 1: movdqa 96(<rp=int64#4),>r6=int6464#7
+# asm 2: movdqa 96(<rp=%rcx),>r6=%xmm6
+movdqa 96(%rcx),%xmm6
+
+# qhasm: r7 = *(int128 *)(rp + 112)
+# asm 1: movdqa 112(<rp=int64#4),>r7=int6464#8
+# asm 2: movdqa 112(<rp=%rcx),>r7=%xmm7
+movdqa 112(%rcx),%xmm7
+
+# qhasm: r8 = *(int128 *)(rp + 128)
+# asm 1: movdqa 128(<rp=int64#4),>r8=int6464#9
+# asm 2: movdqa 128(<rp=%rcx),>r8=%xmm8
+movdqa 128(%rcx),%xmm8
+
+# qhasm: r9 = *(int128 *)(rp + 144)
+# asm 1: movdqa 144(<rp=int64#4),>r9=int6464#10
+# asm 2: movdqa 144(<rp=%rcx),>r9=%xmm9
+movdqa 144(%rcx),%xmm9
+
+# qhasm: r10 = *(int128 *)(rp + 160)
+# asm 1: movdqa 160(<rp=int64#4),>r10=int6464#11
+# asm 2: movdqa 160(<rp=%rcx),>r10=%xmm10
+movdqa 160(%rcx),%xmm10
+
+# qhasm: r11 = *(int128 *)(rp + 176)
+# asm 1: movdqa 176(<rp=int64#4),>r11=int6464#12
+# asm 2: movdqa 176(<rp=%rcx),>r11=%xmm11
+movdqa 176(%rcx),%xmm11
+
+# qhasm: 0round = ROUND_ROUND
+# asm 1: movdqa ROUND_ROUND,<0round=int6464#13
+# asm 2: movdqa ROUND_ROUND,<0round=%xmm12
+movdqa ROUND_ROUND,%xmm12
+
+# qhasm: 0carry = r1
+# asm 1: movdqa <r1=int6464#2,>0carry=int6464#14
+# asm 2: movdqa <r1=%xmm1,>0carry=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r2 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r2=int6464#3
+# asm 2: addpd <0carry=%xmm13,<r2=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r1 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r1=int6464#2
+# asm 2: subpd <0carry=%xmm13,<r1=%xmm1
+subpd %xmm13,%xmm1
+
+# qhasm: 0carry = r4
+# asm 1: movdqa <r4=int6464#5,>0carry=int6464#14
+# asm 2: movdqa <r4=%xmm4,>0carry=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r5 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r5=int6464#6
+# asm 2: addpd <0carry=%xmm13,<r5=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r4 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r4=int6464#5
+# asm 2: subpd <0carry=%xmm13,<r4=%xmm4
+subpd %xmm13,%xmm4
+
+# qhasm: 0carry = r7
+# asm 1: movdqa <r7=int6464#8,>0carry=int6464#14
+# asm 2: movdqa <r7=%xmm7,>0carry=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r8 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r8=int6464#9
+# asm 2: addpd <0carry=%xmm13,<r8=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r7 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r7=int6464#8
+# asm 2: subpd <0carry=%xmm13,<r7=%xmm7
+subpd %xmm13,%xmm7
+
+# qhasm: 0carry = r10
+# asm 1: movdqa <r10=int6464#11,>0carry=int6464#14
+# asm 2: movdqa <r10=%xmm10,>0carry=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r11 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r11=int6464#12
+# asm 2: addpd <0carry=%xmm13,<r11=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r10 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r10=int6464#11
+# asm 2: subpd <0carry=%xmm13,<r10=%xmm10
+subpd %xmm13,%xmm10
+
+# qhasm: 0carry = r2
+# asm 1: movdqa <r2=int6464#3,>0carry=int6464#14
+# asm 2: movdqa <r2=%xmm2,>0carry=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r3 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r3=int6464#4
+# asm 2: addpd <0carry=%xmm13,<r3=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r2 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r2=int6464#3
+# asm 2: subpd <0carry=%xmm13,<r2=%xmm2
+subpd %xmm13,%xmm2
+
+# qhasm: 0carry = r5
+# asm 1: movdqa <r5=int6464#6,>0carry=int6464#14
+# asm 2: movdqa <r5=%xmm5,>0carry=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r6 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r6=int6464#7
+# asm 2: addpd <0carry=%xmm13,<r6=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r5 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r5=int6464#6
+# asm 2: subpd <0carry=%xmm13,<r5=%xmm5
+subpd %xmm13,%xmm5
+
+# qhasm: 0carry = r8
+# asm 1: movdqa <r8=int6464#9,>0carry=int6464#14
+# asm 2: movdqa <r8=%xmm8,>0carry=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r9 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r9=int6464#10
+# asm 2: addpd <0carry=%xmm13,<r9=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r8 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r8=int6464#9
+# asm 2: subpd <0carry=%xmm13,<r8=%xmm8
+subpd %xmm13,%xmm8
+
+# qhasm: 0carry = r11
+# asm 1: movdqa <r11=int6464#12,>0carry=int6464#14
+# asm 2: movdqa <r11=%xmm11,>0carry=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r0 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r0=int6464#1
+# asm 2: subpd <0carry=%xmm13,<r0=%xmm0
+subpd %xmm13,%xmm0
+
+# qhasm: float6464 r3 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r3=int6464#4
+# asm 2: subpd <0carry=%xmm13,<r3=%xmm3
+subpd %xmm13,%xmm3
+
+# qhasm: 1t6 = 0carry
+# asm 1: movdqa <0carry=int6464#14,>1t6=int6464#15
+# asm 2: movdqa <0carry=%xmm13,>1t6=%xmm14
+movdqa %xmm13,%xmm14
+
+# qhasm: float6464 1t6 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<1t6=int6464#15
+# asm 2: mulpd FOUR_FOUR,<1t6=%xmm14
+mulpd FOUR_FOUR,%xmm14
+
+# qhasm: float6464 r6 -= 1t6
+# asm 1: subpd <1t6=int6464#15,<r6=int6464#7
+# asm 2: subpd <1t6=%xmm14,<r6=%xmm6
+subpd %xmm14,%xmm6
+
+# qhasm: float6464 r9 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r9=int6464#10
+# asm 2: subpd <0carry=%xmm13,<r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r11 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r11=int6464#12
+# asm 2: subpd <0carry=%xmm13,<r11=%xmm11
+subpd %xmm13,%xmm11
+
+# qhasm: 0carry = r0
+# asm 1: movdqa <r0=int6464#1,>0carry=int6464#14
+# asm 2: movdqa <r0=%xmm0,>0carry=%xmm13
+movdqa %xmm0,%xmm13
+
+# qhasm: float6464 0carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
+mulpd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r1 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r1=int6464#2
+# asm 2: addpd <0carry=%xmm13,<r1=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: float6464 0carry *= V6_V6
+# asm 1: mulpd V6_V6,<0carry=int6464#14
+# asm 2: mulpd V6_V6,<0carry=%xmm13
+mulpd V6_V6,%xmm13
+
+# qhasm: float6464 r0 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r0=int6464#1
+# asm 2: subpd <0carry=%xmm13,<r0=%xmm0
+subpd %xmm13,%xmm0
+
+# qhasm: 0carry = r3
+# asm 1: movdqa <r3=int6464#4,>0carry=int6464#14
+# asm 2: movdqa <r3=%xmm3,>0carry=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r4 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r4=int6464#5
+# asm 2: addpd <0carry=%xmm13,<r4=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r3 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r3=int6464#4
+# asm 2: subpd <0carry=%xmm13,<r3=%xmm3
+subpd %xmm13,%xmm3
+
+# qhasm: 0carry = r6
+# asm 1: movdqa <r6=int6464#7,>0carry=int6464#14
+# asm 2: movdqa <r6=%xmm6,>0carry=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 0carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
+mulpd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r7 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r7=int6464#8
+# asm 2: addpd <0carry=%xmm13,<r7=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: float6464 0carry *= V6_V6
+# asm 1: mulpd V6_V6,<0carry=int6464#14
+# asm 2: mulpd V6_V6,<0carry=%xmm13
+mulpd V6_V6,%xmm13
+
+# qhasm: float6464 r6 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r6=int6464#7
+# asm 2: subpd <0carry=%xmm13,<r6=%xmm6
+subpd %xmm13,%xmm6
+
+# qhasm: 0carry = r9
+# asm 1: movdqa <r9=int6464#10,>0carry=int6464#14
+# asm 2: movdqa <r9=%xmm9,>0carry=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r10 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r10=int6464#11
+# asm 2: addpd <0carry=%xmm13,<r10=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r9 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r9=int6464#10
+# asm 2: subpd <0carry=%xmm13,<r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: 0carry = r1
+# asm 1: movdqa <r1=int6464#2,>0carry=int6464#14
+# asm 2: movdqa <r1=%xmm1,>0carry=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r2 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r2=int6464#3
+# asm 2: addpd <0carry=%xmm13,<r2=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r1 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r1=int6464#2
+# asm 2: subpd <0carry=%xmm13,<r1=%xmm1
+subpd %xmm13,%xmm1
+
+# qhasm: 0carry = r4
+# asm 1: movdqa <r4=int6464#5,>0carry=int6464#14
+# asm 2: movdqa <r4=%xmm4,>0carry=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r5 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r5=int6464#6
+# asm 2: addpd <0carry=%xmm13,<r5=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r4 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r4=int6464#5
+# asm 2: subpd <0carry=%xmm13,<r4=%xmm4
+subpd %xmm13,%xmm4
+
+# qhasm: 0carry = r7
+# asm 1: movdqa <r7=int6464#8,>0carry=int6464#14
+# asm 2: movdqa <r7=%xmm7,>0carry=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r8 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r8=int6464#9
+# asm 2: addpd <0carry=%xmm13,<r8=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r7 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r7=int6464#8
+# asm 2: subpd <0carry=%xmm13,<r7=%xmm7
+subpd %xmm13,%xmm7
+
+# qhasm: 0carry = r10
+# asm 1: movdqa <r10=int6464#11,>0carry=int6464#14
+# asm 2: movdqa <r10=%xmm10,>0carry=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 r11 += 0carry
+# asm 1: addpd <0carry=int6464#14,<r11=int6464#12
+# asm 2: addpd <0carry=%xmm13,<r11=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 r10 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<r10=int6464#11
+# asm 2: subpd <0carry=%xmm13,<r10=%xmm10
+subpd %xmm13,%xmm10
+
+# qhasm: *(int128 *)(rop +   0) =  r0
+# asm 1: movdqa <r0=int6464#1,0(<rop=int64#1)
+# asm 2: movdqa <r0=%xmm0,0(<rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(rop +  16) =  r1
+# asm 1: movdqa <r1=int6464#2,16(<rop=int64#1)
+# asm 2: movdqa <r1=%xmm1,16(<rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(rop +  32) =  r2
+# asm 1: movdqa <r2=int6464#3,32(<rop=int64#1)
+# asm 2: movdqa <r2=%xmm2,32(<rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(rop +  48) =  r3
+# asm 1: movdqa <r3=int6464#4,48(<rop=int64#1)
+# asm 2: movdqa <r3=%xmm3,48(<rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(rop +  64) =  r4
+# asm 1: movdqa <r4=int6464#5,64(<rop=int64#1)
+# asm 2: movdqa <r4=%xmm4,64(<rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(rop +  80) =  r5
+# asm 1: movdqa <r5=int6464#6,80(<rop=int64#1)
+# asm 2: movdqa <r5=%xmm5,80(<rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(rop +  96) =  r6
+# asm 1: movdqa <r6=int6464#7,96(<rop=int64#1)
+# asm 2: movdqa <r6=%xmm6,96(<rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(rop + 112) =  r7
+# asm 1: movdqa <r7=int6464#8,112(<rop=int64#1)
+# asm 2: movdqa <r7=%xmm7,112(<rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(rop + 128) =  r8
+# asm 1: movdqa <r8=int6464#9,128(<rop=int64#1)
+# asm 2: movdqa <r8=%xmm8,128(<rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(rop + 144) =  r9
+# asm 1: movdqa <r9=int6464#10,144(<rop=int64#1)
+# asm 2: movdqa <r9=%xmm9,144(<rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(rop + 160) = r10
+# asm 1: movdqa <r10=int6464#11,160(<rop=int64#1)
+# asm 2: movdqa <r10=%xmm10,160(<rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(rop + 176) = r11
+# asm 1: movdqa <r11=int6464#12,176(<rop=int64#1)
+# asm 2: movdqa <r11=%xmm11,176(<rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
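
Editorial note (not part of the diff): the long sequence above, and the fp2e_short_coeffred.s file that follows, repeat one pattern per limb pair: multiply a limb by an inverse-radix constant (VINV_VINV or V6INV_V6INV), add and subtract ROUND_ROUND to snap the quotient to the nearest integer, add that integer carry into the next limb, then multiply it back by V_V (or V6_V6) and subtract it from the source limb. Below is a minimal C sketch of that single step, under stated assumptions: the radix value and the ROUND constant here are placeholders chosen for illustration (ROUND is assumed to be the usual 2^52 + 2^51 round-to-nearest constant for IEEE doubles); the real constants live elsewhere in the library, and the assembly applies the step to two packed doubles at once with mulpd/addpd/subpd rather than one at a time.

/* carry_sketch.c -- illustrative only, not dclxvi code */
#include <stdio.h>

static const double ROUND = 6755399441055744.0;   /* 2^52 + 2^51 (assumed) */

/* carry = round(lo / v); hi += carry; lo -= carry * v */
static void carry_step(double *lo, double *hi, double v, double vinv)
{
    volatile double carry = *lo * vinv;  /* volatile keeps the add/sub pair  */
    carry += ROUND;                      /* force round-to-nearest-integer   */
    carry -= ROUND;                      /* via the big-constant trick       */
    *hi += carry;                        /* propagate the integer carry      */
    *lo -= carry * v;                    /* remainder now in [-v/2, v/2]     */
}

int main(void)
{
    double lo = 12345678.0, hi = 0.0;
    double v = 4096.0;                   /* example radix, not dclxvi's      */
    carry_step(&lo, &hi, v, 1.0 / v);
    printf("lo=%.0f hi=%.0f\n", lo, hi); /* prints lo=334 hi=3014            */
    return 0;
}

The assembly interleaves these steps across non-adjacent limbs (r1->r2, r4->r5, r7->r8, ...) before the adjacent chains so that independent carries can proceed in parallel; the C sketch shows only the data flow of one step.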

+ 626 - 0
dclxvi-20130329/fp2e_short_coeffred.s

@@ -0,0 +1,626 @@
+# File:   dclxvi-20130329/fp2e_short_coeffred.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_short_coeffred_qhasm
+.text
+.p2align 5
+.globl _fp2e_short_coeffred_qhasm
+.globl fp2e_short_coeffred_qhasm
+_fp2e_short_coeffred_qhasm:
+fp2e_short_coeffred_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 rop
+
+# qhasm: input rop
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: 0r0  = *(int128 *)(rop +   0)
+# asm 1: movdqa 0(<rop=int64#1),>0r0=int6464#1
+# asm 2: movdqa 0(<rop=%rdi),>0r0=%xmm0
+movdqa 0(%rdi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(rop +  16)
+# asm 1: movdqa 16(<rop=int64#1),>0r1=int6464#2
+# asm 2: movdqa 16(<rop=%rdi),>0r1=%xmm1
+movdqa 16(%rdi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(rop +  32)
+# asm 1: movdqa 32(<rop=int64#1),>0r2=int6464#3
+# asm 2: movdqa 32(<rop=%rdi),>0r2=%xmm2
+movdqa 32(%rdi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(rop +  48)
+# asm 1: movdqa 48(<rop=int64#1),>0r3=int6464#4
+# asm 2: movdqa 48(<rop=%rdi),>0r3=%xmm3
+movdqa 48(%rdi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(rop +  64)
+# asm 1: movdqa 64(<rop=int64#1),>0r4=int6464#5
+# asm 2: movdqa 64(<rop=%rdi),>0r4=%xmm4
+movdqa 64(%rdi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(rop +  80)
+# asm 1: movdqa 80(<rop=int64#1),>0r5=int6464#6
+# asm 2: movdqa 80(<rop=%rdi),>0r5=%xmm5
+movdqa 80(%rdi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(rop +  96)
+# asm 1: movdqa 96(<rop=int64#1),>0r6=int6464#7
+# asm 2: movdqa 96(<rop=%rdi),>0r6=%xmm6
+movdqa 96(%rdi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(rop + 112)
+# asm 1: movdqa 112(<rop=int64#1),>0r7=int6464#8
+# asm 2: movdqa 112(<rop=%rdi),>0r7=%xmm7
+movdqa 112(%rdi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(rop + 128)
+# asm 1: movdqa 128(<rop=int64#1),>0r8=int6464#9
+# asm 2: movdqa 128(<rop=%rdi),>0r8=%xmm8
+movdqa 128(%rdi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(rop + 144)
+# asm 1: movdqa 144(<rop=int64#1),>0r9=int6464#10
+# asm 2: movdqa 144(<rop=%rdi),>0r9=%xmm9
+movdqa 144(%rdi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(rop + 160)
+# asm 1: movdqa 160(<rop=int64#1),>0r10=int6464#11
+# asm 2: movdqa 160(<rop=%rdi),>0r10=%xmm10
+movdqa 160(%rdi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(rop + 176)
+# asm 1: movdqa 176(<rop=int64#1),>0r11=int6464#12
+# asm 2: movdqa 176(<rop=%rdi),>0r11=%xmm11
+movdqa 176(%rdi),%xmm11
+
+# qhasm: int6464 0round
+
+# qhasm: int6464 0carry
+
+# qhasm: int6464 0t6
+
+# qhasm: 0round = ROUND_ROUND
+# asm 1: movdqa ROUND_ROUND,<0round=int6464#13
+# asm 2: movdqa ROUND_ROUND,<0round=%xmm12
+movdqa ROUND_ROUND,%xmm12
+
+# qhasm: 0carry = 0r11
+# asm 1: movdqa <0r11=int6464#12,>0carry=int6464#14
+# asm 2: movdqa <0r11=%xmm11,>0carry=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r0 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r0=int6464#1
+# asm 2: subpd <0carry=%xmm13,<0r0=%xmm0
+subpd %xmm13,%xmm0
+
+# qhasm: float6464 0r3 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r3=int6464#4
+# asm 2: subpd <0carry=%xmm13,<0r3=%xmm3
+subpd %xmm13,%xmm3
+
+# qhasm: 0t6 = 0carry
+# asm 1: movdqa <0carry=int6464#14,>0t6=int6464#15
+# asm 2: movdqa <0carry=%xmm13,>0t6=%xmm14
+movdqa %xmm13,%xmm14
+
+# qhasm: float6464 0t6 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<0t6=int6464#15
+# asm 2: mulpd FOUR_FOUR,<0t6=%xmm14
+mulpd FOUR_FOUR,%xmm14
+
+# qhasm: float6464 0r6 -= 0t6
+# asm 1: subpd <0t6=int6464#15,<0r6=int6464#7
+# asm 2: subpd <0t6=%xmm14,<0r6=%xmm6
+subpd %xmm14,%xmm6
+
+# qhasm: float6464 0r9 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r9=int6464#10
+# asm 2: subpd <0carry=%xmm13,<0r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r11 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r11=int6464#12
+# asm 2: subpd <0carry=%xmm13,<0r11=%xmm11
+subpd %xmm13,%xmm11
+
+# qhasm: 0carry = 0r1
+# asm 1: movdqa <0r1=int6464#2,>0carry=int6464#14
+# asm 2: movdqa <0r1=%xmm1,>0carry=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r2 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r2=int6464#3
+# asm 2: addpd <0carry=%xmm13,<0r2=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r1 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r1=int6464#2
+# asm 2: subpd <0carry=%xmm13,<0r1=%xmm1
+subpd %xmm13,%xmm1
+
+# qhasm: 0carry = 0r3
+# asm 1: movdqa <0r3=int6464#4,>0carry=int6464#14
+# asm 2: movdqa <0r3=%xmm3,>0carry=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r4 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r4=int6464#5
+# asm 2: addpd <0carry=%xmm13,<0r4=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r3 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r3=int6464#4
+# asm 2: subpd <0carry=%xmm13,<0r3=%xmm3
+subpd %xmm13,%xmm3
+
+# qhasm: 0carry = 0r5
+# asm 1: movdqa <0r5=int6464#6,>0carry=int6464#14
+# asm 2: movdqa <0r5=%xmm5,>0carry=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r6 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r6=int6464#7
+# asm 2: addpd <0carry=%xmm13,<0r6=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r5 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r5=int6464#6
+# asm 2: subpd <0carry=%xmm13,<0r5=%xmm5
+subpd %xmm13,%xmm5
+
+# qhasm: 0carry = 0r7
+# asm 1: movdqa <0r7=int6464#8,>0carry=int6464#14
+# asm 2: movdqa <0r7=%xmm7,>0carry=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r8 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r8=int6464#9
+# asm 2: addpd <0carry=%xmm13,<0r8=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r7 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r7=int6464#8
+# asm 2: subpd <0carry=%xmm13,<0r7=%xmm7
+subpd %xmm13,%xmm7
+
+# qhasm: 0carry = 0r9
+# asm 1: movdqa <0r9=int6464#10,>0carry=int6464#14
+# asm 2: movdqa <0r9=%xmm9,>0carry=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r10 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r10=int6464#11
+# asm 2: addpd <0carry=%xmm13,<0r10=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r9 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r9=int6464#10
+# asm 2: subpd <0carry=%xmm13,<0r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: 0carry = 0r0
+# asm 1: movdqa <0r0=int6464#1,>0carry=int6464#14
+# asm 2: movdqa <0r0=%xmm0,>0carry=%xmm13
+movdqa %xmm0,%xmm13
+
+# qhasm: float6464 0carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
+mulpd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r1 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r1=int6464#2
+# asm 2: addpd <0carry=%xmm13,<0r1=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: float6464 0carry *= V6_V6
+# asm 1: mulpd V6_V6,<0carry=int6464#14
+# asm 2: mulpd V6_V6,<0carry=%xmm13
+mulpd V6_V6,%xmm13
+
+# qhasm: float6464 0r0 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r0=int6464#1
+# asm 2: subpd <0carry=%xmm13,<0r0=%xmm0
+subpd %xmm13,%xmm0
+
+# qhasm: 0carry = 0r2
+# asm 1: movdqa <0r2=int6464#3,>0carry=int6464#14
+# asm 2: movdqa <0r2=%xmm2,>0carry=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r3 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r3=int6464#4
+# asm 2: addpd <0carry=%xmm13,<0r3=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r2 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r2=int6464#3
+# asm 2: subpd <0carry=%xmm13,<0r2=%xmm2
+subpd %xmm13,%xmm2
+
+# qhasm: 0carry = 0r4
+# asm 1: movdqa <0r4=int6464#5,>0carry=int6464#14
+# asm 2: movdqa <0r4=%xmm4,>0carry=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r5 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r5=int6464#6
+# asm 2: addpd <0carry=%xmm13,<0r5=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r4 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r4=int6464#5
+# asm 2: subpd <0carry=%xmm13,<0r4=%xmm4
+subpd %xmm13,%xmm4
+
+# qhasm: 0carry = 0r6
+# asm 1: movdqa <0r6=int6464#7,>0carry=int6464#14
+# asm 2: movdqa <0r6=%xmm6,>0carry=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 0carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulpd V6INV_V6INV,<0carry=%xmm13
+mulpd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r7 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r7=int6464#8
+# asm 2: addpd <0carry=%xmm13,<0r7=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: float6464 0carry *= V6_V6
+# asm 1: mulpd V6_V6,<0carry=int6464#14
+# asm 2: mulpd V6_V6,<0carry=%xmm13
+mulpd V6_V6,%xmm13
+
+# qhasm: float6464 0r6 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r6=int6464#7
+# asm 2: subpd <0carry=%xmm13,<0r6=%xmm6
+subpd %xmm13,%xmm6
+
+# qhasm: 0carry = 0r8
+# asm 1: movdqa <0r8=int6464#9,>0carry=int6464#14
+# asm 2: movdqa <0r8=%xmm8,>0carry=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r9 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r9=int6464#10
+# asm 2: addpd <0carry=%xmm13,<0r9=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r8 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r8=int6464#9
+# asm 2: subpd <0carry=%xmm13,<0r8=%xmm8
+subpd %xmm13,%xmm8
+
+# qhasm: 0carry = 0r10
+# asm 1: movdqa <0r10=int6464#11,>0carry=int6464#14
+# asm 2: movdqa <0r10=%xmm10,>0carry=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<0carry=int6464#14
+# asm 2: mulpd VINV_VINV,<0carry=%xmm13
+mulpd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry += 0round
+# asm 1: addpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addpd <0round=%xmm12,<0carry=%xmm13
+addpd %xmm12,%xmm13
+
+# qhasm: float6464 0carry -= 0round
+# asm 1: subpd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subpd <0round=%xmm12,<0carry=%xmm13
+subpd %xmm12,%xmm13
+
+# qhasm: float6464 0r11 += 0carry
+# asm 1: addpd <0carry=int6464#14,<0r11=int6464#12
+# asm 2: addpd <0carry=%xmm13,<0r11=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: float6464 0carry *= V_V
+# asm 1: mulpd V_V,<0carry=int6464#14
+# asm 2: mulpd V_V,<0carry=%xmm13
+mulpd V_V,%xmm13
+
+# qhasm: float6464 0r10 -= 0carry
+# asm 1: subpd <0carry=int6464#14,<0r10=int6464#11
+# asm 2: subpd <0carry=%xmm13,<0r10=%xmm10
+subpd %xmm13,%xmm10
+
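+# The carry chain above walks the twelve packed digits 0r0..0r11 of the
+# result: each step multiplies a digit by the inverse of its radix, rounds
+# the quotient to the nearest integer by adding and subtracting 0round,
+# adds the rounded quotient into the next digit and subtracts
+# quotient*radix from the current one.  Digits 0 and 6 use the 6*V
+# constants (V6INV_V6INV, V6_V6); the other positions here use V
+# (VINV_VINV, V_V).
+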
+# qhasm: *(int128 *)(rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
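
The listing above is the tail of the preceding routine: once the carry chain has brought every packed digit back into range, the twelve reduced coefficients 0r0..0r11 are stored to rop and the qhasm leave sequence restores the stack pointer from %r11 and returns. Every carry step follows the same pattern: multiply the digit by the inverse of its radix, round that quotient to the nearest integer by adding and then subtracting the constant kept in 0round, add the rounded quotient into the next digit, and subtract quotient times radix from the current digit. A minimal C sketch of one such step, where carry_step, v, vinv and magic are stand-ins for the V_V/V6_V6, VINV_VINV/V6INV_V6INV and rounding constants defined in consts.s:

    /* Sketch of one carry step of the floating-point digit reduction
     * (illustration only, not the library's code).  v is the radix of this
     * digit position (V or 6*V), vinv its reciprocal, and magic a rounding
     * constant such as 3*2^51, so that (x + magic) - magic rounds x to the
     * nearest integer for |x| well below 2^51. */
    static void carry_step(double r[], int i, double v, double vinv, double magic)
    {
        double carry = r[i] * vinv;        /* approximate quotient          */
        carry = (carry + magic) - magic;   /* round to nearest integer      */
        r[i + 1] += carry;                 /* propagate into the next digit */
        r[i]     -= carry * v;             /* keep only the small remainder */
    }

For the rounding trick to survive compilation, the add/subtract of magic must not be reassociated away, so such code has to be built without -ffast-math; the assembly sidesteps the issue by performing the addpd/subpd explicitly.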

+ 4362 - 0
dclxvi-20130329/fp2e_square.s

@@ -0,0 +1,4362 @@
+# File:   dclxvi-20130329/fp2e_square.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: int64 rop
+
+# qhasm: int64 op
+
+# qhasm: input rop
+
+# qhasm: input op
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 r12
+
+# qhasm: int6464 r13
+
+# qhasm: int6464 r14
+
+# qhasm: int6464 r15
+
+# qhasm: int6464 r16
+
+# qhasm: int6464 r17
+
+# qhasm: int6464 r18
+
+# qhasm: int6464 r19
+
+# qhasm: int6464 r20
+
+# qhasm: int6464 r21
+
+# qhasm: int6464 r22
+
+# qhasm: int6464 tmp0
+
+# qhasm: int6464 tmp1
+
+# qhasm: int6464 tmp2
+
+# qhasm: int6464 tmp3
+
+# qhasm: int6464 tmp4
+
+# qhasm: int6464 tmp5
+
+# qhasm: int6464 tmp6
+
+# qhasm: int6464 tmp7
+
+# qhasm: int6464 tmp8
+
+# qhasm: int6464 tmp9
+
+# qhasm: int6464 tmp10
+
+# qhasm: int6464 tmp11
+
+# qhasm: int64 t1p
+
+# qhasm: int64 t2p
+
+# qhasm: int64 rp
+
+# qhasm: int6464 0yoff
+
+# qhasm: int6464 t0
+
+# qhasm: int6464 t1
+
+# qhasm: int6464 t2
+
+# qhasm: int6464 t3
+
+# qhasm: int6464 t4
+
+# qhasm: int6464 t5
+
+# qhasm: int6464 t6
+
+# qhasm: int6464 t7
+
+# qhasm: int6464 t8
+
+# qhasm: int6464 t9
+
+# qhasm: int6464 t10
+
+# qhasm: int6464 t11
+
+# qhasm: int6464 t12
+
+# qhasm: int6464 t13
+
+# qhasm: int6464 t14
+
+# qhasm: int6464 t15
+
+# qhasm: int6464 t16
+
+# qhasm: int6464 t17
+
+# qhasm: int6464 t18
+
+# qhasm: int6464 t19
+
+# qhasm: int6464 t20
+
+# qhasm: int6464 t21
+
+# qhasm: int6464 t22
+
+# qhasm: int6464 ab0
+
+# qhasm: int6464 ab1
+
+# qhasm: int6464 ab2
+
+# qhasm: int6464 ab3
+
+# qhasm: int6464 ab4
+
+# qhasm: int6464 ab5
+
+# qhasm: int6464 ab6
+
+# qhasm: int6464 ab7
+
+# qhasm: int6464 ab8
+
+# qhasm: int6464 ab9
+
+# qhasm: int6464 ab10
+
+# qhasm: int6464 ab11
+
+# qhasm: int6464 ab0six
+
+# qhasm: int6464 ab1six
+
+# qhasm: int6464 ab2six
+
+# qhasm: int6464 ab3six
+
+# qhasm: int6464 ab4six
+
+# qhasm: int6464 ab5six
+
+# qhasm: int6464 ab6six
+
+# qhasm: int6464 ab7six
+
+# qhasm: int6464 ab8six
+
+# qhasm: int6464 ab9six
+
+# qhasm: int6464 ab10six
+
+# qhasm: int6464 ab11six
+
+# qhasm: int64 myp
+
+# qhasm: int6464 round
+
+# qhasm: int6464 carry
+
+# qhasm: int6464 2t6
+
+# qhasm: stack6144 mystack
+
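+# The declarations above are qhasm variable declarations: int64 names live
+# in 64-bit general-purpose registers, int6464 names in 128-bit SSE
+# registers holding two packed doubles (operated on by the float6464
+# statements below), and stack6144 reserves a 768-byte scratch area on the
+# stack.
+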
+# qhasm: enter fp2e_square_qhasm
+.text
+.p2align 5
+.globl _fp2e_square_qhasm
+.globl fp2e_square_qhasm
+_fp2e_square_qhasm:
+fp2e_square_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $768,%r11
+sub %r11,%rsp
+
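+# The enter sequence above aligns %rsp to a 32-byte boundary and reserves
+# 768 bytes (plus alignment slack) for the stack6144 variable mystack; the
+# original offset is kept in %r11 and added back by the leave sequence at
+# the end of the routine.
+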
+# qhasm: myp = &mystack
+# asm 1: leaq <mystack=stack6144#1,>myp=int64#3
+# asm 2: leaq <mystack=0(%rsp),>myp=%rdx
+leaq 0(%rsp),%rdx
+
+# qhasm: r0  = *(int128 *)(op +   0)
+# asm 1: movdqa 0(<op=int64#2),>r0=int6464#1
+# asm 2: movdqa 0(<op=%rsi),>r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: tmp0 = r0                                                          
+# asm 1: movdqa <r0=int6464#1,>tmp0=int6464#2
+# asm 2: movdqa <r0=%xmm0,>tmp0=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp0 = shuffle float64 of tmp0 and tmp0 by 0x1                   
+# asm 1: shufpd $0x1,<tmp0=int6464#2,<tmp0=int6464#2
+# asm 2: shufpd $0x1,<tmp0=%xmm1,<tmp0=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r0[0] -= tmp0[0]                                       
+# asm 1: subsd <tmp0=int6464#2,<r0=int6464#1
+# asm 2: subsd <tmp0=%xmm1,<r0=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 0) = r0
+# asm 1: movdqa <r0=int6464#1,0(<myp=int64#3)
+# asm 2: movdqa <r0=%xmm0,0(<myp=%rdx)
+movdqa %xmm0,0(%rdx)
+
+# qhasm: r0 = tmp0
+# asm 1: movdqa <tmp0=int6464#2,>r0=int6464#1
+# asm 2: movdqa <tmp0=%xmm1,>r0=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r0 = unpack high double of r0 and r0                      
+# asm 1: unpckhpd <r0=int6464#1,<r0=int6464#1
+# asm 2: unpckhpd <r0=%xmm0,<r0=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp0 += r0
+# asm 1: addpd <r0=int6464#1,<tmp0=int6464#2
+# asm 2: addpd <r0=%xmm0,<tmp0=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 192) = tmp0
+# asm 1: movdqa <tmp0=int6464#2,192(<myp=int64#3)
+# asm 2: movdqa <tmp0=%xmm1,192(<myp=%rdx)
+movdqa %xmm1,192(%rdx)
+
+# qhasm: r1  = *(int128 *)(op +  16)
+# asm 1: movdqa 16(<op=int64#2),>r1=int6464#1
+# asm 2: movdqa 16(<op=%rsi),>r1=%xmm0
+movdqa 16(%rsi),%xmm0
+
+# qhasm: tmp1 = r1                                                          
+# asm 1: movdqa <r1=int6464#1,>tmp1=int6464#2
+# asm 2: movdqa <r1=%xmm0,>tmp1=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp1 = shuffle float64 of tmp1 and tmp1 by 0x1                   
+# asm 1: shufpd $0x1,<tmp1=int6464#2,<tmp1=int6464#2
+# asm 2: shufpd $0x1,<tmp1=%xmm1,<tmp1=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r1[0] -= tmp1[0]                                       
+# asm 1: subsd <tmp1=int6464#2,<r1=int6464#1
+# asm 2: subsd <tmp1=%xmm1,<r1=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 16) = r1
+# asm 1: movdqa <r1=int6464#1,16(<myp=int64#3)
+# asm 2: movdqa <r1=%xmm0,16(<myp=%rdx)
+movdqa %xmm0,16(%rdx)
+
+# qhasm: r1 = tmp1
+# asm 1: movdqa <tmp1=int6464#2,>r1=int6464#1
+# asm 2: movdqa <tmp1=%xmm1,>r1=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r1 = unpack high double of r1 and r1                      
+# asm 1: unpckhpd <r1=int6464#1,<r1=int6464#1
+# asm 2: unpckhpd <r1=%xmm0,<r1=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp1 += r1
+# asm 1: addpd <r1=int6464#1,<tmp1=int6464#2
+# asm 2: addpd <r1=%xmm0,<tmp1=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 208) = tmp1
+# asm 1: movdqa <tmp1=int6464#2,208(<myp=int64#3)
+# asm 2: movdqa <tmp1=%xmm1,208(<myp=%rdx)
+movdqa %xmm1,208(%rdx)
+
+# qhasm: r2  = *(int128 *)(op +  32)
+# asm 1: movdqa 32(<op=int64#2),>r2=int6464#1
+# asm 2: movdqa 32(<op=%rsi),>r2=%xmm0
+movdqa 32(%rsi),%xmm0
+
+# qhasm: tmp2 = r2                                                          
+# asm 1: movdqa <r2=int6464#1,>tmp2=int6464#2
+# asm 2: movdqa <r2=%xmm0,>tmp2=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp2 = shuffle float64 of tmp2 and tmp2 by 0x1                   
+# asm 1: shufpd $0x1,<tmp2=int6464#2,<tmp2=int6464#2
+# asm 2: shufpd $0x1,<tmp2=%xmm1,<tmp2=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r2[0] -= tmp2[0]                                       
+# asm 1: subsd <tmp2=int6464#2,<r2=int6464#1
+# asm 2: subsd <tmp2=%xmm1,<r2=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 32) = r2
+# asm 1: movdqa <r2=int6464#1,32(<myp=int64#3)
+# asm 2: movdqa <r2=%xmm0,32(<myp=%rdx)
+movdqa %xmm0,32(%rdx)
+
+# qhasm: r2 = tmp2
+# asm 1: movdqa <tmp2=int6464#2,>r2=int6464#1
+# asm 2: movdqa <tmp2=%xmm1,>r2=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r2 = unpack high double of r2 and r2                      
+# asm 1: unpckhpd <r2=int6464#1,<r2=int6464#1
+# asm 2: unpckhpd <r2=%xmm0,<r2=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp2 += r2
+# asm 1: addpd <r2=int6464#1,<tmp2=int6464#2
+# asm 2: addpd <r2=%xmm0,<tmp2=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 224) = tmp2
+# asm 1: movdqa <tmp2=int6464#2,224(<myp=int64#3)
+# asm 2: movdqa <tmp2=%xmm1,224(<myp=%rdx)
+movdqa %xmm1,224(%rdx)
+
+# qhasm: r3  = *(int128 *)(op +  48)
+# asm 1: movdqa 48(<op=int64#2),>r3=int6464#1
+# asm 2: movdqa 48(<op=%rsi),>r3=%xmm0
+movdqa 48(%rsi),%xmm0
+
+# qhasm: tmp3 = r3                                                          
+# asm 1: movdqa <r3=int6464#1,>tmp3=int6464#2
+# asm 2: movdqa <r3=%xmm0,>tmp3=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp3 = shuffle float64 of tmp3 and tmp3 by 0x1                   
+# asm 1: shufpd $0x1,<tmp3=int6464#2,<tmp3=int6464#2
+# asm 2: shufpd $0x1,<tmp3=%xmm1,<tmp3=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r3[0] -= tmp3[0]                                       
+# asm 1: subsd <tmp3=int6464#2,<r3=int6464#1
+# asm 2: subsd <tmp3=%xmm1,<r3=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 48) = r3
+# asm 1: movdqa <r3=int6464#1,48(<myp=int64#3)
+# asm 2: movdqa <r3=%xmm0,48(<myp=%rdx)
+movdqa %xmm0,48(%rdx)
+
+# qhasm: r3 = tmp3
+# asm 1: movdqa <tmp3=int6464#2,>r3=int6464#1
+# asm 2: movdqa <tmp3=%xmm1,>r3=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r3 = unpack high double of r3 and r3                      
+# asm 1: unpckhpd <r3=int6464#1,<r3=int6464#1
+# asm 2: unpckhpd <r3=%xmm0,<r3=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp3 += r3
+# asm 1: addpd <r3=int6464#1,<tmp3=int6464#2
+# asm 2: addpd <r3=%xmm0,<tmp3=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 240) = tmp3
+# asm 1: movdqa <tmp3=int6464#2,240(<myp=int64#3)
+# asm 2: movdqa <tmp3=%xmm1,240(<myp=%rdx)
+movdqa %xmm1,240(%rdx)
+
+# qhasm: r4  = *(int128 *)(op +  64)
+# asm 1: movdqa 64(<op=int64#2),>r4=int6464#1
+# asm 2: movdqa 64(<op=%rsi),>r4=%xmm0
+movdqa 64(%rsi),%xmm0
+
+# qhasm: tmp4 = r4                                                          
+# asm 1: movdqa <r4=int6464#1,>tmp4=int6464#2
+# asm 2: movdqa <r4=%xmm0,>tmp4=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp4 = shuffle float64 of tmp4 and tmp4 by 0x1                   
+# asm 1: shufpd $0x1,<tmp4=int6464#2,<tmp4=int6464#2
+# asm 2: shufpd $0x1,<tmp4=%xmm1,<tmp4=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r4[0] -= tmp4[0]                                       
+# asm 1: subsd <tmp4=int6464#2,<r4=int6464#1
+# asm 2: subsd <tmp4=%xmm1,<r4=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 64) = r4
+# asm 1: movdqa <r4=int6464#1,64(<myp=int64#3)
+# asm 2: movdqa <r4=%xmm0,64(<myp=%rdx)
+movdqa %xmm0,64(%rdx)
+
+# qhasm: r4 = tmp4
+# asm 1: movdqa <tmp4=int6464#2,>r4=int6464#1
+# asm 2: movdqa <tmp4=%xmm1,>r4=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r4 = unpack high double of r4 and r4                      
+# asm 1: unpckhpd <r4=int6464#1,<r4=int6464#1
+# asm 2: unpckhpd <r4=%xmm0,<r4=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp4 += r4
+# asm 1: addpd <r4=int6464#1,<tmp4=int6464#2
+# asm 2: addpd <r4=%xmm0,<tmp4=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 256) = tmp4
+# asm 1: movdqa <tmp4=int6464#2,256(<myp=int64#3)
+# asm 2: movdqa <tmp4=%xmm1,256(<myp=%rdx)
+movdqa %xmm1,256(%rdx)
+
+# qhasm: r5  = *(int128 *)(op +  80)
+# asm 1: movdqa 80(<op=int64#2),>r5=int6464#1
+# asm 2: movdqa 80(<op=%rsi),>r5=%xmm0
+movdqa 80(%rsi),%xmm0
+
+# qhasm: tmp5 = r5                                                          
+# asm 1: movdqa <r5=int6464#1,>tmp5=int6464#2
+# asm 2: movdqa <r5=%xmm0,>tmp5=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp5 = shuffle float64 of tmp5 and tmp5 by 0x1                   
+# asm 1: shufpd $0x1,<tmp5=int6464#2,<tmp5=int6464#2
+# asm 2: shufpd $0x1,<tmp5=%xmm1,<tmp5=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r5[0] -= tmp5[0]                                       
+# asm 1: subsd <tmp5=int6464#2,<r5=int6464#1
+# asm 2: subsd <tmp5=%xmm1,<r5=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 80) = r5
+# asm 1: movdqa <r5=int6464#1,80(<myp=int64#3)
+# asm 2: movdqa <r5=%xmm0,80(<myp=%rdx)
+movdqa %xmm0,80(%rdx)
+
+# qhasm: r5 = tmp5
+# asm 1: movdqa <tmp5=int6464#2,>r5=int6464#1
+# asm 2: movdqa <tmp5=%xmm1,>r5=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r5 = unpack high double of r5 and r5                      
+# asm 1: unpckhpd <r5=int6464#1,<r5=int6464#1
+# asm 2: unpckhpd <r5=%xmm0,<r5=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp5 += r5
+# asm 1: addpd <r5=int6464#1,<tmp5=int6464#2
+# asm 2: addpd <r5=%xmm0,<tmp5=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 272) = tmp5
+# asm 1: movdqa <tmp5=int6464#2,272(<myp=int64#3)
+# asm 2: movdqa <tmp5=%xmm1,272(<myp=%rdx)
+movdqa %xmm1,272(%rdx)
+
+# qhasm: r6  = *(int128 *)(op +  96)
+# asm 1: movdqa 96(<op=int64#2),>r6=int6464#1
+# asm 2: movdqa 96(<op=%rsi),>r6=%xmm0
+movdqa 96(%rsi),%xmm0
+
+# qhasm: tmp6 = r6                                                          
+# asm 1: movdqa <r6=int6464#1,>tmp6=int6464#2
+# asm 2: movdqa <r6=%xmm0,>tmp6=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp6 = shuffle float64 of tmp6 and tmp6 by 0x1                   
+# asm 1: shufpd $0x1,<tmp6=int6464#2,<tmp6=int6464#2
+# asm 2: shufpd $0x1,<tmp6=%xmm1,<tmp6=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r6[0] -= tmp6[0]                                       
+# asm 1: subsd <tmp6=int6464#2,<r6=int6464#1
+# asm 2: subsd <tmp6=%xmm1,<r6=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 96) = r6
+# asm 1: movdqa <r6=int6464#1,96(<myp=int64#3)
+# asm 2: movdqa <r6=%xmm0,96(<myp=%rdx)
+movdqa %xmm0,96(%rdx)
+
+# qhasm: r6 = tmp6
+# asm 1: movdqa <tmp6=int6464#2,>r6=int6464#1
+# asm 2: movdqa <tmp6=%xmm1,>r6=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r6 = unpack high double of r6 and r6                      
+# asm 1: unpckhpd <r6=int6464#1,<r6=int6464#1
+# asm 2: unpckhpd <r6=%xmm0,<r6=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp6 += r6
+# asm 1: addpd <r6=int6464#1,<tmp6=int6464#2
+# asm 2: addpd <r6=%xmm0,<tmp6=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 288) = tmp6
+# asm 1: movdqa <tmp6=int6464#2,288(<myp=int64#3)
+# asm 2: movdqa <tmp6=%xmm1,288(<myp=%rdx)
+movdqa %xmm1,288(%rdx)
+
+# qhasm: r7  = *(int128 *)(op + 112)
+# asm 1: movdqa 112(<op=int64#2),>r7=int6464#1
+# asm 2: movdqa 112(<op=%rsi),>r7=%xmm0
+movdqa 112(%rsi),%xmm0
+
+# qhasm: tmp7 = r7                                                          
+# asm 1: movdqa <r7=int6464#1,>tmp7=int6464#2
+# asm 2: movdqa <r7=%xmm0,>tmp7=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp7 = shuffle float64 of tmp7 and tmp7 by 0x1                   
+# asm 1: shufpd $0x1,<tmp7=int6464#2,<tmp7=int6464#2
+# asm 2: shufpd $0x1,<tmp7=%xmm1,<tmp7=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r7[0] -= tmp7[0]                                       
+# asm 1: subsd <tmp7=int6464#2,<r7=int6464#1
+# asm 2: subsd <tmp7=%xmm1,<r7=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 112) = r7
+# asm 1: movdqa <r7=int6464#1,112(<myp=int64#3)
+# asm 2: movdqa <r7=%xmm0,112(<myp=%rdx)
+movdqa %xmm0,112(%rdx)
+
+# qhasm: r7 = tmp7
+# asm 1: movdqa <tmp7=int6464#2,>r7=int6464#1
+# asm 2: movdqa <tmp7=%xmm1,>r7=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r7 = unpack high double of r7 and r7                      
+# asm 1: unpckhpd <r7=int6464#1,<r7=int6464#1
+# asm 2: unpckhpd <r7=%xmm0,<r7=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp7 += r7
+# asm 1: addpd <r7=int6464#1,<tmp7=int6464#2
+# asm 2: addpd <r7=%xmm0,<tmp7=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 304) = tmp7
+# asm 1: movdqa <tmp7=int6464#2,304(<myp=int64#3)
+# asm 2: movdqa <tmp7=%xmm1,304(<myp=%rdx)
+movdqa %xmm1,304(%rdx)
+
+# qhasm: r8  = *(int128 *)(op + 128)
+# asm 1: movdqa 128(<op=int64#2),>r8=int6464#1
+# asm 2: movdqa 128(<op=%rsi),>r8=%xmm0
+movdqa 128(%rsi),%xmm0
+
+# qhasm: tmp8 = r8                                                          
+# asm 1: movdqa <r8=int6464#1,>tmp8=int6464#2
+# asm 2: movdqa <r8=%xmm0,>tmp8=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp8 = shuffle float64 of tmp8 and tmp8 by 0x1                   
+# asm 1: shufpd $0x1,<tmp8=int6464#2,<tmp8=int6464#2
+# asm 2: shufpd $0x1,<tmp8=%xmm1,<tmp8=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r8[0] -= tmp8[0]                                       
+# asm 1: subsd <tmp8=int6464#2,<r8=int6464#1
+# asm 2: subsd <tmp8=%xmm1,<r8=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 128) = r8
+# asm 1: movdqa <r8=int6464#1,128(<myp=int64#3)
+# asm 2: movdqa <r8=%xmm0,128(<myp=%rdx)
+movdqa %xmm0,128(%rdx)
+
+# qhasm: r8 = tmp8
+# asm 1: movdqa <tmp8=int6464#2,>r8=int6464#1
+# asm 2: movdqa <tmp8=%xmm1,>r8=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r8 = unpack high double of r8 and r8                      
+# asm 1: unpckhpd <r8=int6464#1,<r8=int6464#1
+# asm 2: unpckhpd <r8=%xmm0,<r8=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp8 += r8
+# asm 1: addpd <r8=int6464#1,<tmp8=int6464#2
+# asm 2: addpd <r8=%xmm0,<tmp8=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 320) = tmp8
+# asm 1: movdqa <tmp8=int6464#2,320(<myp=int64#3)
+# asm 2: movdqa <tmp8=%xmm1,320(<myp=%rdx)
+movdqa %xmm1,320(%rdx)
+
+# qhasm: r9  = *(int128 *)(op + 144)
+# asm 1: movdqa 144(<op=int64#2),>r9=int6464#1
+# asm 2: movdqa 144(<op=%rsi),>r9=%xmm0
+movdqa 144(%rsi),%xmm0
+
+# qhasm: tmp9 = r9                                                          
+# asm 1: movdqa <r9=int6464#1,>tmp9=int6464#2
+# asm 2: movdqa <r9=%xmm0,>tmp9=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp9 = shuffle float64 of tmp9 and tmp9 by 0x1                   
+# asm 1: shufpd $0x1,<tmp9=int6464#2,<tmp9=int6464#2
+# asm 2: shufpd $0x1,<tmp9=%xmm1,<tmp9=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r9[0] -= tmp9[0]                                       
+# asm 1: subsd <tmp9=int6464#2,<r9=int6464#1
+# asm 2: subsd <tmp9=%xmm1,<r9=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 144) = r9
+# asm 1: movdqa <r9=int6464#1,144(<myp=int64#3)
+# asm 2: movdqa <r9=%xmm0,144(<myp=%rdx)
+movdqa %xmm0,144(%rdx)
+
+# qhasm: r9 = tmp9
+# asm 1: movdqa <tmp9=int6464#2,>r9=int6464#1
+# asm 2: movdqa <tmp9=%xmm1,>r9=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r9 = unpack high double of r9 and r9                      
+# asm 1: unpckhpd <r9=int6464#1,<r9=int6464#1
+# asm 2: unpckhpd <r9=%xmm0,<r9=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp9 += r9
+# asm 1: addpd <r9=int6464#1,<tmp9=int6464#2
+# asm 2: addpd <r9=%xmm0,<tmp9=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 336) = tmp9
+# asm 1: movdqa <tmp9=int6464#2,336(<myp=int64#3)
+# asm 2: movdqa <tmp9=%xmm1,336(<myp=%rdx)
+movdqa %xmm1,336(%rdx)
+
+# qhasm: r10 = *(int128 *)(op + 160)
+# asm 1: movdqa 160(<op=int64#2),>r10=int6464#1
+# asm 2: movdqa 160(<op=%rsi),>r10=%xmm0
+movdqa 160(%rsi),%xmm0
+
+# qhasm: tmp10 = r10                                                          
+# asm 1: movdqa <r10=int6464#1,>tmp10=int6464#2
+# asm 2: movdqa <r10=%xmm0,>tmp10=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp10 = shuffle float64 of tmp10 and tmp10 by 0x1                   
+# asm 1: shufpd $0x1,<tmp10=int6464#2,<tmp10=int6464#2
+# asm 2: shufpd $0x1,<tmp10=%xmm1,<tmp10=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r10[0] -= tmp10[0]                                       
+# asm 1: subsd <tmp10=int6464#2,<r10=int6464#1
+# asm 2: subsd <tmp10=%xmm1,<r10=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 160) = r10
+# asm 1: movdqa <r10=int6464#1,160(<myp=int64#3)
+# asm 2: movdqa <r10=%xmm0,160(<myp=%rdx)
+movdqa %xmm0,160(%rdx)
+
+# qhasm: r10 = tmp10
+# asm 1: movdqa <tmp10=int6464#2,>r10=int6464#1
+# asm 2: movdqa <tmp10=%xmm1,>r10=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r10 = unpack high double of r10 and r10                      
+# asm 1: unpckhpd <r10=int6464#1,<r10=int6464#1
+# asm 2: unpckhpd <r10=%xmm0,<r10=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp10 += r10
+# asm 1: addpd <r10=int6464#1,<tmp10=int6464#2
+# asm 2: addpd <r10=%xmm0,<tmp10=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 352) = tmp10
+# asm 1: movdqa <tmp10=int6464#2,352(<myp=int64#3)
+# asm 2: movdqa <tmp10=%xmm1,352(<myp=%rdx)
+movdqa %xmm1,352(%rdx)
+
+# qhasm: r11 = *(int128 *)(op + 176)
+# asm 1: movdqa 176(<op=int64#2),>r11=int6464#1
+# asm 2: movdqa 176(<op=%rsi),>r11=%xmm0
+movdqa 176(%rsi),%xmm0
+
+# qhasm: tmp11 = r11                                                          
+# asm 1: movdqa <r11=int6464#1,>tmp11=int6464#2
+# asm 2: movdqa <r11=%xmm0,>tmp11=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: tmp11 = shuffle float64 of tmp11 and tmp11 by 0x1                   
+# asm 1: shufpd $0x1,<tmp11=int6464#2,<tmp11=int6464#2
+# asm 2: shufpd $0x1,<tmp11=%xmm1,<tmp11=%xmm1
+shufpd $0x1,%xmm1,%xmm1
+
+# qhasm: float6464 r11[0] -= tmp11[0]                                       
+# asm 1: subsd <tmp11=int6464#2,<r11=int6464#1
+# asm 2: subsd <tmp11=%xmm1,<r11=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(int128 *)(myp + 176) = r11
+# asm 1: movdqa <r11=int6464#1,176(<myp=int64#3)
+# asm 2: movdqa <r11=%xmm0,176(<myp=%rdx)
+movdqa %xmm0,176(%rdx)
+
+# qhasm: r11 = tmp11
+# asm 1: movdqa <tmp11=int6464#2,>r11=int6464#1
+# asm 2: movdqa <tmp11=%xmm1,>r11=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: r11 = unpack high double of r11 and r11                      
+# asm 1: unpckhpd <r11=int6464#1,<r11=int6464#1
+# asm 2: unpckhpd <r11=%xmm0,<r11=%xmm0
+unpckhpd %xmm0,%xmm0
+
+# qhasm: float6464 tmp11 += r11
+# asm 1: addpd <r11=int6464#1,<tmp11=int6464#2
+# asm 2: addpd <r11=%xmm0,<tmp11=%xmm1
+addpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(myp + 368) = tmp11
+# asm 1: movdqa <tmp11=int6464#2,368(<myp=int64#3)
+# asm 2: movdqa <tmp11=%xmm1,368(<myp=%rdx)
+movdqa %xmm1,368(%rdx)
+
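+# For each of the twelve coefficient pairs (one lane per F_p component of
+# the fp2e operand) the code above stores the difference of the two lanes
+# at myp + 16*i and, at myp + 192 + 16*i, their sum in the low lane with
+# twice one component in the high lane.  A coefficient-wise product of
+# these two vectors therefore yields (a-b)*(a+b) = a^2 - b^2 in one lane
+# and 2*a*b in the other, i.e. both components of the square, since
+# (a + b*i)^2 = (a^2 - b^2) + (2*a*b)*i with i^2 = -1.
+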
+# qhasm: t1p  = myp
+# asm 1: mov  <myp=int64#3,>t1p=int64#2
+# asm 2: mov  <myp=%rdx,>t1p=%rsi
+mov  %rdx,%rsi
+
+# qhasm: t2p  = myp + 192
+# asm 1: lea  192(<myp=int64#3),>t2p=int64#4
+# asm 2: lea  192(<myp=%rdx),>t2p=%rcx
+lea  192(%rdx),%rcx
+
+# qhasm: rp = myp + 384
+# asm 1: lea  384(<myp=int64#3),>rp=int64#3
+# asm 2: lea  384(<myp=%rdx),>rp=%rdx
+lea  384(%rdx),%rdx
+
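+# t1p now points at the twelve per-coefficient differences, t2p at the
+# twelve sums (with the doubled lanes), and rp at the scratch area that
+# receives the 23 unreduced product coefficients r0..r22.
+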
+# qhasm: ab0 = *(int128 *)(t1p + 0)
+# asm 1: movdqa 0(<t1p=int64#2),>ab0=int6464#1
+# asm 2: movdqa 0(<t1p=%rsi),>ab0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: t0 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t0=int6464#2
+# asm 2: movdqa <ab0=%xmm0,>t0=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: float6464 t0 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t0=int6464#2
+# asm 2: mulpd 0(<t2p=%rcx),<t0=%xmm1
+mulpd 0(%rcx),%xmm1
+
+# qhasm: r0 =t0
+# asm 1: movdqa <t0=int6464#2,>r0=int6464#2
+# asm 2: movdqa <t0=%xmm1,>r0=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: t1 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t1=int6464#3
+# asm 2: movdqa <ab0=%xmm0,>t1=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: float6464 t1 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t1=int6464#3
+# asm 2: mulpd 16(<t2p=%rcx),<t1=%xmm2
+mulpd 16(%rcx),%xmm2
+
+# qhasm: r1 =t1
+# asm 1: movdqa <t1=int6464#3,>r1=int6464#3
+# asm 2: movdqa <t1=%xmm2,>r1=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: t2 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t2=int6464#4
+# asm 2: movdqa <ab0=%xmm0,>t2=%xmm3
+movdqa %xmm0,%xmm3
+
+# qhasm: float6464 t2 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t2=int6464#4
+# asm 2: mulpd 32(<t2p=%rcx),<t2=%xmm3
+mulpd 32(%rcx),%xmm3
+
+# qhasm: r2 =t2
+# asm 1: movdqa <t2=int6464#4,>r2=int6464#4
+# asm 2: movdqa <t2=%xmm3,>r2=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: t3 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t3=int6464#5
+# asm 2: movdqa <ab0=%xmm0,>t3=%xmm4
+movdqa %xmm0,%xmm4
+
+# qhasm: float6464 t3 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t3=int6464#5
+# asm 2: mulpd 48(<t2p=%rcx),<t3=%xmm4
+mulpd 48(%rcx),%xmm4
+
+# qhasm: r3 =t3
+# asm 1: movdqa <t3=int6464#5,>r3=int6464#5
+# asm 2: movdqa <t3=%xmm4,>r3=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: t4 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t4=int6464#6
+# asm 2: movdqa <ab0=%xmm0,>t4=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: float6464 t4 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t4=int6464#6
+# asm 2: mulpd 64(<t2p=%rcx),<t4=%xmm5
+mulpd 64(%rcx),%xmm5
+
+# qhasm: r4 =t4
+# asm 1: movdqa <t4=int6464#6,>r4=int6464#6
+# asm 2: movdqa <t4=%xmm5,>r4=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: t5 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t5=int6464#7
+# asm 2: movdqa <ab0=%xmm0,>t5=%xmm6
+movdqa %xmm0,%xmm6
+
+# qhasm: float6464 t5 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t5=int6464#7
+# asm 2: mulpd 80(<t2p=%rcx),<t5=%xmm6
+mulpd 80(%rcx),%xmm6
+
+# qhasm: r5 =t5
+# asm 1: movdqa <t5=int6464#7,>r5=int6464#7
+# asm 2: movdqa <t5=%xmm6,>r5=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: t6 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t6=int6464#8
+# asm 2: movdqa <ab0=%xmm0,>t6=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm: float6464 t6 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t6=int6464#8
+# asm 2: mulpd 96(<t2p=%rcx),<t6=%xmm7
+mulpd 96(%rcx),%xmm7
+
+# qhasm: r6 =t6
+# asm 1: movdqa <t6=int6464#8,>r6=int6464#8
+# asm 2: movdqa <t6=%xmm7,>r6=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: t7 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t7=int6464#9
+# asm 2: movdqa <ab0=%xmm0,>t7=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t7=int6464#9
+# asm 2: mulpd 112(<t2p=%rcx),<t7=%xmm8
+mulpd 112(%rcx),%xmm8
+
+# qhasm: r7 =t7
+# asm 1: movdqa <t7=int6464#9,>r7=int6464#9
+# asm 2: movdqa <t7=%xmm8,>r7=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: t8 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t8=int6464#10
+# asm 2: movdqa <ab0=%xmm0,>t8=%xmm9
+movdqa %xmm0,%xmm9
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t8=int6464#10
+# asm 2: mulpd 128(<t2p=%rcx),<t8=%xmm9
+mulpd 128(%rcx),%xmm9
+
+# qhasm: r8 =t8
+# asm 1: movdqa <t8=int6464#10,>r8=int6464#10
+# asm 2: movdqa <t8=%xmm9,>r8=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: t9 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t9=int6464#11
+# asm 2: movdqa <ab0=%xmm0,>t9=%xmm10
+movdqa %xmm0,%xmm10
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t9=int6464#11
+# asm 2: mulpd 144(<t2p=%rcx),<t9=%xmm10
+mulpd 144(%rcx),%xmm10
+
+# qhasm: r9 =t9
+# asm 1: movdqa <t9=int6464#11,>r9=int6464#11
+# asm 2: movdqa <t9=%xmm10,>r9=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: t10 = ab0
+# asm 1: movdqa <ab0=int6464#1,>t10=int6464#12
+# asm 2: movdqa <ab0=%xmm0,>t10=%xmm11
+movdqa %xmm0,%xmm11
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t10=int6464#12
+# asm 2: mulpd 160(<t2p=%rcx),<t10=%xmm11
+mulpd 160(%rcx),%xmm11
+
+# qhasm: r10 =t10
+# asm 1: movdqa <t10=int6464#12,>r10=int6464#12
+# asm 2: movdqa <t10=%xmm11,>r10=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: r11 = ab0
+# asm 1: movdqa <ab0=int6464#1,>r11=int6464#1
+# asm 2: movdqa <ab0=%xmm0,>r11=%xmm0
+movdqa %xmm0,%xmm0
+
+# qhasm: float6464 r11 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r11=int6464#1
+# asm 2: mulpd 176(<t2p=%rcx),<r11=%xmm0
+mulpd 176(%rcx),%xmm0
+
+# qhasm: *(int128 *)(rp + 0) = r0
+# asm 1: movdqa <r0=int6464#2,0(<rp=int64#3)
+# asm 2: movdqa <r0=%xmm1,0(<rp=%rdx)
+movdqa %xmm1,0(%rdx)
+
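+# What follows is a schoolbook multiplication of the two 12-coefficient
+# vectors: each ab_j loaded from t1p is multiplied by all twelve
+# coefficients at t2p and accumulated into r_{j+k}, building the 23
+# intermediate coefficients r0..r22 at rp before they are reduced back to
+# twelve digits later in the file.  From ab1 on, a copy pre-scaled by
+# SIX_SIX (ab_jsix) feeds the columns that carry an extra factor of 6 in
+# dclxvi's coefficient representation.
+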
+# qhasm: ab1 = *(int128 *)(t1p + 16)
+# asm 1: movdqa 16(<t1p=int64#2),>ab1=int6464#2
+# asm 2: movdqa 16(<t1p=%rsi),>ab1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: ab1six = ab1
+# asm 1: movdqa <ab1=int6464#2,>ab1six=int6464#13
+# asm 2: movdqa <ab1=%xmm1,>ab1six=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: float6464 ab1six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab1six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab1six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t1 = ab1
+# asm 1: movdqa <ab1=int6464#2,>t1=int6464#14
+# asm 2: movdqa <ab1=%xmm1,>t1=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 t1 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t1=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t1=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r1 +=t1
+# asm 1: addpd <t1=int6464#14,<r1=int6464#3
+# asm 2: addpd <t1=%xmm13,<r1=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t7 = ab1
+# asm 1: movdqa <ab1=int6464#2,>t7=int6464#2
+# asm 2: movdqa <ab1=%xmm1,>t7=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t7=int6464#2
+# asm 2: mulpd 96(<t2p=%rcx),<t7=%xmm1
+mulpd 96(%rcx),%xmm1
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#2,<r7=int6464#9
+# asm 2: addpd <t7=%xmm1,<r7=%xmm8
+addpd %xmm1,%xmm8
+
+# qhasm: t2 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t2=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t2=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t2 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t2=int6464#2
+# asm 2: mulpd 16(<t2p=%rcx),<t2=%xmm1
+mulpd 16(%rcx),%xmm1
+
+# qhasm: float6464 r2 +=t2
+# asm 1: addpd <t2=int6464#2,<r2=int6464#4
+# asm 2: addpd <t2=%xmm1,<r2=%xmm3
+addpd %xmm1,%xmm3
+
+# qhasm: t3 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t3=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t3=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t3 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t3=int6464#2
+# asm 2: mulpd 32(<t2p=%rcx),<t3=%xmm1
+mulpd 32(%rcx),%xmm1
+
+# qhasm: float6464 r3 +=t3
+# asm 1: addpd <t3=int6464#2,<r3=int6464#5
+# asm 2: addpd <t3=%xmm1,<r3=%xmm4
+addpd %xmm1,%xmm4
+
+# qhasm: t4 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t4=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t4=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t4 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t4=int6464#2
+# asm 2: mulpd 48(<t2p=%rcx),<t4=%xmm1
+mulpd 48(%rcx),%xmm1
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#2,<r4=int6464#6
+# asm 2: addpd <t4=%xmm1,<r4=%xmm5
+addpd %xmm1,%xmm5
+
+# qhasm: t5 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t5=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t5=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t5 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t5=int6464#2
+# asm 2: mulpd 64(<t2p=%rcx),<t5=%xmm1
+mulpd 64(%rcx),%xmm1
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#2,<r5=int6464#7
+# asm 2: addpd <t5=%xmm1,<r5=%xmm6
+addpd %xmm1,%xmm6
+
+# qhasm: t6 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t6=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t6=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t6 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t6=int6464#2
+# asm 2: mulpd 80(<t2p=%rcx),<t6=%xmm1
+mulpd 80(%rcx),%xmm1
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#2,<r6=int6464#8
+# asm 2: addpd <t6=%xmm1,<r6=%xmm7
+addpd %xmm1,%xmm7
+
+# qhasm: t8 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t8=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t8=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t8=int6464#2
+# asm 2: mulpd 112(<t2p=%rcx),<t8=%xmm1
+mulpd 112(%rcx),%xmm1
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#2,<r8=int6464#10
+# asm 2: addpd <t8=%xmm1,<r8=%xmm9
+addpd %xmm1,%xmm9
+
+# qhasm: t9 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t9=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t9=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t9=int6464#2
+# asm 2: mulpd 128(<t2p=%rcx),<t9=%xmm1
+mulpd 128(%rcx),%xmm1
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#2,<r9=int6464#11
+# asm 2: addpd <t9=%xmm1,<r9=%xmm10
+addpd %xmm1,%xmm10
+
+# qhasm: t10 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t10=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t10=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t10=int6464#2
+# asm 2: mulpd 144(<t2p=%rcx),<t10=%xmm1
+mulpd 144(%rcx),%xmm1
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#2,<r10=int6464#12
+# asm 2: addpd <t10=%xmm1,<r10=%xmm11
+addpd %xmm1,%xmm11
+
+# qhasm: t11 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>t11=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>t11=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t11=int6464#2
+# asm 2: mulpd 160(<t2p=%rcx),<t11=%xmm1
+mulpd 160(%rcx),%xmm1
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#2,<r11=int6464#1
+# asm 2: addpd <t11=%xmm1,<r11=%xmm0
+addpd %xmm1,%xmm0
+
+# qhasm: r12 = ab1six
+# asm 1: movdqa <ab1six=int6464#13,>r12=int6464#2
+# asm 2: movdqa <ab1six=%xmm12,>r12=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 r12 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r12=int6464#2
+# asm 2: mulpd 176(<t2p=%rcx),<r12=%xmm1
+mulpd 176(%rcx),%xmm1
+
+# qhasm: *(int128 *)(rp + 16) = r1
+# asm 1: movdqa <r1=int6464#3,16(<rp=int64#3)
+# asm 2: movdqa <r1=%xmm2,16(<rp=%rdx)
+movdqa %xmm2,16(%rdx)
+
+# qhasm: ab2 = *(int128 *)(t1p + 32)
+# asm 1: movdqa 32(<t1p=int64#2),>ab2=int6464#3
+# asm 2: movdqa 32(<t1p=%rsi),>ab2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: ab2six = ab2
+# asm 1: movdqa <ab2=int6464#3,>ab2six=int6464#13
+# asm 2: movdqa <ab2=%xmm2,>ab2six=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: float6464 ab2six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab2six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab2six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t2 = ab2
+# asm 1: movdqa <ab2=int6464#3,>t2=int6464#14
+# asm 2: movdqa <ab2=%xmm2,>t2=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 t2 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t2=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t2=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r2 +=t2
+# asm 1: addpd <t2=int6464#14,<r2=int6464#4
+# asm 2: addpd <t2=%xmm13,<r2=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: t7 = ab2
+# asm 1: movdqa <ab2=int6464#3,>t7=int6464#14
+# asm 2: movdqa <ab2=%xmm2,>t7=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t7=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t7=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#14,<r7=int6464#9
+# asm 2: addpd <t7=%xmm13,<r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t8 = ab2
+# asm 1: movdqa <ab2=int6464#3,>t8=int6464#14
+# asm 2: movdqa <ab2=%xmm2,>t8=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t8=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t8=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#14,<r8=int6464#10
+# asm 2: addpd <t8=%xmm13,<r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: r13 = ab2
+# asm 1: movdqa <ab2=int6464#3,>r13=int6464#3
+# asm 2: movdqa <ab2=%xmm2,>r13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: float6464 r13 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r13=int6464#3
+# asm 2: mulpd 176(<t2p=%rcx),<r13=%xmm2
+mulpd 176(%rcx),%xmm2
+
+# qhasm: t3 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t3=int6464#14
+# asm 2: movdqa <ab2six=%xmm12,>t3=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t3 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t3=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t3=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r3 +=t3
+# asm 1: addpd <t3=int6464#14,<r3=int6464#5
+# asm 2: addpd <t3=%xmm13,<r3=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: t4 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t4=int6464#14
+# asm 2: movdqa <ab2six=%xmm12,>t4=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t4 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t4=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t4=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#14,<r4=int6464#6
+# asm 2: addpd <t4=%xmm13,<r4=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: t5 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t5=int6464#14
+# asm 2: movdqa <ab2six=%xmm12,>t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t5 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t5=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t5=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#14,<r5=int6464#7
+# asm 2: addpd <t5=%xmm13,<r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t6 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t6=int6464#14
+# asm 2: movdqa <ab2six=%xmm12,>t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t6 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t6=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t6=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#14,<r6=int6464#8
+# asm 2: addpd <t6=%xmm13,<r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: t9 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t9=int6464#14
+# asm 2: movdqa <ab2six=%xmm12,>t9=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t9=int6464#14
+# asm 2: mulpd 112(<t2p=%rcx),<t9=%xmm13
+mulpd 112(%rcx),%xmm13
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#14,<r9=int6464#11
+# asm 2: addpd <t9=%xmm13,<r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: t10 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t10=int6464#14
+# asm 2: movdqa <ab2six=%xmm12,>t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t10=int6464#14
+# asm 2: mulpd 128(<t2p=%rcx),<t10=%xmm13
+mulpd 128(%rcx),%xmm13
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#12
+# asm 2: addpd <t10=%xmm13,<r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t11 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t11=int6464#14
+# asm 2: movdqa <ab2six=%xmm12,>t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 144(<t2p=%rcx),<t11=%xmm13
+mulpd 144(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t12 = ab2six
+# asm 1: movdqa <ab2six=int6464#13,>t12=int6464#13
+# asm 2: movdqa <ab2six=%xmm12,>t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t12=int6464#13
+# asm 2: mulpd 160(<t2p=%rcx),<t12=%xmm12
+mulpd 160(%rcx),%xmm12
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#13,<r12=int6464#2
+# asm 2: addpd <t12=%xmm12,<r12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(rp + 32) = r2
+# asm 1: movdqa <r2=int6464#4,32(<rp=int64#3)
+# asm 2: movdqa <r2=%xmm3,32(<rp=%rdx)
+movdqa %xmm3,32(%rdx)
+
+# qhasm: ab3 = *(int128 *)(t1p + 48)
+# asm 1: movdqa 48(<t1p=int64#2),>ab3=int6464#4
+# asm 2: movdqa 48(<t1p=%rsi),>ab3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: ab3six = ab3
+# asm 1: movdqa <ab3=int6464#4,>ab3six=int6464#13
+# asm 2: movdqa <ab3=%xmm3,>ab3six=%xmm12
+movdqa %xmm3,%xmm12
+
+# qhasm: float6464 ab3six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab3six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab3six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t3 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t3=int6464#14
+# asm 2: movdqa <ab3=%xmm3,>t3=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 t3 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t3=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t3=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r3 +=t3
+# asm 1: addpd <t3=int6464#14,<r3=int6464#5
+# asm 2: addpd <t3=%xmm13,<r3=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: t7 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t7=int6464#14
+# asm 2: movdqa <ab3=%xmm3,>t7=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t7=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t7=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#14,<r7=int6464#9
+# asm 2: addpd <t7=%xmm13,<r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t8 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t8=int6464#14
+# asm 2: movdqa <ab3=%xmm3,>t8=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t8=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t8=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#14,<r8=int6464#10
+# asm 2: addpd <t8=%xmm13,<r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: t9 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t9=int6464#14
+# asm 2: movdqa <ab3=%xmm3,>t9=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t9=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t9=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#14,<r9=int6464#11
+# asm 2: addpd <t9=%xmm13,<r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: t13 = ab3
+# asm 1: movdqa <ab3=int6464#4,>t13=int6464#14
+# asm 2: movdqa <ab3=%xmm3,>t13=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t13=int6464#14
+# asm 2: mulpd 160(<t2p=%rcx),<t13=%xmm13
+mulpd 160(%rcx),%xmm13
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: r14 = ab3
+# asm 1: movdqa <ab3=int6464#4,>r14=int6464#4
+# asm 2: movdqa <ab3=%xmm3,>r14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: float6464 r14 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r14=int6464#4
+# asm 2: mulpd 176(<t2p=%rcx),<r14=%xmm3
+mulpd 176(%rcx),%xmm3
+
+# qhasm: t4 = ab3six
+# asm 1: movdqa <ab3six=int6464#13,>t4=int6464#14
+# asm 2: movdqa <ab3six=%xmm12,>t4=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t4 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t4=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t4=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#14,<r4=int6464#6
+# asm 2: addpd <t4=%xmm13,<r4=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: t5 = ab3six
+# asm 1: movdqa <ab3six=int6464#13,>t5=int6464#14
+# asm 2: movdqa <ab3six=%xmm12,>t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t5 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t5=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t5=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#14,<r5=int6464#7
+# asm 2: addpd <t5=%xmm13,<r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t6 = ab3six
+# asm 1: movdqa <ab3six=int6464#13,>t6=int6464#14
+# asm 2: movdqa <ab3six=%xmm12,>t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t6 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t6=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t6=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#14,<r6=int6464#8
+# asm 2: addpd <t6=%xmm13,<r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: t10 = ab3six
+# asm 1: movdqa <ab3six=int6464#13,>t10=int6464#14
+# asm 2: movdqa <ab3six=%xmm12,>t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t10=int6464#14
+# asm 2: mulpd 112(<t2p=%rcx),<t10=%xmm13
+mulpd 112(%rcx),%xmm13
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#12
+# asm 2: addpd <t10=%xmm13,<r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t11 = ab3six
+# asm 1: movdqa <ab3six=int6464#13,>t11=int6464#14
+# asm 2: movdqa <ab3six=%xmm12,>t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 128(<t2p=%rcx),<t11=%xmm13
+mulpd 128(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t12 = ab3six
+# asm 1: movdqa <ab3six=int6464#13,>t12=int6464#13
+# asm 2: movdqa <ab3six=%xmm12,>t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t12=int6464#13
+# asm 2: mulpd 144(<t2p=%rcx),<t12=%xmm12
+mulpd 144(%rcx),%xmm12
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#13,<r12=int6464#2
+# asm 2: addpd <t12=%xmm12,<r12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(rp + 48) = r3
+# asm 1: movdqa <r3=int6464#5,48(<rp=int64#3)
+# asm 2: movdqa <r3=%xmm4,48(<rp=%rdx)
+movdqa %xmm4,48(%rdx)
+
+# qhasm: ab4 = *(int128 *)(t1p + 64)
+# asm 1: movdqa 64(<t1p=int64#2),>ab4=int6464#5
+# asm 2: movdqa 64(<t1p=%rsi),>ab4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: ab4six = ab4
+# asm 1: movdqa <ab4=int6464#5,>ab4six=int6464#13
+# asm 2: movdqa <ab4=%xmm4,>ab4six=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 ab4six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab4six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab4six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t4 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t4=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>t4=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t4 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t4=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t4=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r4 +=t4
+# asm 1: addpd <t4=int6464#14,<r4=int6464#6
+# asm 2: addpd <t4=%xmm13,<r4=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: t7 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t7=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>t7=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t7=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t7=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#14,<r7=int6464#9
+# asm 2: addpd <t7=%xmm13,<r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t8 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t8=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>t8=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t8=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t8=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#14,<r8=int6464#10
+# asm 2: addpd <t8=%xmm13,<r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: t9 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t9=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>t9=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t9=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t9=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#14,<r9=int6464#11
+# asm 2: addpd <t9=%xmm13,<r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: t10 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t10=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>t10=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t10=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t10=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#12
+# asm 2: addpd <t10=%xmm13,<r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t13 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t13=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>t13=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t13=int6464#14
+# asm 2: mulpd 144(<t2p=%rcx),<t13=%xmm13
+mulpd 144(%rcx),%xmm13
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t14 = ab4
+# asm 1: movdqa <ab4=int6464#5,>t14=int6464#14
+# asm 2: movdqa <ab4=%xmm4,>t14=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t14=int6464#14
+# asm 2: mulpd 160(<t2p=%rcx),<t14=%xmm13
+mulpd 160(%rcx),%xmm13
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#14,<r14=int6464#4
+# asm 2: addpd <t14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: r15 = ab4
+# asm 1: movdqa <ab4=int6464#5,>r15=int6464#5
+# asm 2: movdqa <ab4=%xmm4,>r15=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: float6464 r15 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r15=int6464#5
+# asm 2: mulpd 176(<t2p=%rcx),<r15=%xmm4
+mulpd 176(%rcx),%xmm4
+
+# qhasm: t5 = ab4six
+# asm 1: movdqa <ab4six=int6464#13,>t5=int6464#14
+# asm 2: movdqa <ab4six=%xmm12,>t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t5 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t5=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t5=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#14,<r5=int6464#7
+# asm 2: addpd <t5=%xmm13,<r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t6 = ab4six
+# asm 1: movdqa <ab4six=int6464#13,>t6=int6464#14
+# asm 2: movdqa <ab4six=%xmm12,>t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t6 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t6=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t6=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#14,<r6=int6464#8
+# asm 2: addpd <t6=%xmm13,<r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: t11 = ab4six
+# asm 1: movdqa <ab4six=int6464#13,>t11=int6464#14
+# asm 2: movdqa <ab4six=%xmm12,>t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 112(<t2p=%rcx),<t11=%xmm13
+mulpd 112(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t12 = ab4six
+# asm 1: movdqa <ab4six=int6464#13,>t12=int6464#13
+# asm 2: movdqa <ab4six=%xmm12,>t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t12=int6464#13
+# asm 2: mulpd 128(<t2p=%rcx),<t12=%xmm12
+mulpd 128(%rcx),%xmm12
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#13,<r12=int6464#2
+# asm 2: addpd <t12=%xmm12,<r12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(rp + 64) = r4
+# asm 1: movdqa <r4=int6464#6,64(<rp=int64#3)
+# asm 2: movdqa <r4=%xmm5,64(<rp=%rdx)
+movdqa %xmm5,64(%rdx)
+
+# qhasm: ab5 = *(int128 *)(t1p + 80)
+# asm 1: movdqa 80(<t1p=int64#2),>ab5=int6464#6
+# asm 2: movdqa 80(<t1p=%rsi),>ab5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: ab5six = ab5
+# asm 1: movdqa <ab5=int6464#6,>ab5six=int6464#13
+# asm 2: movdqa <ab5=%xmm5,>ab5six=%xmm12
+movdqa %xmm5,%xmm12
+
+# qhasm: float6464 ab5six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab5six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab5six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t5 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t5=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t5=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t5 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t5=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t5=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r5 +=t5
+# asm 1: addpd <t5=int6464#14,<r5=int6464#7
+# asm 2: addpd <t5=%xmm13,<r5=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t7 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t7=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t7=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t7=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t7=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#14,<r7=int6464#9
+# asm 2: addpd <t7=%xmm13,<r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t8 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t8=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t8=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t8=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t8=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#14,<r8=int6464#10
+# asm 2: addpd <t8=%xmm13,<r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: t9 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t9=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t9=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t9=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t9=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#14,<r9=int6464#11
+# asm 2: addpd <t9=%xmm13,<r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: t10 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t10=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t10=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t10=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t10=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#12
+# asm 2: addpd <t10=%xmm13,<r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t11 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t11=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t11=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t11=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t13 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t13=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t13=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t13=int6464#14
+# asm 2: mulpd 128(<t2p=%rcx),<t13=%xmm13
+mulpd 128(%rcx),%xmm13
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t14 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t14=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t14=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t14=int6464#14
+# asm 2: mulpd 144(<t2p=%rcx),<t14=%xmm13
+mulpd 144(%rcx),%xmm13
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#14,<r14=int6464#4
+# asm 2: addpd <t14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: t15 = ab5
+# asm 1: movdqa <ab5=int6464#6,>t15=int6464#14
+# asm 2: movdqa <ab5=%xmm5,>t15=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 t15 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t15=int6464#14
+# asm 2: mulpd 160(<t2p=%rcx),<t15=%xmm13
+mulpd 160(%rcx),%xmm13
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#14,<r15=int6464#5
+# asm 2: addpd <t15=%xmm13,<r15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: r16 = ab5
+# asm 1: movdqa <ab5=int6464#6,>r16=int6464#6
+# asm 2: movdqa <ab5=%xmm5,>r16=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: float6464 r16 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r16=int6464#6
+# asm 2: mulpd 176(<t2p=%rcx),<r16=%xmm5
+mulpd 176(%rcx),%xmm5
+
+# qhasm: t6 = ab5six
+# asm 1: movdqa <ab5six=int6464#13,>t6=int6464#14
+# asm 2: movdqa <ab5six=%xmm12,>t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t6 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t6=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t6=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#14,<r6=int6464#8
+# asm 2: addpd <t6=%xmm13,<r6=%xmm7
+addpd %xmm13,%xmm7
+
+# qhasm: t12 = ab5six
+# asm 1: movdqa <ab5six=int6464#13,>t12=int6464#13
+# asm 2: movdqa <ab5six=%xmm12,>t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t12=int6464#13
+# asm 2: mulpd 112(<t2p=%rcx),<t12=%xmm12
+mulpd 112(%rcx),%xmm12
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#13,<r12=int6464#2
+# asm 2: addpd <t12=%xmm12,<r12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: *(int128 *)(rp + 80) = r5
+# asm 1: movdqa <r5=int6464#7,80(<rp=int64#3)
+# asm 2: movdqa <r5=%xmm6,80(<rp=%rdx)
+movdqa %xmm6,80(%rdx)
+
+# qhasm: ab6 = *(int128 *)(t1p + 96)
+# asm 1: movdqa 96(<t1p=int64#2),>ab6=int6464#7
+# asm 2: movdqa 96(<t1p=%rsi),>ab6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: t6 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t6=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t6=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t6 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t6=int6464#13
+# asm 2: mulpd 0(<t2p=%rcx),<t6=%xmm12
+mulpd 0(%rcx),%xmm12
+
+# qhasm: float6464 r6 +=t6
+# asm 1: addpd <t6=int6464#13,<r6=int6464#8
+# asm 2: addpd <t6=%xmm12,<r6=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: t7 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t7=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t7=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t7=int6464#13
+# asm 2: mulpd 16(<t2p=%rcx),<t7=%xmm12
+mulpd 16(%rcx),%xmm12
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#13,<r7=int6464#9
+# asm 2: addpd <t7=%xmm12,<r7=%xmm8
+addpd %xmm12,%xmm8
+
+# qhasm: t8 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t8=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t8=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t8=int6464#13
+# asm 2: mulpd 32(<t2p=%rcx),<t8=%xmm12
+mulpd 32(%rcx),%xmm12
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#13,<r8=int6464#10
+# asm 2: addpd <t8=%xmm12,<r8=%xmm9
+addpd %xmm12,%xmm9
+
+# qhasm: t9 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t9=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t9=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t9=int6464#13
+# asm 2: mulpd 48(<t2p=%rcx),<t9=%xmm12
+mulpd 48(%rcx),%xmm12
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#13,<r9=int6464#11
+# asm 2: addpd <t9=%xmm12,<r9=%xmm10
+addpd %xmm12,%xmm10
+
+# qhasm: t10 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t10=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t10=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t10=int6464#13
+# asm 2: mulpd 64(<t2p=%rcx),<t10=%xmm12
+mulpd 64(%rcx),%xmm12
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#13,<r10=int6464#12
+# asm 2: addpd <t10=%xmm12,<r10=%xmm11
+addpd %xmm12,%xmm11
+
+# qhasm: t11 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t11=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t11=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t11=int6464#13
+# asm 2: mulpd 80(<t2p=%rcx),<t11=%xmm12
+mulpd 80(%rcx),%xmm12
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#13,<r11=int6464#1
+# asm 2: addpd <t11=%xmm12,<r11=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: t12 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t12=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t12=int6464#13
+# asm 2: mulpd 96(<t2p=%rcx),<t12=%xmm12
+mulpd 96(%rcx),%xmm12
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#13,<r12=int6464#2
+# asm 2: addpd <t12=%xmm12,<r12=%xmm1
+addpd %xmm12,%xmm1
+
+# qhasm: t13 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t13=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t13=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t13=int6464#13
+# asm 2: mulpd 112(<t2p=%rcx),<t13=%xmm12
+mulpd 112(%rcx),%xmm12
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#13,<r13=int6464#3
+# asm 2: addpd <t13=%xmm12,<r13=%xmm2
+addpd %xmm12,%xmm2
+
+# qhasm: t14 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t14=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t14=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t14=int6464#13
+# asm 2: mulpd 128(<t2p=%rcx),<t14=%xmm12
+mulpd 128(%rcx),%xmm12
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#13,<r14=int6464#4
+# asm 2: addpd <t14=%xmm12,<r14=%xmm3
+addpd %xmm12,%xmm3
+
+# qhasm: t15 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t15=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t15=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t15 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t15=int6464#13
+# asm 2: mulpd 144(<t2p=%rcx),<t15=%xmm12
+mulpd 144(%rcx),%xmm12
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#13,<r15=int6464#5
+# asm 2: addpd <t15=%xmm12,<r15=%xmm4
+addpd %xmm12,%xmm4
+
+# qhasm: t16 = ab6
+# asm 1: movdqa <ab6=int6464#7,>t16=int6464#13
+# asm 2: movdqa <ab6=%xmm6,>t16=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 t16 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t16=int6464#13
+# asm 2: mulpd 160(<t2p=%rcx),<t16=%xmm12
+mulpd 160(%rcx),%xmm12
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#13,<r16=int6464#6
+# asm 2: addpd <t16=%xmm12,<r16=%xmm5
+addpd %xmm12,%xmm5
+
+# qhasm: r17 = ab6
+# asm 1: movdqa <ab6=int6464#7,>r17=int6464#7
+# asm 2: movdqa <ab6=%xmm6,>r17=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: float6464 r17 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r17=int6464#7
+# asm 2: mulpd 176(<t2p=%rcx),<r17=%xmm6
+mulpd 176(%rcx),%xmm6
+
+# qhasm: *(int128 *)(rp + 96) = r6
+# asm 1: movdqa <r6=int6464#8,96(<rp=int64#3)
+# asm 2: movdqa <r6=%xmm7,96(<rp=%rdx)
+movdqa %xmm7,96(%rdx)
+
+# qhasm: ab7 = *(int128 *)(t1p + 112)
+# asm 1: movdqa 112(<t1p=int64#2),>ab7=int6464#8
+# asm 2: movdqa 112(<t1p=%rsi),>ab7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: ab7six = ab7
+# asm 1: movdqa <ab7=int6464#8,>ab7six=int6464#13
+# asm 2: movdqa <ab7=%xmm7,>ab7six=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 ab7six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab7six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab7six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t7 = ab7
+# asm 1: movdqa <ab7=int6464#8,>t7=int6464#14
+# asm 2: movdqa <ab7=%xmm7,>t7=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 t7 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t7=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t7=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r7 +=t7
+# asm 1: addpd <t7=int6464#14,<r7=int6464#9
+# asm 2: addpd <t7=%xmm13,<r7=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t13 = ab7
+# asm 1: movdqa <ab7=int6464#8,>t13=int6464#8
+# asm 2: movdqa <ab7=%xmm7,>t13=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t13=int6464#8
+# asm 2: mulpd 96(<t2p=%rcx),<t13=%xmm7
+mulpd 96(%rcx),%xmm7
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#8,<r13=int6464#3
+# asm 2: addpd <t13=%xmm7,<r13=%xmm2
+addpd %xmm7,%xmm2
+
+# qhasm: t8 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t8=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t8=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t8=int6464#8
+# asm 2: mulpd 16(<t2p=%rcx),<t8=%xmm7
+mulpd 16(%rcx),%xmm7
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#8,<r8=int6464#10
+# asm 2: addpd <t8=%xmm7,<r8=%xmm9
+addpd %xmm7,%xmm9
+
+# qhasm: t9 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t9=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t9=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t9=int6464#8
+# asm 2: mulpd 32(<t2p=%rcx),<t9=%xmm7
+mulpd 32(%rcx),%xmm7
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#8,<r9=int6464#11
+# asm 2: addpd <t9=%xmm7,<r9=%xmm10
+addpd %xmm7,%xmm10
+
+# qhasm: t10 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t10=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t10=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t10=int6464#8
+# asm 2: mulpd 48(<t2p=%rcx),<t10=%xmm7
+mulpd 48(%rcx),%xmm7
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#8,<r10=int6464#12
+# asm 2: addpd <t10=%xmm7,<r10=%xmm11
+addpd %xmm7,%xmm11
+
+# qhasm: t11 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t11=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t11=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t11=int6464#8
+# asm 2: mulpd 64(<t2p=%rcx),<t11=%xmm7
+mulpd 64(%rcx),%xmm7
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#8,<r11=int6464#1
+# asm 2: addpd <t11=%xmm7,<r11=%xmm0
+addpd %xmm7,%xmm0
+
+# qhasm: t12 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t12=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t12=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t12=int6464#8
+# asm 2: mulpd 80(<t2p=%rcx),<t12=%xmm7
+mulpd 80(%rcx),%xmm7
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#8,<r12=int6464#2
+# asm 2: addpd <t12=%xmm7,<r12=%xmm1
+addpd %xmm7,%xmm1
+
+# qhasm: t14 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t14=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t14=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t14=int6464#8
+# asm 2: mulpd 112(<t2p=%rcx),<t14=%xmm7
+mulpd 112(%rcx),%xmm7
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#8,<r14=int6464#4
+# asm 2: addpd <t14=%xmm7,<r14=%xmm3
+addpd %xmm7,%xmm3
+
+# qhasm: t15 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t15=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t15=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t15 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t15=int6464#8
+# asm 2: mulpd 128(<t2p=%rcx),<t15=%xmm7
+mulpd 128(%rcx),%xmm7
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#8,<r15=int6464#5
+# asm 2: addpd <t15=%xmm7,<r15=%xmm4
+addpd %xmm7,%xmm4
+
+# qhasm: t16 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t16=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t16=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t16 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t16=int6464#8
+# asm 2: mulpd 144(<t2p=%rcx),<t16=%xmm7
+mulpd 144(%rcx),%xmm7
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#8,<r16=int6464#6
+# asm 2: addpd <t16=%xmm7,<r16=%xmm5
+addpd %xmm7,%xmm5
+
+# qhasm: t17 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>t17=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>t17=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 t17 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t17=int6464#8
+# asm 2: mulpd 160(<t2p=%rcx),<t17=%xmm7
+mulpd 160(%rcx),%xmm7
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#8,<r17=int6464#7
+# asm 2: addpd <t17=%xmm7,<r17=%xmm6
+addpd %xmm7,%xmm6
+
+# qhasm: r18 = ab7six
+# asm 1: movdqa <ab7six=int6464#13,>r18=int6464#8
+# asm 2: movdqa <ab7six=%xmm12,>r18=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 r18 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r18=int6464#8
+# asm 2: mulpd 176(<t2p=%rcx),<r18=%xmm7
+mulpd 176(%rcx),%xmm7
+
+# qhasm: *(int128 *)(rp + 112) = r7
+# asm 1: movdqa <r7=int6464#9,112(<rp=int64#3)
+# asm 2: movdqa <r7=%xmm8,112(<rp=%rdx)
+movdqa %xmm8,112(%rdx)
+
+# qhasm: ab8 = *(int128 *)(t1p + 128)
+# asm 1: movdqa 128(<t1p=int64#2),>ab8=int6464#9
+# asm 2: movdqa 128(<t1p=%rsi),>ab8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: ab8six = ab8
+# asm 1: movdqa <ab8=int6464#9,>ab8six=int6464#13
+# asm 2: movdqa <ab8=%xmm8,>ab8six=%xmm12
+movdqa %xmm8,%xmm12
+
+# qhasm: float6464 ab8six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab8six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab8six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t8 = ab8
+# asm 1: movdqa <ab8=int6464#9,>t8=int6464#14
+# asm 2: movdqa <ab8=%xmm8,>t8=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 t8 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t8=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t8=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r8 +=t8
+# asm 1: addpd <t8=int6464#14,<r8=int6464#10
+# asm 2: addpd <t8=%xmm13,<r8=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: t13 = ab8
+# asm 1: movdqa <ab8=int6464#9,>t13=int6464#14
+# asm 2: movdqa <ab8=%xmm8,>t13=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t13=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t13=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t14 = ab8
+# asm 1: movdqa <ab8=int6464#9,>t14=int6464#14
+# asm 2: movdqa <ab8=%xmm8,>t14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t14=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t14=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#14,<r14=int6464#4
+# asm 2: addpd <t14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: r19 = ab8
+# asm 1: movdqa <ab8=int6464#9,>r19=int6464#9
+# asm 2: movdqa <ab8=%xmm8,>r19=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: float6464 r19 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r19=int6464#9
+# asm 2: mulpd 176(<t2p=%rcx),<r19=%xmm8
+mulpd 176(%rcx),%xmm8
+
+# qhasm: t9 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t9=int6464#14
+# asm 2: movdqa <ab8six=%xmm12,>t9=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t9=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t9=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#14,<r9=int6464#11
+# asm 2: addpd <t9=%xmm13,<r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: t10 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t10=int6464#14
+# asm 2: movdqa <ab8six=%xmm12,>t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t10=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t10=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#12
+# asm 2: addpd <t10=%xmm13,<r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t11 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t11=int6464#14
+# asm 2: movdqa <ab8six=%xmm12,>t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t11=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t12 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t12=int6464#14
+# asm 2: movdqa <ab8six=%xmm12,>t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t12=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t12=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#14,<r12=int6464#2
+# asm 2: addpd <t12=%xmm13,<r12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: t15 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t15=int6464#14
+# asm 2: movdqa <ab8six=%xmm12,>t15=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t15 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t15=int6464#14
+# asm 2: mulpd 112(<t2p=%rcx),<t15=%xmm13
+mulpd 112(%rcx),%xmm13
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#14,<r15=int6464#5
+# asm 2: addpd <t15=%xmm13,<r15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: t16 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t16=int6464#14
+# asm 2: movdqa <ab8six=%xmm12,>t16=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t16 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t16=int6464#14
+# asm 2: mulpd 128(<t2p=%rcx),<t16=%xmm13
+mulpd 128(%rcx),%xmm13
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#14,<r16=int6464#6
+# asm 2: addpd <t16=%xmm13,<r16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: t17 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t17=int6464#14
+# asm 2: movdqa <ab8six=%xmm12,>t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t17 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t17=int6464#14
+# asm 2: mulpd 144(<t2p=%rcx),<t17=%xmm13
+mulpd 144(%rcx),%xmm13
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#14,<r17=int6464#7
+# asm 2: addpd <t17=%xmm13,<r17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t18 = ab8six
+# asm 1: movdqa <ab8six=int6464#13,>t18=int6464#13
+# asm 2: movdqa <ab8six=%xmm12,>t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t18 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t18=int6464#13
+# asm 2: mulpd 160(<t2p=%rcx),<t18=%xmm12
+mulpd 160(%rcx),%xmm12
+
+# qhasm: float6464 r18 +=t18
+# asm 1: addpd <t18=int6464#13,<r18=int6464#8
+# asm 2: addpd <t18=%xmm12,<r18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(rp + 128) = r8
+# asm 1: movdqa <r8=int6464#10,128(<rp=int64#3)
+# asm 2: movdqa <r8=%xmm9,128(<rp=%rdx)
+movdqa %xmm9,128(%rdx)
+
+# qhasm: ab9 = *(int128 *)(t1p + 144)
+# asm 1: movdqa 144(<t1p=int64#2),>ab9=int6464#10
+# asm 2: movdqa 144(<t1p=%rsi),>ab9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: ab9six = ab9
+# asm 1: movdqa <ab9=int6464#10,>ab9six=int6464#13
+# asm 2: movdqa <ab9=%xmm9,>ab9six=%xmm12
+movdqa %xmm9,%xmm12
+
+# qhasm: float6464 ab9six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab9six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab9six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t9 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t9=int6464#14
+# asm 2: movdqa <ab9=%xmm9,>t9=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 t9 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t9=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t9=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r9 +=t9
+# asm 1: addpd <t9=int6464#14,<r9=int6464#11
+# asm 2: addpd <t9=%xmm13,<r9=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: t13 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t13=int6464#14
+# asm 2: movdqa <ab9=%xmm9,>t13=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t13=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t13=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t14 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t14=int6464#14
+# asm 2: movdqa <ab9=%xmm9,>t14=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t14=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t14=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#14,<r14=int6464#4
+# asm 2: addpd <t14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: t15 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t15=int6464#14
+# asm 2: movdqa <ab9=%xmm9,>t15=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 t15 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t15=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t15=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#14,<r15=int6464#5
+# asm 2: addpd <t15=%xmm13,<r15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: t19 = ab9
+# asm 1: movdqa <ab9=int6464#10,>t19=int6464#14
+# asm 2: movdqa <ab9=%xmm9,>t19=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 t19 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t19=int6464#14
+# asm 2: mulpd 160(<t2p=%rcx),<t19=%xmm13
+mulpd 160(%rcx),%xmm13
+
+# qhasm: float6464 r19 +=t19
+# asm 1: addpd <t19=int6464#14,<r19=int6464#9
+# asm 2: addpd <t19=%xmm13,<r19=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: r20 = ab9
+# asm 1: movdqa <ab9=int6464#10,>r20=int6464#10
+# asm 2: movdqa <ab9=%xmm9,>r20=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: float6464 r20 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r20=int6464#10
+# asm 2: mulpd 176(<t2p=%rcx),<r20=%xmm9
+mulpd 176(%rcx),%xmm9
+
+# qhasm: t10 = ab9six
+# asm 1: movdqa <ab9six=int6464#13,>t10=int6464#14
+# asm 2: movdqa <ab9six=%xmm12,>t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t10=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t10=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#12
+# asm 2: addpd <t10=%xmm13,<r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t11 = ab9six
+# asm 1: movdqa <ab9six=int6464#13,>t11=int6464#14
+# asm 2: movdqa <ab9six=%xmm12,>t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t11=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t12 = ab9six
+# asm 1: movdqa <ab9six=int6464#13,>t12=int6464#14
+# asm 2: movdqa <ab9six=%xmm12,>t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t12=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t12=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#14,<r12=int6464#2
+# asm 2: addpd <t12=%xmm13,<r12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: t16 = ab9six
+# asm 1: movdqa <ab9six=int6464#13,>t16=int6464#14
+# asm 2: movdqa <ab9six=%xmm12,>t16=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t16 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t16=int6464#14
+# asm 2: mulpd 112(<t2p=%rcx),<t16=%xmm13
+mulpd 112(%rcx),%xmm13
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#14,<r16=int6464#6
+# asm 2: addpd <t16=%xmm13,<r16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: t17 = ab9six
+# asm 1: movdqa <ab9six=int6464#13,>t17=int6464#14
+# asm 2: movdqa <ab9six=%xmm12,>t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t17 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t17=int6464#14
+# asm 2: mulpd 128(<t2p=%rcx),<t17=%xmm13
+mulpd 128(%rcx),%xmm13
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#14,<r17=int6464#7
+# asm 2: addpd <t17=%xmm13,<r17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t18 = ab9six
+# asm 1: movdqa <ab9six=int6464#13,>t18=int6464#13
+# asm 2: movdqa <ab9six=%xmm12,>t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t18 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t18=int6464#13
+# asm 2: mulpd 144(<t2p=%rcx),<t18=%xmm12
+mulpd 144(%rcx),%xmm12
+
+# qhasm: float6464 r18 +=t18
+# asm 1: addpd <t18=int6464#13,<r18=int6464#8
+# asm 2: addpd <t18=%xmm12,<r18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(rp + 144) = r9
+# asm 1: movdqa <r9=int6464#11,144(<rp=int64#3)
+# asm 2: movdqa <r9=%xmm10,144(<rp=%rdx)
+movdqa %xmm10,144(%rdx)
+
+# qhasm: ab10 = *(int128 *)(t1p + 160)
+# asm 1: movdqa 160(<t1p=int64#2),>ab10=int6464#11
+# asm 2: movdqa 160(<t1p=%rsi),>ab10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: ab10six = ab10
+# asm 1: movdqa <ab10=int6464#11,>ab10six=int6464#13
+# asm 2: movdqa <ab10=%xmm10,>ab10six=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 ab10six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab10six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab10six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t10 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t10=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>t10=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t10 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t10=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t10=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r10 +=t10
+# asm 1: addpd <t10=int6464#14,<r10=int6464#12
+# asm 2: addpd <t10=%xmm13,<r10=%xmm11
+addpd %xmm13,%xmm11
+
+# qhasm: t13 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t13=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>t13=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t13=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t13=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t14 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t14=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>t14=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t14=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t14=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#14,<r14=int6464#4
+# asm 2: addpd <t14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: t16 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t16=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>t16=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t16 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t16=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t16=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#14,<r16=int6464#6
+# asm 2: addpd <t16=%xmm13,<r16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: t15 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t15=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>t15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t15 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t15=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t15=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#14,<r15=int6464#5
+# asm 2: addpd <t15=%xmm13,<r15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: t19 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t19=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>t19=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t19 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t19=int6464#14
+# asm 2: mulpd 144(<t2p=%rcx),<t19=%xmm13
+mulpd 144(%rcx),%xmm13
+
+# qhasm: float6464 r19 +=t19
+# asm 1: addpd <t19=int6464#14,<r19=int6464#9
+# asm 2: addpd <t19=%xmm13,<r19=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t20 = ab10
+# asm 1: movdqa <ab10=int6464#11,>t20=int6464#14
+# asm 2: movdqa <ab10=%xmm10,>t20=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t20 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t20=int6464#14
+# asm 2: mulpd 160(<t2p=%rcx),<t20=%xmm13
+mulpd 160(%rcx),%xmm13
+
+# qhasm: float6464 r20 +=t20
+# asm 1: addpd <t20=int6464#14,<r20=int6464#10
+# asm 2: addpd <t20=%xmm13,<r20=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: r21 = ab10
+# asm 1: movdqa <ab10=int6464#11,>r21=int6464#11
+# asm 2: movdqa <ab10=%xmm10,>r21=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: float6464 r21 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r21=int6464#11
+# asm 2: mulpd 176(<t2p=%rcx),<r21=%xmm10
+mulpd 176(%rcx),%xmm10
+
+# qhasm: t11 = ab10six
+# asm 1: movdqa <ab10six=int6464#13,>t11=int6464#14
+# asm 2: movdqa <ab10six=%xmm12,>t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t11=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t12 = ab10six
+# asm 1: movdqa <ab10six=int6464#13,>t12=int6464#14
+# asm 2: movdqa <ab10six=%xmm12,>t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t12=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t12=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#14,<r12=int6464#2
+# asm 2: addpd <t12=%xmm13,<r12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: t17 = ab10six
+# asm 1: movdqa <ab10six=int6464#13,>t17=int6464#14
+# asm 2: movdqa <ab10six=%xmm12,>t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t17 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t17=int6464#14
+# asm 2: mulpd 112(<t2p=%rcx),<t17=%xmm13
+mulpd 112(%rcx),%xmm13
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#14,<r17=int6464#7
+# asm 2: addpd <t17=%xmm13,<r17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t18 = ab10six
+# asm 1: movdqa <ab10six=int6464#13,>t18=int6464#13
+# asm 2: movdqa <ab10six=%xmm12,>t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t18 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t18=int6464#13
+# asm 2: mulpd 128(<t2p=%rcx),<t18=%xmm12
+mulpd 128(%rcx),%xmm12
+
+# qhasm: float6464 r18 +=t18
+# asm 1: addpd <t18=int6464#13,<r18=int6464#8
+# asm 2: addpd <t18=%xmm12,<r18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(rp + 160) = r10
+# asm 1: movdqa <r10=int6464#12,160(<rp=int64#3)
+# asm 2: movdqa <r10=%xmm11,160(<rp=%rdx)
+movdqa %xmm11,160(%rdx)
+
+# qhasm: ab11 = *(int128 *)(t1p + 176)
+# asm 1: movdqa 176(<t1p=int64#2),>ab11=int6464#12
+# asm 2: movdqa 176(<t1p=%rsi),>ab11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: ab11six = ab11
+# asm 1: movdqa <ab11=int6464#12,>ab11six=int6464#13
+# asm 2: movdqa <ab11=%xmm11,>ab11six=%xmm12
+movdqa %xmm11,%xmm12
+
+# qhasm: float6464 ab11six *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<ab11six=int6464#13
+# asm 2: mulpd SIX_SIX,<ab11six=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: t11 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t11=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t11=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t11 *= *(int128 *)(t2p + 0)
+# asm 1: mulpd 0(<t2p=int64#4),<t11=int6464#14
+# asm 2: mulpd 0(<t2p=%rcx),<t11=%xmm13
+mulpd 0(%rcx),%xmm13
+
+# qhasm: float6464 r11 +=t11
+# asm 1: addpd <t11=int6464#14,<r11=int6464#1
+# asm 2: addpd <t11=%xmm13,<r11=%xmm0
+addpd %xmm13,%xmm0
+
+# qhasm: t13 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t13=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t13=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t13 *= *(int128 *)(t2p + 32)
+# asm 1: mulpd 32(<t2p=int64#4),<t13=int6464#14
+# asm 2: mulpd 32(<t2p=%rcx),<t13=%xmm13
+mulpd 32(%rcx),%xmm13
+
+# qhasm: float6464 r13 +=t13
+# asm 1: addpd <t13=int6464#14,<r13=int6464#3
+# asm 2: addpd <t13=%xmm13,<r13=%xmm2
+addpd %xmm13,%xmm2
+
+# qhasm: t14 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t14=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t14=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t14 *= *(int128 *)(t2p + 48)
+# asm 1: mulpd 48(<t2p=int64#4),<t14=int6464#14
+# asm 2: mulpd 48(<t2p=%rcx),<t14=%xmm13
+mulpd 48(%rcx),%xmm13
+
+# qhasm: float6464 r14 +=t14
+# asm 1: addpd <t14=int6464#14,<r14=int6464#4
+# asm 2: addpd <t14=%xmm13,<r14=%xmm3
+addpd %xmm13,%xmm3
+
+# qhasm: t15 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t15=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t15=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t15 *= *(int128 *)(t2p + 64)
+# asm 1: mulpd 64(<t2p=int64#4),<t15=int6464#14
+# asm 2: mulpd 64(<t2p=%rcx),<t15=%xmm13
+mulpd 64(%rcx),%xmm13
+
+# qhasm: float6464 r15 +=t15
+# asm 1: addpd <t15=int6464#14,<r15=int6464#5
+# asm 2: addpd <t15=%xmm13,<r15=%xmm4
+addpd %xmm13,%xmm4
+
+# qhasm: t16 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t16=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t16=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t16 *= *(int128 *)(t2p + 80)
+# asm 1: mulpd 80(<t2p=int64#4),<t16=int6464#14
+# asm 2: mulpd 80(<t2p=%rcx),<t16=%xmm13
+mulpd 80(%rcx),%xmm13
+
+# qhasm: float6464 r16 +=t16
+# asm 1: addpd <t16=int6464#14,<r16=int6464#6
+# asm 2: addpd <t16=%xmm13,<r16=%xmm5
+addpd %xmm13,%xmm5
+
+# qhasm: t17 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t17=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t17=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t17 *= *(int128 *)(t2p + 96)
+# asm 1: mulpd 96(<t2p=int64#4),<t17=int6464#14
+# asm 2: mulpd 96(<t2p=%rcx),<t17=%xmm13
+mulpd 96(%rcx),%xmm13
+
+# qhasm: float6464 r17 +=t17
+# asm 1: addpd <t17=int6464#14,<r17=int6464#7
+# asm 2: addpd <t17=%xmm13,<r17=%xmm6
+addpd %xmm13,%xmm6
+
+# qhasm: t19 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t19=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t19=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t19 *= *(int128 *)(t2p + 128)
+# asm 1: mulpd 128(<t2p=int64#4),<t19=int6464#14
+# asm 2: mulpd 128(<t2p=%rcx),<t19=%xmm13
+mulpd 128(%rcx),%xmm13
+
+# qhasm: float6464 r19 +=t19
+# asm 1: addpd <t19=int6464#14,<r19=int6464#9
+# asm 2: addpd <t19=%xmm13,<r19=%xmm8
+addpd %xmm13,%xmm8
+
+# qhasm: t20 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t20=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t20=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t20 *= *(int128 *)(t2p + 144)
+# asm 1: mulpd 144(<t2p=int64#4),<t20=int6464#14
+# asm 2: mulpd 144(<t2p=%rcx),<t20=%xmm13
+mulpd 144(%rcx),%xmm13
+
+# qhasm: float6464 r20 +=t20
+# asm 1: addpd <t20=int6464#14,<r20=int6464#10
+# asm 2: addpd <t20=%xmm13,<r20=%xmm9
+addpd %xmm13,%xmm9
+
+# qhasm: t21 = ab11
+# asm 1: movdqa <ab11=int6464#12,>t21=int6464#14
+# asm 2: movdqa <ab11=%xmm11,>t21=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 t21 *= *(int128 *)(t2p + 160)
+# asm 1: mulpd 160(<t2p=int64#4),<t21=int6464#14
+# asm 2: mulpd 160(<t2p=%rcx),<t21=%xmm13
+mulpd 160(%rcx),%xmm13
+
+# qhasm: float6464 r21 +=t21
+# asm 1: addpd <t21=int6464#14,<r21=int6464#11
+# asm 2: addpd <t21=%xmm13,<r21=%xmm10
+addpd %xmm13,%xmm10
+
+# qhasm: r22 = ab11
+# asm 1: movdqa <ab11=int6464#12,>r22=int6464#12
+# asm 2: movdqa <ab11=%xmm11,>r22=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: float6464 r22 *= *(int128 *)(t2p + 176)
+# asm 1: mulpd 176(<t2p=int64#4),<r22=int6464#12
+# asm 2: mulpd 176(<t2p=%rcx),<r22=%xmm11
+mulpd 176(%rcx),%xmm11
+
+# qhasm: t12 = ab11six
+# asm 1: movdqa <ab11six=int6464#13,>t12=int6464#14
+# asm 2: movdqa <ab11six=%xmm12,>t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 t12 *= *(int128 *)(t2p + 16)
+# asm 1: mulpd 16(<t2p=int64#4),<t12=int6464#14
+# asm 2: mulpd 16(<t2p=%rcx),<t12=%xmm13
+mulpd 16(%rcx),%xmm13
+
+# qhasm: float6464 r12 +=t12
+# asm 1: addpd <t12=int6464#14,<r12=int6464#2
+# asm 2: addpd <t12=%xmm13,<r12=%xmm1
+addpd %xmm13,%xmm1
+
+# qhasm: t18 = ab11six
+# asm 1: movdqa <ab11six=int6464#13,>t18=int6464#13
+# asm 2: movdqa <ab11six=%xmm12,>t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 t18 *= *(int128 *)(t2p + 112)
+# asm 1: mulpd 112(<t2p=int64#4),<t18=int6464#13
+# asm 2: mulpd 112(<t2p=%rcx),<t18=%xmm12
+mulpd 112(%rcx),%xmm12
+
+# qhasm: float6464 r18 +=t18
+# asm 1: addpd <t18=int6464#13,<r18=int6464#8
+# asm 2: addpd <t18=%xmm12,<r18=%xmm7
+addpd %xmm12,%xmm7
+
+# qhasm: *(int128 *)(rp + 176) = r11
+# asm 1: movdqa <r11=int6464#1,176(<rp=int64#3)
+# asm 2: movdqa <r11=%xmm0,176(<rp=%rdx)
+movdqa %xmm0,176(%rdx)
+
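+# Editor's annotation: at this point the low limbs r0..r11 have been stored
+# to rp and the high limbs r12..r22 remain in registers. The block below
+# reloads r0..r11 and folds the high limbs back in, scaled by the small
+# constants (TWO_TWO, THREE_THREE, FOUR_FOUR, FIVE_FIVE, SIX_SIX,
+# EIGHT_EIGHT, NINE_NINE, EIGHTEEN_EIGHTEEN, THIRTY_THIRTY) defined
+# elsewhere in this library, reducing the degree-22 schoolbook product to a
+# 12-limb result.
+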
+# qhasm: r0 = *(int128 *)(rp + 0)
+# asm 1: movdqa 0(<rp=int64#3),>r0=int6464#1
+# asm 2: movdqa 0(<rp=%rdx),>r0=%xmm0
+movdqa 0(%rdx),%xmm0
+
+# qhasm: float6464 r0 -= r12
+# asm 1: subpd <r12=int6464#2,<r0=int6464#1
+# asm 2: subpd <r12=%xmm1,<r0=%xmm0
+subpd %xmm1,%xmm0
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#13
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 t15 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t15=int6464#13
+# asm 2: mulpd SIX_SIX,<t15=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: float6464 r0 += t15
+# asm 1: addpd <t15=int6464#13,<r0=int6464#1
+# asm 2: addpd <t15=%xmm12,<r0=%xmm0
+addpd %xmm12,%xmm0
+
+# qhasm: t18 = r18
+# asm 1: movdqa <r18=int6464#8,>t18=int6464#13
+# asm 2: movdqa <r18=%xmm7,>t18=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 t18 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t18=int6464#13
+# asm 2: mulpd TWO_TWO,<t18=%xmm12
+mulpd TWO_TWO,%xmm12
+
+# qhasm: float6464 r0 -= t18
+# asm 1: subpd <t18=int6464#13,<r0=int6464#1
+# asm 2: subpd <t18=%xmm12,<r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#13
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 t21 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t21=int6464#13
+# asm 2: mulpd SIX_SIX,<t21=%xmm12
+mulpd SIX_SIX,%xmm12
+
+# qhasm: float6464 r0 -= t21
+# asm 1: subpd <t21=int6464#13,<r0=int6464#1
+# asm 2: subpd <t21=%xmm12,<r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: r3 = *(int128 *)(rp + 48)
+# asm 1: movdqa 48(<rp=int64#3),>r3=int6464#13
+# asm 2: movdqa 48(<rp=%rdx),>r3=%xmm12
+movdqa 48(%rdx),%xmm12
+
+# qhasm: float6464 r3 -= r12
+# asm 1: subpd <r12=int6464#2,<r3=int6464#13
+# asm 2: subpd <r12=%xmm1,<r3=%xmm12
+subpd %xmm1,%xmm12
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#14
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 t15 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t15=int6464#14
+# asm 2: mulpd FIVE_FIVE,<t15=%xmm13
+mulpd FIVE_FIVE,%xmm13
+
+# qhasm: float6464 r3 += t15
+# asm 1: addpd <t15=int6464#14,<r3=int6464#13
+# asm 2: addpd <t15=%xmm13,<r3=%xmm12
+addpd %xmm13,%xmm12
+
+# qhasm: float6464 r3 -= r18
+# asm 1: subpd <r18=int6464#8,<r3=int6464#13
+# asm 2: subpd <r18=%xmm7,<r3=%xmm12
+subpd %xmm7,%xmm12
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#14
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 t21 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<t21=int6464#14
+# asm 2: mulpd EIGHT_EIGHT,<t21=%xmm13
+mulpd EIGHT_EIGHT,%xmm13
+
+# qhasm: float6464 r3 -= t21
+# asm 1: subpd <t21=int6464#14,<r3=int6464#13
+# asm 2: subpd <t21=%xmm13,<r3=%xmm12
+subpd %xmm13,%xmm12
+
+# qhasm: r6 = *(int128 *)(rp + 96)
+# asm 1: movdqa 96(<rp=int64#3),>r6=int6464#14
+# asm 2: movdqa 96(<rp=%rdx),>r6=%xmm13
+movdqa 96(%rdx),%xmm13
+
+# qhasm: t12 = r12
+# asm 1: movdqa <r12=int6464#2,>t12=int6464#15
+# asm 2: movdqa <r12=%xmm1,>t12=%xmm14
+movdqa %xmm1,%xmm14
+
+# qhasm: float6464 t12 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<t12=int6464#15
+# asm 2: mulpd FOUR_FOUR,<t12=%xmm14
+mulpd FOUR_FOUR,%xmm14
+
+# qhasm: float6464 r6 -= t12
+# asm 1: subpd <t12=int6464#15,<r6=int6464#14
+# asm 2: subpd <t12=%xmm14,<r6=%xmm13
+subpd %xmm14,%xmm13
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#15
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm14
+movdqa %xmm4,%xmm14
+
+# qhasm: float6464 t15 *= EIGHTEEN_EIGHTEEN
+# asm 1: mulpd EIGHTEEN_EIGHTEEN,<t15=int6464#15
+# asm 2: mulpd EIGHTEEN_EIGHTEEN,<t15=%xmm14
+mulpd EIGHTEEN_EIGHTEEN,%xmm14
+
+# qhasm: float6464 r6 += t15
+# asm 1: addpd <t15=int6464#15,<r6=int6464#14
+# asm 2: addpd <t15=%xmm14,<r6=%xmm13
+addpd %xmm14,%xmm13
+
+# qhasm: t18 = r18
+# asm 1: movdqa <r18=int6464#8,>t18=int6464#15
+# asm 2: movdqa <r18=%xmm7,>t18=%xmm14
+movdqa %xmm7,%xmm14
+
+# qhasm: float6464 t18 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t18=int6464#15
+# asm 2: mulpd THREE_THREE,<t18=%xmm14
+mulpd THREE_THREE,%xmm14
+
+# qhasm: float6464 r6 -= t18
+# asm 1: subpd <t18=int6464#15,<r6=int6464#14
+# asm 2: subpd <t18=%xmm14,<r6=%xmm13
+subpd %xmm14,%xmm13
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#15
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm14
+movdqa %xmm10,%xmm14
+
+# qhasm: float6464 t21 *= THIRTY_THIRTY
+# asm 1: mulpd THIRTY_THIRTY,<t21=int6464#15
+# asm 2: mulpd THIRTY_THIRTY,<t21=%xmm14
+mulpd THIRTY_THIRTY,%xmm14
+
+# qhasm: float6464 r6 -= t21
+# asm 1: subpd <t21=int6464#15,<r6=int6464#14
+# asm 2: subpd <t21=%xmm14,<r6=%xmm13
+subpd %xmm14,%xmm13
+
+# qhasm: r9 = *(int128 *)(rp + 144)
+# asm 1: movdqa 144(<rp=int64#3),>r9=int6464#15
+# asm 2: movdqa 144(<rp=%rdx),>r9=%xmm14
+movdqa 144(%rdx),%xmm14
+
+# qhasm: float6464 r9 -= r12
+# asm 1: subpd <r12=int6464#2,<r9=int6464#15
+# asm 2: subpd <r12=%xmm1,<r9=%xmm14
+subpd %xmm1,%xmm14
+
+# qhasm: t15 = r15
+# asm 1: movdqa <r15=int6464#5,>t15=int6464#2
+# asm 2: movdqa <r15=%xmm4,>t15=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: float6464 t15 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t15=int6464#2
+# asm 2: mulpd TWO_TWO,<t15=%xmm1
+mulpd TWO_TWO,%xmm1
+
+# qhasm: float6464 r9 += t15
+# asm 1: addpd <t15=int6464#2,<r9=int6464#15
+# asm 2: addpd <t15=%xmm1,<r9=%xmm14
+addpd %xmm1,%xmm14
+
+# qhasm: float6464 r9 += r18
+# asm 1: addpd <r18=int6464#8,<r9=int6464#15
+# asm 2: addpd <r18=%xmm7,<r9=%xmm14
+addpd %xmm7,%xmm14
+
+# qhasm: t21 = r21
+# asm 1: movdqa <r21=int6464#11,>t21=int6464#2
+# asm 2: movdqa <r21=%xmm10,>t21=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 t21 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<t21=int6464#2
+# asm 2: mulpd NINE_NINE,<t21=%xmm1
+mulpd NINE_NINE,%xmm1
+
+# qhasm: float6464 r9 -= t21
+# asm 1: subpd <t21=int6464#2,<r9=int6464#15
+# asm 2: subpd <t21=%xmm1,<r9=%xmm14
+subpd %xmm1,%xmm14
+
+# qhasm: r1 = *(int128 *)(rp + 16)
+# asm 1: movdqa 16(<rp=int64#3),>r1=int6464#2
+# asm 2: movdqa 16(<rp=%rdx),>r1=%xmm1
+movdqa 16(%rdx),%xmm1
+
+# qhasm: float6464 r1 -= r13
+# asm 1: subpd <r13=int6464#3,<r1=int6464#2
+# asm 2: subpd <r13=%xmm2,<r1=%xmm1
+subpd %xmm2,%xmm1
+
+# qhasm: float6464 r1 += r16
+# asm 1: addpd <r16=int6464#6,<r1=int6464#2
+# asm 2: addpd <r16=%xmm5,<r1=%xmm1
+addpd %xmm5,%xmm1
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#5
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm4
+movdqa %xmm8,%xmm4
+
+# qhasm: float6464 t19 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t19=int6464#5
+# asm 2: mulpd TWO_TWO,<t19=%xmm4
+mulpd TWO_TWO,%xmm4
+
+# qhasm: float6464 r1 -= t19
+# asm 1: subpd <t19=int6464#5,<r1=int6464#2
+# asm 2: subpd <t19=%xmm4,<r1=%xmm1
+subpd %xmm4,%xmm1
+
+# qhasm: float6464 r1 -= r22
+# asm 1: subpd <r22=int6464#12,<r1=int6464#2
+# asm 2: subpd <r22=%xmm11,<r1=%xmm1
+subpd %xmm11,%xmm1
+
+# qhasm: r4 = *(int128 *)(rp + 64)
+# asm 1: movdqa 64(<rp=int64#3),>r4=int6464#5
+# asm 2: movdqa 64(<rp=%rdx),>r4=%xmm4
+movdqa 64(%rdx),%xmm4
+
+# qhasm: t13 = r13
+# asm 1: movdqa <r13=int6464#3,>t13=int6464#8
+# asm 2: movdqa <r13=%xmm2,>t13=%xmm7
+movdqa %xmm2,%xmm7
+
+# qhasm: float6464 t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t13=int6464#8
+# asm 2: mulpd SIX_SIX,<t13=%xmm7
+mulpd SIX_SIX,%xmm7
+
+# qhasm: float6464 r4 -= t13
+# asm 1: subpd <t13=int6464#8,<r4=int6464#5
+# asm 2: subpd <t13=%xmm7,<r4=%xmm4
+subpd %xmm7,%xmm4
+
+# qhasm: t16 = r16
+# asm 1: movdqa <r16=int6464#6,>t16=int6464#8
+# asm 2: movdqa <r16=%xmm5,>t16=%xmm7
+movdqa %xmm5,%xmm7
+
+# qhasm: float6464 t16 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t16=int6464#8
+# asm 2: mulpd FIVE_FIVE,<t16=%xmm7
+mulpd FIVE_FIVE,%xmm7
+
+# qhasm: float6464 r4 += t16
+# asm 1: addpd <t16=int6464#8,<r4=int6464#5
+# asm 2: addpd <t16=%xmm7,<r4=%xmm4
+addpd %xmm7,%xmm4
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#8
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm7
+movdqa %xmm8,%xmm7
+
+# qhasm: float6464 t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t19=int6464#8
+# asm 2: mulpd SIX_SIX,<t19=%xmm7
+mulpd SIX_SIX,%xmm7
+
+# qhasm: float6464 r4 -= t19
+# asm 1: subpd <t19=int6464#8,<r4=int6464#5
+# asm 2: subpd <t19=%xmm7,<r4=%xmm4
+subpd %xmm7,%xmm4
+
+# qhasm: t22 = r22
+# asm 1: movdqa <r22=int6464#12,>t22=int6464#8
+# asm 2: movdqa <r22=%xmm11,>t22=%xmm7
+movdqa %xmm11,%xmm7
+
+# qhasm: float6464 t22 *= EIGHT_EIGHT
+# asm 1: mulpd EIGHT_EIGHT,<t22=int6464#8
+# asm 2: mulpd EIGHT_EIGHT,<t22=%xmm7
+mulpd EIGHT_EIGHT,%xmm7
+
+# qhasm: float6464 r4 -= t22
+# asm 1: subpd <t22=int6464#8,<r4=int6464#5
+# asm 2: subpd <t22=%xmm7,<r4=%xmm4
+subpd %xmm7,%xmm4
+
+# qhasm: r7 = *(int128 *)(rp + 112)
+# asm 1: movdqa 112(<rp=int64#3),>r7=int6464#8
+# asm 2: movdqa 112(<rp=%rdx),>r7=%xmm7
+movdqa 112(%rdx),%xmm7
+
+# qhasm: t13 = r13
+# asm 1: movdqa <r13=int6464#3,>t13=int6464#11
+# asm 2: movdqa <r13=%xmm2,>t13=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm: float6464 t13 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<t13=int6464#11
+# asm 2: mulpd FOUR_FOUR,<t13=%xmm10
+mulpd FOUR_FOUR,%xmm10
+
+# qhasm: float6464 r7 -= t13
+# asm 1: subpd <t13=int6464#11,<r7=int6464#8
+# asm 2: subpd <t13=%xmm10,<r7=%xmm7
+subpd %xmm10,%xmm7
+
+# qhasm: t16 = r16
+# asm 1: movdqa <r16=int6464#6,>t16=int6464#11
+# asm 2: movdqa <r16=%xmm5,>t16=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm: float6464 t16 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t16=int6464#11
+# asm 2: mulpd THREE_THREE,<t16=%xmm10
+mulpd THREE_THREE,%xmm10
+
+# qhasm: float6464 r7 += t16
+# asm 1: addpd <t16=int6464#11,<r7=int6464#8
+# asm 2: addpd <t16=%xmm10,<r7=%xmm7
+addpd %xmm10,%xmm7
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#11
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm: float6464 t19 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t19=int6464#11
+# asm 2: mulpd THREE_THREE,<t19=%xmm10
+mulpd THREE_THREE,%xmm10
+
+# qhasm: float6464 r7 -= t19
+# asm 1: subpd <t19=int6464#11,<r7=int6464#8
+# asm 2: subpd <t19=%xmm10,<r7=%xmm7
+subpd %xmm10,%xmm7
+
+# qhasm: t22 = r22
+# asm 1: movdqa <r22=int6464#12,>t22=int6464#11
+# asm 2: movdqa <r22=%xmm11,>t22=%xmm10
+movdqa %xmm11,%xmm10
+
+# qhasm: float6464 t22 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t22=int6464#11
+# asm 2: mulpd FIVE_FIVE,<t22=%xmm10
+mulpd FIVE_FIVE,%xmm10
+
+# qhasm: float6464 r7 -= t22
+# asm 1: subpd <t22=int6464#11,<r7=int6464#8
+# asm 2: subpd <t22=%xmm10,<r7=%xmm7
+subpd %xmm10,%xmm7
+
+# qhasm: r10 = *(int128 *)(rp + 160)
+# asm 1: movdqa 160(<rp=int64#3),>r10=int6464#11
+# asm 2: movdqa 160(<rp=%rdx),>r10=%xmm10
+movdqa 160(%rdx),%xmm10
+
+# qhasm: t13 = r13
+# asm 1: movdqa <r13=int6464#3,>t13=int6464#3
+# asm 2: movdqa <r13=%xmm2,>t13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: float6464 t13 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t13=int6464#3
+# asm 2: mulpd SIX_SIX,<t13=%xmm2
+mulpd SIX_SIX,%xmm2
+
+# qhasm: float6464 r10 -= t13
+# asm 1: subpd <t13=int6464#3,<r10=int6464#11
+# asm 2: subpd <t13=%xmm2,<r10=%xmm10
+subpd %xmm2,%xmm10
+
+# qhasm: t16 = r16
+# asm 1: movdqa <r16=int6464#6,>t16=int6464#3
+# asm 2: movdqa <r16=%xmm5,>t16=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm: float6464 t16 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t16=int6464#3
+# asm 2: mulpd TWO_TWO,<t16=%xmm2
+mulpd TWO_TWO,%xmm2
+
+# qhasm: float6464 r10 += t16
+# asm 1: addpd <t16=int6464#3,<r10=int6464#11
+# asm 2: addpd <t16=%xmm2,<r10=%xmm10
+addpd %xmm2,%xmm10
+
+# qhasm: t19 = r19
+# asm 1: movdqa <r19=int6464#9,>t19=int6464#3
+# asm 2: movdqa <r19=%xmm8,>t19=%xmm2
+movdqa %xmm8,%xmm2
+
+# qhasm: float6464 t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t19=int6464#3
+# asm 2: mulpd SIX_SIX,<t19=%xmm2
+mulpd SIX_SIX,%xmm2
+
+# qhasm: float6464 r10 += t19
+# asm 1: addpd <t19=int6464#3,<r10=int6464#11
+# asm 2: addpd <t19=%xmm2,<r10=%xmm10
+addpd %xmm2,%xmm10
+
+# qhasm: t22 = r22
+# asm 1: movdqa <r22=int6464#12,>t22=int6464#3
+# asm 2: movdqa <r22=%xmm11,>t22=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm: float6464 t22 *= NINE_NINE
+# asm 1: mulpd NINE_NINE,<t22=int6464#3
+# asm 2: mulpd NINE_NINE,<t22=%xmm2
+mulpd NINE_NINE,%xmm2
+
+# qhasm: float6464 r10 -= t22
+# asm 1: subpd <t22=int6464#3,<r10=int6464#11
+# asm 2: subpd <t22=%xmm2,<r10=%xmm10
+subpd %xmm2,%xmm10
+
+# qhasm: r2 = *(int128 *)(rp + 32)
+# asm 1: movdqa 32(<rp=int64#3),>r2=int6464#3
+# asm 2: movdqa 32(<rp=%rdx),>r2=%xmm2
+movdqa 32(%rdx),%xmm2
+
+# qhasm: float6464 r2 -= r14
+# asm 1: subpd <r14=int6464#4,<r2=int6464#3
+# asm 2: subpd <r14=%xmm3,<r2=%xmm2
+subpd %xmm3,%xmm2
+
+# qhasm: float6464 r2 += r17
+# asm 1: addpd <r17=int6464#7,<r2=int6464#3
+# asm 2: addpd <r17=%xmm6,<r2=%xmm2
+addpd %xmm6,%xmm2
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#6
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm: float6464 t20 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t20=int6464#6
+# asm 2: mulpd TWO_TWO,<t20=%xmm5
+mulpd TWO_TWO,%xmm5
+
+# qhasm: float6464 r2 -= t20
+# asm 1: subpd <t20=int6464#6,<r2=int6464#3
+# asm 2: subpd <t20=%xmm5,<r2=%xmm2
+subpd %xmm5,%xmm2
+
+# qhasm: r5 = *(int128 *)(rp + 80)
+# asm 1: movdqa 80(<rp=int64#3),>r5=int6464#6
+# asm 2: movdqa 80(<rp=%rdx),>r5=%xmm5
+movdqa 80(%rdx),%xmm5
+
+# qhasm: t14 = r14
+# asm 1: movdqa <r14=int6464#4,>t14=int6464#9
+# asm 2: movdqa <r14=%xmm3,>t14=%xmm8
+movdqa %xmm3,%xmm8
+
+# qhasm: float6464 t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t14=int6464#9
+# asm 2: mulpd SIX_SIX,<t14=%xmm8
+mulpd SIX_SIX,%xmm8
+
+# qhasm: float6464 r5 -= t14
+# asm 1: subpd <t14=int6464#9,<r5=int6464#6
+# asm 2: subpd <t14=%xmm8,<r5=%xmm5
+subpd %xmm8,%xmm5
+
+# qhasm: t17 = r17
+# asm 1: movdqa <r17=int6464#7,>t17=int6464#9
+# asm 2: movdqa <r17=%xmm6,>t17=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm: float6464 t17 *= FIVE_FIVE
+# asm 1: mulpd FIVE_FIVE,<t17=int6464#9
+# asm 2: mulpd FIVE_FIVE,<t17=%xmm8
+mulpd FIVE_FIVE,%xmm8
+
+# qhasm: float6464 r5 += t17
+# asm 1: addpd <t17=int6464#9,<r5=int6464#6
+# asm 2: addpd <t17=%xmm8,<r5=%xmm5
+addpd %xmm8,%xmm5
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#9
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm8
+movdqa %xmm9,%xmm8
+
+# qhasm: float6464 t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t20=int6464#9
+# asm 2: mulpd SIX_SIX,<t20=%xmm8
+mulpd SIX_SIX,%xmm8
+
+# qhasm: float6464 r5 -= t20
+# asm 1: subpd <t20=int6464#9,<r5=int6464#6
+# asm 2: subpd <t20=%xmm8,<r5=%xmm5
+subpd %xmm8,%xmm5
+
+# qhasm: r8 = *(int128 *)(rp + 128)
+# asm 1: movdqa 128(<rp=int64#3),>r8=int6464#9
+# asm 2: movdqa 128(<rp=%rdx),>r8=%xmm8
+movdqa 128(%rdx),%xmm8
+
+# qhasm: t14 = r14
+# asm 1: movdqa <r14=int6464#4,>t14=int6464#12
+# asm 2: movdqa <r14=%xmm3,>t14=%xmm11
+movdqa %xmm3,%xmm11
+
+# qhasm: float6464 t14 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<t14=int6464#12
+# asm 2: mulpd FOUR_FOUR,<t14=%xmm11
+mulpd FOUR_FOUR,%xmm11
+
+# qhasm: float6464 r8 -= t14
+# asm 1: subpd <t14=int6464#12,<r8=int6464#9
+# asm 2: subpd <t14=%xmm11,<r8=%xmm8
+subpd %xmm11,%xmm8
+
+# qhasm: t17 = r17
+# asm 1: movdqa <r17=int6464#7,>t17=int6464#12
+# asm 2: movdqa <r17=%xmm6,>t17=%xmm11
+movdqa %xmm6,%xmm11
+
+# qhasm: float6464 t17 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t17=int6464#12
+# asm 2: mulpd THREE_THREE,<t17=%xmm11
+mulpd THREE_THREE,%xmm11
+
+# qhasm: float6464 r8 += t17
+# asm 1: addpd <t17=int6464#12,<r8=int6464#9
+# asm 2: addpd <t17=%xmm11,<r8=%xmm8
+addpd %xmm11,%xmm8
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#12
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm11
+movdqa %xmm9,%xmm11
+
+# qhasm: float6464 t20 *= THREE_THREE
+# asm 1: mulpd THREE_THREE,<t20=int6464#12
+# asm 2: mulpd THREE_THREE,<t20=%xmm11
+mulpd THREE_THREE,%xmm11
+
+# qhasm: float6464 r8 -= t20
+# asm 1: subpd <t20=int6464#12,<r8=int6464#9
+# asm 2: subpd <t20=%xmm11,<r8=%xmm8
+subpd %xmm11,%xmm8
+
+# qhasm: r11 = *(int128 *)(rp + 176)
+# asm 1: movdqa 176(<rp=int64#3),>r11=int6464#12
+# asm 2: movdqa 176(<rp=%rdx),>r11=%xmm11
+movdqa 176(%rdx),%xmm11
+
+# qhasm: t14 = r14
+# asm 1: movdqa <r14=int6464#4,>t14=int6464#4
+# asm 2: movdqa <r14=%xmm3,>t14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: float6464 t14 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t14=int6464#4
+# asm 2: mulpd SIX_SIX,<t14=%xmm3
+mulpd SIX_SIX,%xmm3
+
+# qhasm: float6464 r11 -= t14
+# asm 1: subpd <t14=int6464#4,<r11=int6464#12
+# asm 2: subpd <t14=%xmm3,<r11=%xmm11
+subpd %xmm3,%xmm11
+
+# qhasm: t17 = r17
+# asm 1: movdqa <r17=int6464#7,>t17=int6464#4
+# asm 2: movdqa <r17=%xmm6,>t17=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm: float6464 t17 *= TWO_TWO
+# asm 1: mulpd TWO_TWO,<t17=int6464#4
+# asm 2: mulpd TWO_TWO,<t17=%xmm3
+mulpd TWO_TWO,%xmm3
+
+# qhasm: float6464 r11 += t17
+# asm 1: addpd <t17=int6464#4,<r11=int6464#12
+# asm 2: addpd <t17=%xmm3,<r11=%xmm11
+addpd %xmm3,%xmm11
+
+# qhasm: t20 = r20
+# asm 1: movdqa <r20=int6464#10,>t20=int6464#4
+# asm 2: movdqa <r20=%xmm9,>t20=%xmm3
+movdqa %xmm9,%xmm3
+
+# qhasm: float6464 t20 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<t20=int6464#4
+# asm 2: mulpd SIX_SIX,<t20=%xmm3
+mulpd SIX_SIX,%xmm3
+
+# qhasm: float6464 r11 += t20
+# asm 1: addpd <t20=int6464#4,<r11=int6464#12
+# asm 2: addpd <t20=%xmm3,<r11=%xmm11
+addpd %xmm3,%xmm11
+
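+# Editor's annotation: the code below is a carry chain over the reduced
+# limbs. For each limb, carry = r_i * VINV_VINV is rounded via the
+# add-then-subtract ROUND_ROUND trick (presumably round-to-nearest in
+# double precision); the carry is added to the next limb and carry * V_V is
+# subtracted from the current one, keeping every coefficient small. VINV_VINV
+# and V_V are assumed here to hold the inverse radix and the radix.
+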
+# qhasm: round = ROUND_ROUND
+# asm 1: movdqa ROUND_ROUND,<round=int6464#4
+# asm 2: movdqa ROUND_ROUND,<round=%xmm3
+movdqa ROUND_ROUND,%xmm3
+
+# qhasm: carry = r1
+# asm 1: movdqa <r1=int6464#2,>carry=int6464#7
+# asm 2: movdqa <r1=%xmm1,>carry=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r2 += carry
+# asm 1: addpd <carry=int6464#7,<r2=int6464#3
+# asm 2: addpd <carry=%xmm6,<r2=%xmm2
+addpd %xmm6,%xmm2
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r1 -= carry
+# asm 1: subpd <carry=int6464#7,<r1=int6464#2
+# asm 2: subpd <carry=%xmm6,<r1=%xmm1
+subpd %xmm6,%xmm1
+
+# qhasm: carry = r4
+# asm 1: movdqa <r4=int6464#5,>carry=int6464#7
+# asm 2: movdqa <r4=%xmm4,>carry=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r5 += carry
+# asm 1: addpd <carry=int6464#7,<r5=int6464#6
+# asm 2: addpd <carry=%xmm6,<r5=%xmm5
+addpd %xmm6,%xmm5
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r4 -= carry
+# asm 1: subpd <carry=int6464#7,<r4=int6464#5
+# asm 2: subpd <carry=%xmm6,<r4=%xmm4
+subpd %xmm6,%xmm4
+
+# qhasm: carry = r7
+# asm 1: movdqa <r7=int6464#8,>carry=int6464#7
+# asm 2: movdqa <r7=%xmm7,>carry=%xmm6
+movdqa %xmm7,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r8 += carry
+# asm 1: addpd <carry=int6464#7,<r8=int6464#9
+# asm 2: addpd <carry=%xmm6,<r8=%xmm8
+addpd %xmm6,%xmm8
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r7 -= carry
+# asm 1: subpd <carry=int6464#7,<r7=int6464#8
+# asm 2: subpd <carry=%xmm6,<r7=%xmm7
+subpd %xmm6,%xmm7
+
+# qhasm: carry = r10
+# asm 1: movdqa <r10=int6464#11,>carry=int6464#7
+# asm 2: movdqa <r10=%xmm10,>carry=%xmm6
+movdqa %xmm10,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r11 += carry
+# asm 1: addpd <carry=int6464#7,<r11=int6464#12
+# asm 2: addpd <carry=%xmm6,<r11=%xmm11
+addpd %xmm6,%xmm11
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r10 -= carry
+# asm 1: subpd <carry=int6464#7,<r10=int6464#11
+# asm 2: subpd <carry=%xmm6,<r10=%xmm10
+subpd %xmm6,%xmm10
+
+# qhasm: carry = r2
+# asm 1: movdqa <r2=int6464#3,>carry=int6464#7
+# asm 2: movdqa <r2=%xmm2,>carry=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r3 += carry
+# asm 1: addpd <carry=int6464#7,<r3=int6464#13
+# asm 2: addpd <carry=%xmm6,<r3=%xmm12
+addpd %xmm6,%xmm12
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r2 -= carry
+# asm 1: subpd <carry=int6464#7,<r2=int6464#3
+# asm 2: subpd <carry=%xmm6,<r2=%xmm2
+subpd %xmm6,%xmm2
+
+# qhasm: carry = r5
+# asm 1: movdqa <r5=int6464#6,>carry=int6464#7
+# asm 2: movdqa <r5=%xmm5,>carry=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r6 += carry
+# asm 1: addpd <carry=int6464#7,<r6=int6464#14
+# asm 2: addpd <carry=%xmm6,<r6=%xmm13
+addpd %xmm6,%xmm13
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r5 -= carry
+# asm 1: subpd <carry=int6464#7,<r5=int6464#6
+# asm 2: subpd <carry=%xmm6,<r5=%xmm5
+subpd %xmm6,%xmm5
+
+# qhasm: carry = r8
+# asm 1: movdqa <r8=int6464#9,>carry=int6464#7
+# asm 2: movdqa <r8=%xmm8,>carry=%xmm6
+movdqa %xmm8,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r9 += carry
+# asm 1: addpd <carry=int6464#7,<r9=int6464#15
+# asm 2: addpd <carry=%xmm6,<r9=%xmm14
+addpd %xmm6,%xmm14
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r8 -= carry
+# asm 1: subpd <carry=int6464#7,<r8=int6464#9
+# asm 2: subpd <carry=%xmm6,<r8=%xmm8
+subpd %xmm6,%xmm8
+
+# qhasm: carry = r11
+# asm 1: movdqa <r11=int6464#12,>carry=int6464#7
+# asm 2: movdqa <r11=%xmm11,>carry=%xmm6
+movdqa %xmm11,%xmm6
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#7
+# asm 2: mulpd VINV_VINV,<carry=%xmm6
+mulpd VINV_VINV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r0 -= carry
+# asm 1: subpd <carry=int6464#7,<r0=int6464#1
+# asm 2: subpd <carry=%xmm6,<r0=%xmm0
+subpd %xmm6,%xmm0
+
+# qhasm: float6464 r3 -= carry
+# asm 1: subpd <carry=int6464#7,<r3=int6464#13
+# asm 2: subpd <carry=%xmm6,<r3=%xmm12
+subpd %xmm6,%xmm12
+
+# qhasm: 2t6 = carry
+# asm 1: movdqa <carry=int6464#7,>2t6=int6464#10
+# asm 2: movdqa <carry=%xmm6,>2t6=%xmm9
+movdqa %xmm6,%xmm9
+
+# qhasm: float6464 2t6 *= FOUR_FOUR
+# asm 1: mulpd FOUR_FOUR,<2t6=int6464#10
+# asm 2: mulpd FOUR_FOUR,<2t6=%xmm9
+mulpd FOUR_FOUR,%xmm9
+
+# qhasm: float6464 r6 -= 2t6
+# asm 1: subpd <2t6=int6464#10,<r6=int6464#14
+# asm 2: subpd <2t6=%xmm9,<r6=%xmm13
+subpd %xmm9,%xmm13
+
+# qhasm: float6464 r9 -= carry
+# asm 1: subpd <carry=int6464#7,<r9=int6464#15
+# asm 2: subpd <carry=%xmm6,<r9=%xmm14
+subpd %xmm6,%xmm14
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#7
+# asm 2: mulpd V_V,<carry=%xmm6
+mulpd V_V,%xmm6
+
+# qhasm: float6464 r11 -= carry
+# asm 1: subpd <carry=int6464#7,<r11=int6464#12
+# asm 2: subpd <carry=%xmm6,<r11=%xmm11
+subpd %xmm6,%xmm11
+
+# qhasm: carry = r0
+# asm 1: movdqa <r0=int6464#1,>carry=int6464#7
+# asm 2: movdqa <r0=%xmm0,>carry=%xmm6
+movdqa %xmm0,%xmm6
+
+# qhasm: float6464 carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<carry=int6464#7
+# asm 2: mulpd V6INV_V6INV,<carry=%xmm6
+mulpd V6INV_V6INV,%xmm6
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#7
+# asm 2: addpd <round=%xmm3,<carry=%xmm6
+addpd %xmm3,%xmm6
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#7
+# asm 2: subpd <round=%xmm3,<carry=%xmm6
+subpd %xmm3,%xmm6
+
+# qhasm: float6464 r1 += carry
+# asm 1: addpd <carry=int6464#7,<r1=int6464#2
+# asm 2: addpd <carry=%xmm6,<r1=%xmm1
+addpd %xmm6,%xmm1
+
+# qhasm: float6464 carry *= V6_V6
+# asm 1: mulpd V6_V6,<carry=int6464#7
+# asm 2: mulpd V6_V6,<carry=%xmm6
+mulpd V6_V6,%xmm6
+
+# qhasm: float6464 r0 -= carry
+# asm 1: subpd <carry=int6464#7,<r0=int6464#1
+# asm 2: subpd <carry=%xmm6,<r0=%xmm0
+subpd %xmm6,%xmm0
+
+# qhasm: *(int128 *)(rop +   0) =  r0
+# asm 1: movdqa <r0=int6464#1,0(<rop=int64#1)
+# asm 2: movdqa <r0=%xmm0,0(<rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: carry = r3
+# asm 1: movdqa <r3=int6464#13,>carry=int6464#1
+# asm 2: movdqa <r3=%xmm12,>carry=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r4 += carry
+# asm 1: addpd <carry=int6464#1,<r4=int6464#5
+# asm 2: addpd <carry=%xmm0,<r4=%xmm4
+addpd %xmm0,%xmm4
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r3 -= carry
+# asm 1: subpd <carry=int6464#1,<r3=int6464#13
+# asm 2: subpd <carry=%xmm0,<r3=%xmm12
+subpd %xmm0,%xmm12
+
+# qhasm: *(int128 *)(rop +  48) =  r3
+# asm 1: movdqa <r3=int6464#13,48(<rop=int64#1)
+# asm 2: movdqa <r3=%xmm12,48(<rop=%rdi)
+movdqa %xmm12,48(%rdi)
+
+# qhasm: carry = r6
+# asm 1: movdqa <r6=int6464#14,>carry=int6464#1
+# asm 2: movdqa <r6=%xmm13,>carry=%xmm0
+movdqa %xmm13,%xmm0
+
+# qhasm: float6464 carry *= V6INV_V6INV
+# asm 1: mulpd V6INV_V6INV,<carry=int6464#1
+# asm 2: mulpd V6INV_V6INV,<carry=%xmm0
+mulpd V6INV_V6INV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r7 += carry
+# asm 1: addpd <carry=int6464#1,<r7=int6464#8
+# asm 2: addpd <carry=%xmm0,<r7=%xmm7
+addpd %xmm0,%xmm7
+
+# qhasm: float6464 carry *= V6_V6
+# asm 1: mulpd V6_V6,<carry=int6464#1
+# asm 2: mulpd V6_V6,<carry=%xmm0
+mulpd V6_V6,%xmm0
+
+# qhasm: float6464 r6 -= carry
+# asm 1: subpd <carry=int6464#1,<r6=int6464#14
+# asm 2: subpd <carry=%xmm0,<r6=%xmm13
+subpd %xmm0,%xmm13
+
+# qhasm: *(int128 *)(rop +  96) =  r6
+# asm 1: movdqa <r6=int6464#14,96(<rop=int64#1)
+# asm 2: movdqa <r6=%xmm13,96(<rop=%rdi)
+movdqa %xmm13,96(%rdi)
+
+# qhasm: carry = r9
+# asm 1: movdqa <r9=int6464#15,>carry=int6464#1
+# asm 2: movdqa <r9=%xmm14,>carry=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r10 += carry
+# asm 1: addpd <carry=int6464#1,<r10=int6464#11
+# asm 2: addpd <carry=%xmm0,<r10=%xmm10
+addpd %xmm0,%xmm10
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r9 -= carry
+# asm 1: subpd <carry=int6464#1,<r9=int6464#15
+# asm 2: subpd <carry=%xmm0,<r9=%xmm14
+subpd %xmm0,%xmm14
+
+# qhasm: *(int128 *)(rop + 144) =  r9
+# asm 1: movdqa <r9=int6464#15,144(<rop=int64#1)
+# asm 2: movdqa <r9=%xmm14,144(<rop=%rdi)
+movdqa %xmm14,144(%rdi)
+
+# qhasm: carry = r1
+# asm 1: movdqa <r1=int6464#2,>carry=int6464#1
+# asm 2: movdqa <r1=%xmm1,>carry=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r2 += carry
+# asm 1: addpd <carry=int6464#1,<r2=int6464#3
+# asm 2: addpd <carry=%xmm0,<r2=%xmm2
+addpd %xmm0,%xmm2
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r1 -= carry
+# asm 1: subpd <carry=int6464#1,<r1=int6464#2
+# asm 2: subpd <carry=%xmm0,<r1=%xmm1
+subpd %xmm0,%xmm1
+
+# qhasm: *(int128 *)(rop +  16) =  r1
+# asm 1: movdqa <r1=int6464#2,16(<rop=int64#1)
+# asm 2: movdqa <r1=%xmm1,16(<rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(rop +  32) =  r2
+# asm 1: movdqa <r2=int6464#3,32(<rop=int64#1)
+# asm 2: movdqa <r2=%xmm2,32(<rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: carry = r4
+# asm 1: movdqa <r4=int6464#5,>carry=int6464#1
+# asm 2: movdqa <r4=%xmm4,>carry=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r5 += carry
+# asm 1: addpd <carry=int6464#1,<r5=int6464#6
+# asm 2: addpd <carry=%xmm0,<r5=%xmm5
+addpd %xmm0,%xmm5
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r4 -= carry
+# asm 1: subpd <carry=int6464#1,<r4=int6464#5
+# asm 2: subpd <carry=%xmm0,<r4=%xmm4
+subpd %xmm0,%xmm4
+
+# qhasm: *(int128 *)(rop +  64) =  r4
+# asm 1: movdqa <r4=int6464#5,64(<rop=int64#1)
+# asm 2: movdqa <r4=%xmm4,64(<rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(rop +  80) =  r5
+# asm 1: movdqa <r5=int6464#6,80(<rop=int64#1)
+# asm 2: movdqa <r5=%xmm5,80(<rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: carry = r7
+# asm 1: movdqa <r7=int6464#8,>carry=int6464#1
+# asm 2: movdqa <r7=%xmm7,>carry=%xmm0
+movdqa %xmm7,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r8 += carry
+# asm 1: addpd <carry=int6464#1,<r8=int6464#9
+# asm 2: addpd <carry=%xmm0,<r8=%xmm8
+addpd %xmm0,%xmm8
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r7 -= carry
+# asm 1: subpd <carry=int6464#1,<r7=int6464#8
+# asm 2: subpd <carry=%xmm0,<r7=%xmm7
+subpd %xmm0,%xmm7
+
+# qhasm: *(int128 *)(rop + 112) =  r7
+# asm 1: movdqa <r7=int6464#8,112(<rop=int64#1)
+# asm 2: movdqa <r7=%xmm7,112(<rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(rop + 128) =  r8
+# asm 1: movdqa <r8=int6464#9,128(<rop=int64#1)
+# asm 2: movdqa <r8=%xmm8,128(<rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: carry = r10
+# asm 1: movdqa <r10=int6464#11,>carry=int6464#1
+# asm 2: movdqa <r10=%xmm10,>carry=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm: float6464 carry *= VINV_VINV
+# asm 1: mulpd VINV_VINV,<carry=int6464#1
+# asm 2: mulpd VINV_VINV,<carry=%xmm0
+mulpd VINV_VINV,%xmm0
+
+# qhasm: float6464 carry += round
+# asm 1: addpd <round=int6464#4,<carry=int6464#1
+# asm 2: addpd <round=%xmm3,<carry=%xmm0
+addpd %xmm3,%xmm0
+
+# qhasm: float6464 carry -= round
+# asm 1: subpd <round=int6464#4,<carry=int6464#1
+# asm 2: subpd <round=%xmm3,<carry=%xmm0
+subpd %xmm3,%xmm0
+
+# qhasm: float6464 r11 += carry
+# asm 1: addpd <carry=int6464#1,<r11=int6464#12
+# asm 2: addpd <carry=%xmm0,<r11=%xmm11
+addpd %xmm0,%xmm11
+
+# qhasm: float6464 carry *= V_V
+# asm 1: mulpd V_V,<carry=int6464#1
+# asm 2: mulpd V_V,<carry=%xmm0
+mulpd V_V,%xmm0
+
+# qhasm: float6464 r10 -= carry
+# asm 1: subpd <carry=int6464#1,<r10=int6464#11
+# asm 2: subpd <carry=%xmm0,<r10=%xmm10
+subpd %xmm0,%xmm10
+
+# qhasm: *(int128 *)(rop + 160) = r10
+# asm 1: movdqa <r10=int6464#11,160(<rop=int64#1)
+# asm 2: movdqa <r10=%xmm10,160(<rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(rop + 176) = r11
+# asm 1: movdqa <r11=int6464#12,176(<rop=int64#1)
+# asm 2: movdqa <r11=%xmm11,176(<rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
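
The carry chain above repeats one step per limb: compute the nearest-integer quotient of the limb by the radix (the mulpd VINV_VINV followed by adding and subtracting ROUND_ROUND emulates round-to-nearest), add that carry to the next limb, and subtract carry times the radix from the current limb; limbs 0 and 6 use the larger radix (V6INV_V6INV/V6_V6), and the carry out of r11 is folded back into r0, r3, r9 (once) and r6 (four times), mirroring fpe_short_coeffred in fpe.c. A minimal scalar sketch of one ordinary step, assuming v is the limb radix (bn_v) and r[] holds the double-precision limbs:

    #include <math.h>

    /* one carry step of the floating-point reduction; the SSE code realizes
       round() via mulpd VINV_VINV; addpd ROUND_ROUND; subpd ROUND_ROUND */
    static void carry_step(double *r, int i, double v) {
        double carry = round(r[i] / v);
        r[i + 1] += carry;       /* propagate into the next limb      */
        r[i]     -= carry * v;   /* keep only the remainder in limb i */
    }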

+ 314 - 0
dclxvi-20130329/fp2e_sub.s

@@ -0,0 +1,314 @@
+# File:   dclxvi-20130329/fp2e_sub.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_sub_qhasm
+.text
+.p2align 5
+.globl _fp2e_sub_qhasm
+.globl fp2e_sub_qhasm
+_fp2e_sub_qhasm:
+fp2e_sub_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op1
+
+# qhasm: int64 0op2
+
+# qhasm: input 0rop
+
+# qhasm: input 0op1
+
+# qhasm: input 0op2
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0op1 +   0)
+# asm 1: movdqa 0(<0op1=int64#2),>0r0=int6464#1
+# asm 2: movdqa 0(<0op1=%rsi),>0r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0op1 +  16)
+# asm 1: movdqa 16(<0op1=int64#2),>0r1=int6464#2
+# asm 2: movdqa 16(<0op1=%rsi),>0r1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0op1 +  32)
+# asm 1: movdqa 32(<0op1=int64#2),>0r2=int6464#3
+# asm 2: movdqa 32(<0op1=%rsi),>0r2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0op1 +  48)
+# asm 1: movdqa 48(<0op1=int64#2),>0r3=int6464#4
+# asm 2: movdqa 48(<0op1=%rsi),>0r3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0op1 +  64)
+# asm 1: movdqa 64(<0op1=int64#2),>0r4=int6464#5
+# asm 2: movdqa 64(<0op1=%rsi),>0r4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0op1 +  80)
+# asm 1: movdqa 80(<0op1=int64#2),>0r5=int6464#6
+# asm 2: movdqa 80(<0op1=%rsi),>0r5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0op1 +  96)
+# asm 1: movdqa 96(<0op1=int64#2),>0r6=int6464#7
+# asm 2: movdqa 96(<0op1=%rsi),>0r6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0op1 + 112)
+# asm 1: movdqa 112(<0op1=int64#2),>0r7=int6464#8
+# asm 2: movdqa 112(<0op1=%rsi),>0r7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0op1 + 128)
+# asm 1: movdqa 128(<0op1=int64#2),>0r8=int6464#9
+# asm 2: movdqa 128(<0op1=%rsi),>0r8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0op1 + 144)
+# asm 1: movdqa 144(<0op1=int64#2),>0r9=int6464#10
+# asm 2: movdqa 144(<0op1=%rsi),>0r9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0op1 + 160)
+# asm 1: movdqa 160(<0op1=int64#2),>0r10=int6464#11
+# asm 2: movdqa 160(<0op1=%rsi),>0r10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0op1 + 176)
+# asm 1: movdqa 176(<0op1=int64#2),>0r11=int6464#12
+# asm 2: movdqa 176(<0op1=%rsi),>0r11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: int6464 1t1
+
+# qhasm: int6464 1t2
+
+# qhasm: int6464 1t3
+
+# qhasm: 1t0 = *(int128 *)(0op2 + 0)
+# asm 1: movdqa 0(<0op2=int64#3),>1t0=int6464#13
+# asm 2: movdqa 0(<0op2=%rdx),>1t0=%xmm12
+movdqa 0(%rdx),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op2 + 16)
+# asm 1: movdqa 16(<0op2=int64#3),>1t1=int6464#14
+# asm 2: movdqa 16(<0op2=%rdx),>1t1=%xmm13
+movdqa 16(%rdx),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op2 + 32)
+# asm 1: movdqa 32(<0op2=int64#3),>1t2=int6464#15
+# asm 2: movdqa 32(<0op2=%rdx),>1t2=%xmm14
+movdqa 32(%rdx),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op2 + 48)
+# asm 1: movdqa 48(<0op2=int64#3),>1t3=int6464#16
+# asm 2: movdqa 48(<0op2=%rdx),>1t3=%xmm15
+movdqa 48(%rdx),%xmm15
+
+# qhasm: float6464 0r0 -= 1t0
+# asm 1: subpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: subpd <1t0=%xmm12,<0r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1 -= 1t1
+# asm 1: subpd <1t1=int6464#14,<0r1=int6464#2
+# asm 2: subpd <1t1=%xmm13,<0r1=%xmm1
+subpd %xmm13,%xmm1
+
+# qhasm: float6464 0r2 -= 1t2
+# asm 1: subpd <1t2=int6464#15,<0r2=int6464#3
+# asm 2: subpd <1t2=%xmm14,<0r2=%xmm2
+subpd %xmm14,%xmm2
+
+# qhasm: float6464 0r3 -= 1t3
+# asm 1: subpd <1t3=int6464#16,<0r3=int6464#4
+# asm 2: subpd <1t3=%xmm15,<0r3=%xmm3
+subpd %xmm15,%xmm3
+
+# qhasm: 1t0 = *(int128 *)(0op2 + 64)
+# asm 1: movdqa 64(<0op2=int64#3),>1t0=int6464#13
+# asm 2: movdqa 64(<0op2=%rdx),>1t0=%xmm12
+movdqa 64(%rdx),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op2 + 80)
+# asm 1: movdqa 80(<0op2=int64#3),>1t1=int6464#14
+# asm 2: movdqa 80(<0op2=%rdx),>1t1=%xmm13
+movdqa 80(%rdx),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op2 + 96)
+# asm 1: movdqa 96(<0op2=int64#3),>1t2=int6464#15
+# asm 2: movdqa 96(<0op2=%rdx),>1t2=%xmm14
+movdqa 96(%rdx),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op2 + 112)
+# asm 1: movdqa 112(<0op2=int64#3),>1t3=int6464#16
+# asm 2: movdqa 112(<0op2=%rdx),>1t3=%xmm15
+movdqa 112(%rdx),%xmm15
+
+# qhasm: float6464 0r4 -= 1t0
+# asm 1: subpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: subpd <1t0=%xmm12,<0r4=%xmm4
+subpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5 -= 1t1
+# asm 1: subpd <1t1=int6464#14,<0r5=int6464#6
+# asm 2: subpd <1t1=%xmm13,<0r5=%xmm5
+subpd %xmm13,%xmm5
+
+# qhasm: float6464 0r6 -= 1t2
+# asm 1: subpd <1t2=int6464#15,<0r6=int6464#7
+# asm 2: subpd <1t2=%xmm14,<0r6=%xmm6
+subpd %xmm14,%xmm6
+
+# qhasm: float6464 0r7 -= 1t3
+# asm 1: subpd <1t3=int6464#16,<0r7=int6464#8
+# asm 2: subpd <1t3=%xmm15,<0r7=%xmm7
+subpd %xmm15,%xmm7
+
+# qhasm: 1t0 = *(int128 *)(0op2 + 128)
+# asm 1: movdqa 128(<0op2=int64#3),>1t0=int6464#13
+# asm 2: movdqa 128(<0op2=%rdx),>1t0=%xmm12
+movdqa 128(%rdx),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op2 + 144)
+# asm 1: movdqa 144(<0op2=int64#3),>1t1=int6464#14
+# asm 2: movdqa 144(<0op2=%rdx),>1t1=%xmm13
+movdqa 144(%rdx),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op2 + 160)
+# asm 1: movdqa 160(<0op2=int64#3),>1t2=int6464#15
+# asm 2: movdqa 160(<0op2=%rdx),>1t2=%xmm14
+movdqa 160(%rdx),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op2 + 176)
+# asm 1: movdqa 176(<0op2=int64#3),>1t3=int6464#16
+# asm 2: movdqa 176(<0op2=%rdx),>1t3=%xmm15
+movdqa 176(%rdx),%xmm15
+
+# qhasm: float6464 0r8 -= 1t0
+# asm 1: subpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: subpd <1t0=%xmm12,<0r8=%xmm8
+subpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9 -= 1t1
+# asm 1: subpd <1t1=int6464#14,<0r9=int6464#10
+# asm 2: subpd <1t1=%xmm13,<0r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: float6464 0r10 -= 1t2
+# asm 1: subpd <1t2=int6464#15,<0r10=int6464#11
+# asm 2: subpd <1t2=%xmm14,<0r10=%xmm10
+subpd %xmm14,%xmm10
+
+# qhasm: float6464 0r11 -= 1t3
+# asm 1: subpd <1t3=int6464#16,<0r11=int6464#12
+# asm 2: subpd <1t3=%xmm15,<0r11=%xmm11
+subpd %xmm15,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
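
fp2e_sub_qhasm is a pure coefficient-wise subtraction: each of the twelve 16-byte loads picks up one limb pair of the two F_p components of an F_{p^2} element, so the routine amounts to 24 independent double subtractions with no carry handling or reduction. A scalar sketch, assuming the fp2e_t coefficients are laid out as 24 consecutive doubles (as the 0..176 byte offsets above suggest):

    /* rop = op1 - op2, limb by limb; no coefficient reduction is performed */
    static void fp2e_sub_sketch(double rop[24], const double op1[24], const double op2[24]) {
        for (int i = 0; i < 24; i++)
            rop[i] = op1[i] - op2[i];
    }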

+ 310 - 0
dclxvi-20130329/fp2e_sub2.s

@@ -0,0 +1,310 @@
+# File:   dclxvi-20130329/fp2e_sub2.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_sub2_qhasm
+.text
+.p2align 5
+.globl _fp2e_sub2_qhasm
+.globl fp2e_sub2_qhasm
+_fp2e_sub2_qhasm:
+fp2e_sub2_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op1
+
+# qhasm: input 0rop
+
+# qhasm: input 0op1
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0rop +   0)
+# asm 1: movdqa 0(<0rop=int64#1),>0r0=int6464#1
+# asm 2: movdqa 0(<0rop=%rdi),>0r0=%xmm0
+movdqa 0(%rdi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0rop +  16)
+# asm 1: movdqa 16(<0rop=int64#1),>0r1=int6464#2
+# asm 2: movdqa 16(<0rop=%rdi),>0r1=%xmm1
+movdqa 16(%rdi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0rop +  32)
+# asm 1: movdqa 32(<0rop=int64#1),>0r2=int6464#3
+# asm 2: movdqa 32(<0rop=%rdi),>0r2=%xmm2
+movdqa 32(%rdi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0rop +  48)
+# asm 1: movdqa 48(<0rop=int64#1),>0r3=int6464#4
+# asm 2: movdqa 48(<0rop=%rdi),>0r3=%xmm3
+movdqa 48(%rdi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0rop +  64)
+# asm 1: movdqa 64(<0rop=int64#1),>0r4=int6464#5
+# asm 2: movdqa 64(<0rop=%rdi),>0r4=%xmm4
+movdqa 64(%rdi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0rop +  80)
+# asm 1: movdqa 80(<0rop=int64#1),>0r5=int6464#6
+# asm 2: movdqa 80(<0rop=%rdi),>0r5=%xmm5
+movdqa 80(%rdi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0rop +  96)
+# asm 1: movdqa 96(<0rop=int64#1),>0r6=int6464#7
+# asm 2: movdqa 96(<0rop=%rdi),>0r6=%xmm6
+movdqa 96(%rdi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0rop + 112)
+# asm 1: movdqa 112(<0rop=int64#1),>0r7=int6464#8
+# asm 2: movdqa 112(<0rop=%rdi),>0r7=%xmm7
+movdqa 112(%rdi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0rop + 128)
+# asm 1: movdqa 128(<0rop=int64#1),>0r8=int6464#9
+# asm 2: movdqa 128(<0rop=%rdi),>0r8=%xmm8
+movdqa 128(%rdi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0rop + 144)
+# asm 1: movdqa 144(<0rop=int64#1),>0r9=int6464#10
+# asm 2: movdqa 144(<0rop=%rdi),>0r9=%xmm9
+movdqa 144(%rdi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0rop + 160)
+# asm 1: movdqa 160(<0rop=int64#1),>0r10=int6464#11
+# asm 2: movdqa 160(<0rop=%rdi),>0r10=%xmm10
+movdqa 160(%rdi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0rop + 176)
+# asm 1: movdqa 176(<0rop=int64#1),>0r11=int6464#12
+# asm 2: movdqa 176(<0rop=%rdi),>0r11=%xmm11
+movdqa 176(%rdi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: int6464 1t1
+
+# qhasm: int6464 1t2
+
+# qhasm: int6464 1t3
+
+# qhasm: 1t0 = *(int128 *)(0op1 + 0)
+# asm 1: movdqa 0(<0op1=int64#2),>1t0=int6464#13
+# asm 2: movdqa 0(<0op1=%rsi),>1t0=%xmm12
+movdqa 0(%rsi),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op1 + 16)
+# asm 1: movdqa 16(<0op1=int64#2),>1t1=int6464#14
+# asm 2: movdqa 16(<0op1=%rsi),>1t1=%xmm13
+movdqa 16(%rsi),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op1 + 32)
+# asm 1: movdqa 32(<0op1=int64#2),>1t2=int6464#15
+# asm 2: movdqa 32(<0op1=%rsi),>1t2=%xmm14
+movdqa 32(%rsi),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op1 + 48)
+# asm 1: movdqa 48(<0op1=int64#2),>1t3=int6464#16
+# asm 2: movdqa 48(<0op1=%rsi),>1t3=%xmm15
+movdqa 48(%rsi),%xmm15
+
+# qhasm: float6464 0r0 -= 1t0
+# asm 1: subpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: subpd <1t0=%xmm12,<0r0=%xmm0
+subpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1 -= 1t1
+# asm 1: subpd <1t1=int6464#14,<0r1=int6464#2
+# asm 2: subpd <1t1=%xmm13,<0r1=%xmm1
+subpd %xmm13,%xmm1
+
+# qhasm: float6464 0r2 -= 1t2
+# asm 1: subpd <1t2=int6464#15,<0r2=int6464#3
+# asm 2: subpd <1t2=%xmm14,<0r2=%xmm2
+subpd %xmm14,%xmm2
+
+# qhasm: float6464 0r3 -= 1t3
+# asm 1: subpd <1t3=int6464#16,<0r3=int6464#4
+# asm 2: subpd <1t3=%xmm15,<0r3=%xmm3
+subpd %xmm15,%xmm3
+
+# qhasm: 1t0 = *(int128 *)(0op1 + 64)
+# asm 1: movdqa 64(<0op1=int64#2),>1t0=int6464#13
+# asm 2: movdqa 64(<0op1=%rsi),>1t0=%xmm12
+movdqa 64(%rsi),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op1 + 80)
+# asm 1: movdqa 80(<0op1=int64#2),>1t1=int6464#14
+# asm 2: movdqa 80(<0op1=%rsi),>1t1=%xmm13
+movdqa 80(%rsi),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op1 + 96)
+# asm 1: movdqa 96(<0op1=int64#2),>1t2=int6464#15
+# asm 2: movdqa 96(<0op1=%rsi),>1t2=%xmm14
+movdqa 96(%rsi),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op1 + 112)
+# asm 1: movdqa 112(<0op1=int64#2),>1t3=int6464#16
+# asm 2: movdqa 112(<0op1=%rsi),>1t3=%xmm15
+movdqa 112(%rsi),%xmm15
+
+# qhasm: float6464 0r4 -= 1t0
+# asm 1: subpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: subpd <1t0=%xmm12,<0r4=%xmm4
+subpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5 -= 1t1
+# asm 1: subpd <1t1=int6464#14,<0r5=int6464#6
+# asm 2: subpd <1t1=%xmm13,<0r5=%xmm5
+subpd %xmm13,%xmm5
+
+# qhasm: float6464 0r6 -= 1t2
+# asm 1: subpd <1t2=int6464#15,<0r6=int6464#7
+# asm 2: subpd <1t2=%xmm14,<0r6=%xmm6
+subpd %xmm14,%xmm6
+
+# qhasm: float6464 0r7 -= 1t3
+# asm 1: subpd <1t3=int6464#16,<0r7=int6464#8
+# asm 2: subpd <1t3=%xmm15,<0r7=%xmm7
+subpd %xmm15,%xmm7
+
+# qhasm: 1t0 = *(int128 *)(0op1 + 128)
+# asm 1: movdqa 128(<0op1=int64#2),>1t0=int6464#13
+# asm 2: movdqa 128(<0op1=%rsi),>1t0=%xmm12
+movdqa 128(%rsi),%xmm12
+
+# qhasm: 1t1 = *(int128 *)(0op1 + 144)
+# asm 1: movdqa 144(<0op1=int64#2),>1t1=int6464#14
+# asm 2: movdqa 144(<0op1=%rsi),>1t1=%xmm13
+movdqa 144(%rsi),%xmm13
+
+# qhasm: 1t2 = *(int128 *)(0op1 + 160)
+# asm 1: movdqa 160(<0op1=int64#2),>1t2=int6464#15
+# asm 2: movdqa 160(<0op1=%rsi),>1t2=%xmm14
+movdqa 160(%rsi),%xmm14
+
+# qhasm: 1t3 = *(int128 *)(0op1 + 176)
+# asm 1: movdqa 176(<0op1=int64#2),>1t3=int6464#16
+# asm 2: movdqa 176(<0op1=%rsi),>1t3=%xmm15
+movdqa 176(%rsi),%xmm15
+
+# qhasm: float6464 0r8 -= 1t0
+# asm 1: subpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: subpd <1t0=%xmm12,<0r8=%xmm8
+subpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9 -= 1t1
+# asm 1: subpd <1t1=int6464#14,<0r9=int6464#10
+# asm 2: subpd <1t1=%xmm13,<0r9=%xmm9
+subpd %xmm13,%xmm9
+
+# qhasm: float6464 0r10 -= 1t2
+# asm 1: subpd <1t2=int6464#15,<0r10=int6464#11
+# asm 2: subpd <1t2=%xmm14,<0r10=%xmm10
+subpd %xmm14,%xmm10
+
+# qhasm: float6464 0r11 -= 1t3
+# asm 1: subpd <1t3=int6464#16,<0r11=int6464#12
+# asm 2: subpd <1t3=%xmm15,<0r11=%xmm11
+subpd %xmm15,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret
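
fp2e_sub2_qhasm is the in-place variant of the same operation: it loads the limbs from rop, subtracts op1, and stores the result back into rop, i.e. fp2e_sub2(rop, op1) computes rop -= op1. The other *2 routines in this directory (e.g. fp2e_triple2 below) follow the same convention.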

+ 249 - 0
dclxvi-20130329/fp2e_triple.s

@@ -0,0 +1,249 @@
+# File:   dclxvi-20130329/fp2e_triple.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_triple_qhasm
+.text
+.p2align 5
+.globl _fp2e_triple_qhasm
+.globl fp2e_triple_qhasm
+_fp2e_triple_qhasm:
+fp2e_triple_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 0rop
+
+# qhasm: int64 0op
+
+# qhasm: input 0rop
+
+# qhasm: input 0op
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(0op +   0)
+# asm 1: movdqa 0(<0op=int64#2),>0r0=int6464#1
+# asm 2: movdqa 0(<0op=%rsi),>0r0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(0op +  16)
+# asm 1: movdqa 16(<0op=int64#2),>0r1=int6464#2
+# asm 2: movdqa 16(<0op=%rsi),>0r1=%xmm1
+movdqa 16(%rsi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(0op +  32)
+# asm 1: movdqa 32(<0op=int64#2),>0r2=int6464#3
+# asm 2: movdqa 32(<0op=%rsi),>0r2=%xmm2
+movdqa 32(%rsi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(0op +  48)
+# asm 1: movdqa 48(<0op=int64#2),>0r3=int6464#4
+# asm 2: movdqa 48(<0op=%rsi),>0r3=%xmm3
+movdqa 48(%rsi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(0op +  64)
+# asm 1: movdqa 64(<0op=int64#2),>0r4=int6464#5
+# asm 2: movdqa 64(<0op=%rsi),>0r4=%xmm4
+movdqa 64(%rsi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(0op +  80)
+# asm 1: movdqa 80(<0op=int64#2),>0r5=int6464#6
+# asm 2: movdqa 80(<0op=%rsi),>0r5=%xmm5
+movdqa 80(%rsi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(0op +  96)
+# asm 1: movdqa 96(<0op=int64#2),>0r6=int6464#7
+# asm 2: movdqa 96(<0op=%rsi),>0r6=%xmm6
+movdqa 96(%rsi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(0op + 112)
+# asm 1: movdqa 112(<0op=int64#2),>0r7=int6464#8
+# asm 2: movdqa 112(<0op=%rsi),>0r7=%xmm7
+movdqa 112(%rsi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(0op + 128)
+# asm 1: movdqa 128(<0op=int64#2),>0r8=int6464#9
+# asm 2: movdqa 128(<0op=%rsi),>0r8=%xmm8
+movdqa 128(%rsi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(0op + 144)
+# asm 1: movdqa 144(<0op=int64#2),>0r9=int6464#10
+# asm 2: movdqa 144(<0op=%rsi),>0r9=%xmm9
+movdqa 144(%rsi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(0op + 160)
+# asm 1: movdqa 160(<0op=int64#2),>0r10=int6464#11
+# asm 2: movdqa 160(<0op=%rsi),>0r10=%xmm10
+movdqa 160(%rsi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(0op + 176)
+# asm 1: movdqa 176(<0op=int64#2),>0r11=int6464#12
+# asm 2: movdqa 176(<0op=%rsi),>0r11=%xmm11
+movdqa 176(%rsi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: 1t0 = THREE_THREE
+# asm 1: movdqa THREE_THREE,<1t0=int6464#13
+# asm 2: movdqa THREE_THREE,<1t0=%xmm12
+movdqa THREE_THREE,%xmm12
+
+# qhasm: float6464 0r0  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: mulpd <1t0=%xmm12,<0r0=%xmm0
+mulpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r1=int6464#2
+# asm 2: mulpd <1t0=%xmm12,<0r1=%xmm1
+mulpd %xmm12,%xmm1
+
+# qhasm: float6464 0r2  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r2=int6464#3
+# asm 2: mulpd <1t0=%xmm12,<0r2=%xmm2
+mulpd %xmm12,%xmm2
+
+# qhasm: float6464 0r3  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r3=int6464#4
+# asm 2: mulpd <1t0=%xmm12,<0r3=%xmm3
+mulpd %xmm12,%xmm3
+
+# qhasm: float6464 0r4  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: mulpd <1t0=%xmm12,<0r4=%xmm4
+mulpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r5=int6464#6
+# asm 2: mulpd <1t0=%xmm12,<0r5=%xmm5
+mulpd %xmm12,%xmm5
+
+# qhasm: float6464 0r6  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r6=int6464#7
+# asm 2: mulpd <1t0=%xmm12,<0r6=%xmm6
+mulpd %xmm12,%xmm6
+
+# qhasm: float6464 0r7  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r7=int6464#8
+# asm 2: mulpd <1t0=%xmm12,<0r7=%xmm7
+mulpd %xmm12,%xmm7
+
+# qhasm: float6464 0r8  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: mulpd <1t0=%xmm12,<0r8=%xmm8
+mulpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r9=int6464#10
+# asm 2: mulpd <1t0=%xmm12,<0r9=%xmm9
+mulpd %xmm12,%xmm9
+
+# qhasm: float6464 0r10 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r10=int6464#11
+# asm 2: mulpd <1t0=%xmm12,<0r10=%xmm10
+mulpd %xmm12,%xmm10
+
+# qhasm: float6464 0r11 *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r11=int6464#12
+# asm 2: mulpd <1t0=%xmm12,<0r11=%xmm11
+mulpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(0rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<0rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<0rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(0rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<0rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<0rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(0rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<0rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<0rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(0rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<0rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<0rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(0rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<0rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<0rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(0rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<0rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<0rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(0rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<0rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<0rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(0rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<0rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<0rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(0rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<0rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<0rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(0rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<0rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<0rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(0rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<0rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<0rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(0rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<0rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<0rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 245 - 0
dclxvi-20130329/fp2e_triple2.s

@@ -0,0 +1,245 @@
+# File:   dclxvi-20130329/fp2e_triple2.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fp2e_triple2_qhasm
+.text
+.p2align 5
+.globl _fp2e_triple2_qhasm
+.globl fp2e_triple2_qhasm
+_fp2e_triple2_qhasm:
+fp2e_triple2_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: int64 rop
+
+# qhasm: input rop
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: 0r0  = *(int128 *)(rop +   0)
+# asm 1: movdqa 0(<rop=int64#1),>0r0=int6464#1
+# asm 2: movdqa 0(<rop=%rdi),>0r0=%xmm0
+movdqa 0(%rdi),%xmm0
+
+# qhasm: 0r1  = *(int128 *)(rop +  16)
+# asm 1: movdqa 16(<rop=int64#1),>0r1=int6464#2
+# asm 2: movdqa 16(<rop=%rdi),>0r1=%xmm1
+movdqa 16(%rdi),%xmm1
+
+# qhasm: 0r2  = *(int128 *)(rop +  32)
+# asm 1: movdqa 32(<rop=int64#1),>0r2=int6464#3
+# asm 2: movdqa 32(<rop=%rdi),>0r2=%xmm2
+movdqa 32(%rdi),%xmm2
+
+# qhasm: 0r3  = *(int128 *)(rop +  48)
+# asm 1: movdqa 48(<rop=int64#1),>0r3=int6464#4
+# asm 2: movdqa 48(<rop=%rdi),>0r3=%xmm3
+movdqa 48(%rdi),%xmm3
+
+# qhasm: 0r4  = *(int128 *)(rop +  64)
+# asm 1: movdqa 64(<rop=int64#1),>0r4=int6464#5
+# asm 2: movdqa 64(<rop=%rdi),>0r4=%xmm4
+movdqa 64(%rdi),%xmm4
+
+# qhasm: 0r5  = *(int128 *)(rop +  80)
+# asm 1: movdqa 80(<rop=int64#1),>0r5=int6464#6
+# asm 2: movdqa 80(<rop=%rdi),>0r5=%xmm5
+movdqa 80(%rdi),%xmm5
+
+# qhasm: 0r6  = *(int128 *)(rop +  96)
+# asm 1: movdqa 96(<rop=int64#1),>0r6=int6464#7
+# asm 2: movdqa 96(<rop=%rdi),>0r6=%xmm6
+movdqa 96(%rdi),%xmm6
+
+# qhasm: 0r7  = *(int128 *)(rop + 112)
+# asm 1: movdqa 112(<rop=int64#1),>0r7=int6464#8
+# asm 2: movdqa 112(<rop=%rdi),>0r7=%xmm7
+movdqa 112(%rdi),%xmm7
+
+# qhasm: 0r8  = *(int128 *)(rop + 128)
+# asm 1: movdqa 128(<rop=int64#1),>0r8=int6464#9
+# asm 2: movdqa 128(<rop=%rdi),>0r8=%xmm8
+movdqa 128(%rdi),%xmm8
+
+# qhasm: 0r9  = *(int128 *)(rop + 144)
+# asm 1: movdqa 144(<rop=int64#1),>0r9=int6464#10
+# asm 2: movdqa 144(<rop=%rdi),>0r9=%xmm9
+movdqa 144(%rdi),%xmm9
+
+# qhasm: 0r10 = *(int128 *)(rop + 160)
+# asm 1: movdqa 160(<rop=int64#1),>0r10=int6464#11
+# asm 2: movdqa 160(<rop=%rdi),>0r10=%xmm10
+movdqa 160(%rdi),%xmm10
+
+# qhasm: 0r11 = *(int128 *)(rop + 176)
+# asm 1: movdqa 176(<rop=int64#1),>0r11=int6464#12
+# asm 2: movdqa 176(<rop=%rdi),>0r11=%xmm11
+movdqa 176(%rdi),%xmm11
+
+# qhasm: int6464 1t0
+
+# qhasm: 1t0 = THREE_THREE
+# asm 1: movdqa THREE_THREE,<1t0=int6464#13
+# asm 2: movdqa THREE_THREE,<1t0=%xmm12
+movdqa THREE_THREE,%xmm12
+
+# qhasm: float6464 0r0  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r0=int6464#1
+# asm 2: mulpd <1t0=%xmm12,<0r0=%xmm0
+mulpd %xmm12,%xmm0
+
+# qhasm: float6464 0r1  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r1=int6464#2
+# asm 2: mulpd <1t0=%xmm12,<0r1=%xmm1
+mulpd %xmm12,%xmm1
+
+# qhasm: float6464 0r2  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r2=int6464#3
+# asm 2: mulpd <1t0=%xmm12,<0r2=%xmm2
+mulpd %xmm12,%xmm2
+
+# qhasm: float6464 0r3  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r3=int6464#4
+# asm 2: mulpd <1t0=%xmm12,<0r3=%xmm3
+mulpd %xmm12,%xmm3
+
+# qhasm: float6464 0r4  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r4=int6464#5
+# asm 2: mulpd <1t0=%xmm12,<0r4=%xmm4
+mulpd %xmm12,%xmm4
+
+# qhasm: float6464 0r5  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r5=int6464#6
+# asm 2: mulpd <1t0=%xmm12,<0r5=%xmm5
+mulpd %xmm12,%xmm5
+
+# qhasm: float6464 0r6  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r6=int6464#7
+# asm 2: mulpd <1t0=%xmm12,<0r6=%xmm6
+mulpd %xmm12,%xmm6
+
+# qhasm: float6464 0r7  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r7=int6464#8
+# asm 2: mulpd <1t0=%xmm12,<0r7=%xmm7
+mulpd %xmm12,%xmm7
+
+# qhasm: float6464 0r8  *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r8=int6464#9
+# asm 2: mulpd <1t0=%xmm12,<0r8=%xmm8
+mulpd %xmm12,%xmm8
+
+# qhasm: float6464 0r9  *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r9=int6464#10
+# asm 2: mulpd <1t0=%xmm12,<0r9=%xmm9
+mulpd %xmm12,%xmm9
+
+# qhasm: float6464 0r10 *= 1t0
+# asm 1: mulpd <1t0=int6464#13,<0r10=int6464#11
+# asm 2: mulpd <1t0=%xmm12,<0r10=%xmm10
+mulpd %xmm12,%xmm10
+
+# qhasm: float6464 0r11 *= 1t0 
+# asm 1: mulpd <1t0=int6464#13,<0r11=int6464#12
+# asm 2: mulpd <1t0=%xmm12,<0r11=%xmm11
+mulpd %xmm12,%xmm11
+
+# qhasm: *(int128 *)(rop +   0) =  0r0
+# asm 1: movdqa <0r0=int6464#1,0(<rop=int64#1)
+# asm 2: movdqa <0r0=%xmm0,0(<rop=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm: *(int128 *)(rop +  16) =  0r1
+# asm 1: movdqa <0r1=int6464#2,16(<rop=int64#1)
+# asm 2: movdqa <0r1=%xmm1,16(<rop=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm: *(int128 *)(rop +  32) =  0r2
+# asm 1: movdqa <0r2=int6464#3,32(<rop=int64#1)
+# asm 2: movdqa <0r2=%xmm2,32(<rop=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm: *(int128 *)(rop +  48) =  0r3
+# asm 1: movdqa <0r3=int6464#4,48(<rop=int64#1)
+# asm 2: movdqa <0r3=%xmm3,48(<rop=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm: *(int128 *)(rop +  64) =  0r4
+# asm 1: movdqa <0r4=int6464#5,64(<rop=int64#1)
+# asm 2: movdqa <0r4=%xmm4,64(<rop=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm: *(int128 *)(rop +  80) =  0r5
+# asm 1: movdqa <0r5=int6464#6,80(<rop=int64#1)
+# asm 2: movdqa <0r5=%xmm5,80(<rop=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm: *(int128 *)(rop +  96) =  0r6
+# asm 1: movdqa <0r6=int6464#7,96(<rop=int64#1)
+# asm 2: movdqa <0r6=%xmm6,96(<rop=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm: *(int128 *)(rop + 112) =  0r7
+# asm 1: movdqa <0r7=int6464#8,112(<rop=int64#1)
+# asm 2: movdqa <0r7=%xmm7,112(<rop=%rdi)
+movdqa %xmm7,112(%rdi)
+
+# qhasm: *(int128 *)(rop + 128) =  0r8
+# asm 1: movdqa <0r8=int6464#9,128(<rop=int64#1)
+# asm 2: movdqa <0r8=%xmm8,128(<rop=%rdi)
+movdqa %xmm8,128(%rdi)
+
+# qhasm: *(int128 *)(rop + 144) =  0r9
+# asm 1: movdqa <0r9=int6464#10,144(<rop=int64#1)
+# asm 2: movdqa <0r9=%xmm9,144(<rop=%rdi)
+movdqa %xmm9,144(%rdi)
+
+# qhasm: *(int128 *)(rop + 160) = 0r10
+# asm 1: movdqa <0r10=int6464#11,160(<rop=int64#1)
+# asm 2: movdqa <0r10=%xmm10,160(<rop=%rdi)
+movdqa %xmm10,160(%rdi)
+
+# qhasm: *(int128 *)(rop + 176) = 0r11
+# asm 1: movdqa <0r11=int6464#12,176(<rop=int64#1)
+# asm 2: movdqa <0r11=%xmm11,176(<rop=%rdi)
+movdqa %xmm11,176(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 335 - 0
dclxvi-20130329/fp6e.c

@@ -0,0 +1,335 @@
+/*
+ * File:   dclxvi-20130329/fp6e.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <assert.h>
+
+//BEFORE
+//#include "fp6e.h"
+
+
+//AFTER
+
+#include "mul.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+} 
+
+
+extern const fp2e_t bn_ypminus1;
+extern const fp2e_t bn_ypminus1_squ;
+extern const fpe_t bn_zeta;
+extern const fpe_t bn_zeta2;
+
+
+void fp6e_short_coeffred(fp6e_t rop)
+{
+  fp2e_short_coeffred(rop->m_a);
+  fp2e_short_coeffred(rop->m_b);
+  fp2e_short_coeffred(rop->m_c);
+}
+
+// Set fp6e_t rop to given value:
+void fp6e_set(fp6e_t rop, const fp6e_t op)
+{
+	fp2e_set(rop->m_a, op->m_a);
+	fp2e_set(rop->m_b, op->m_b);
+	fp2e_set(rop->m_c, op->m_c);
+}
+
+// Initialize an fp6e, set to value given in three fp2es
+void fp6e_set_fp2e(fp6e_t rop, const fp2e_t a, const fp2e_t b, const fp2e_t c)
+{
+	fp2e_set(rop->m_a, a);
+	fp2e_set(rop->m_b, b);
+	fp2e_set(rop->m_c, c);
+}
+
+// Set rop to one:
+void fp6e_setone(fp6e_t rop)
+{
+	fp2e_setzero(rop->m_a);
+	fp2e_setzero(rop->m_b);
+	fp2e_setone(rop->m_c);
+}
+
+// Set rop to zero:
+void fp6e_setzero(fp6e_t rop)
+{
+	fp2e_setzero(rop->m_a);
+	fp2e_setzero(rop->m_b);
+	fp2e_setzero(rop->m_c);
+}
+
+// Compare for equality:
+int fp6e_iseq(const fp6e_t op1, const fp6e_t op2)
+{
+  int ret = fp2e_iseq(op1->m_a, op2->m_a);
+  ret = ret && fp2e_iseq(op1->m_b, op2->m_b);
+  ret = ret && fp2e_iseq(op1->m_c, op2->m_c);
+  return ret;
+}
+
+int fp6e_isone(const fp6e_t op)
+{
+  int ret = fp2e_iszero(op->m_a);
+  ret = ret && fp2e_iszero(op->m_b);
+  ret = ret && fp2e_isone(op->m_c);
+  return ret;
+}
+
+int fp6e_iszero(const fp6e_t op)
+{
+  int ret = fp2e_iszero(op->m_a);
+  ret = ret && fp2e_iszero(op->m_b);
+  ret = ret && fp2e_iszero(op->m_c);
+  return ret;
+}
+
+void fp6e_cmov(fp6e_t rop, const fp6e_t op, int c)
+{
+  fp2e_cmov(rop->m_a, op->m_a, c);
+  fp2e_cmov(rop->m_b, op->m_b, c);
+  fp2e_cmov(rop->m_c, op->m_c, c);
+}
+
+// Add two fp6e, store result in rop:
+void fp6e_add(fp6e_t rop, const fp6e_t op1, const fp6e_t op2)
+{
+	fp2e_add(rop->m_a, op1->m_a, op2->m_a);
+	fp2e_add(rop->m_b, op1->m_b, op2->m_b);
+	fp2e_add(rop->m_c, op1->m_c, op2->m_c);
+}
+
+// Subtract op2 from op1, store result in rop:
+void fp6e_sub(fp6e_t rop, const fp6e_t op1, const fp6e_t op2)
+{
+	fp2e_sub(rop->m_a, op1->m_a, op2->m_a);
+	fp2e_sub(rop->m_b, op1->m_b, op2->m_b);
+	fp2e_sub(rop->m_c, op1->m_c, op2->m_c);
+}
+
+// Negate an fp6e, store result in rop:
+void fp6e_neg(fp6e_t rop, const fp6e_t op)
+{
+	fp2e_neg(rop->m_a, op->m_a);
+	fp2e_neg(rop->m_b, op->m_b);
+	fp2e_neg(rop->m_c, op->m_c);
+}
+
+// Multiply two fp6e, store result in rop:
+void fp6e_mul(fp6e_t rop, const fp6e_t op1, const fp6e_t op2)
+{
+	fp2e_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; // Needed for intermediary values
+
+	// See "Multiplication and Squaring in Pairing-Friendly Fields", section 4, Karatsuba method
+	fp2e_mul(tmp3, op1->m_a, op2->m_a);
+	fp2e_mul(tmp2, op1->m_b, op2->m_b);
+	fp2e_mul(tmp1, op1->m_c, op2->m_c);
+
+	fp2e_add(tmp4, op1->m_a, op1->m_b);
+  //fp2e_short_coeffred(tmp4);
+	fp2e_add(tmp5, op2->m_a, op2->m_b);
+  //fp2e_short_coeffred(tmp5);
+	fp2e_mul(tmp6, tmp4, tmp5); 
+	fp2e_sub2(tmp6, tmp2);
+  //fp2e_short_coeffred(tmp6);
+	fp2e_sub2(tmp6, tmp3);
+  //fp2e_short_coeffred(tmp6);
+	fp2e_mulxi(tmp6, tmp6);
+	fp2e_add2(tmp6, tmp1);
+
+	fp2e_add(tmp4, op1->m_b, op1->m_c);
+  //fp2e_short_coeffred(tmp4);
+	fp2e_add(tmp5, op2->m_b, op2->m_c);
+  //fp2e_short_coeffred(tmp5);
+	fp2e_mul(rop->m_b, tmp4, tmp5);
+	fp2e_sub2(rop->m_b, tmp1);
+	fp2e_sub2(rop->m_b, tmp2);
+  //fp2e_short_coeffred(rop->m_b);
+	fp2e_mulxi(tmp4, tmp3);
+	fp2e_add2(rop->m_b, tmp4);
+  fp2e_short_coeffred(rop->m_b);
+
+	fp2e_add(tmp4, op1->m_a, op1->m_c);
+  //fp2e_short_coeffred(tmp4);
+	fp2e_add(tmp5, op2->m_a, op2->m_c);
+  //fp2e_short_coeffred(tmp5);
+
+	fp2e_set(rop->m_c, tmp6);
+  fp2e_short_coeffred(rop->m_c);
+
+	fp2e_mul(rop->m_a, tmp4, tmp5);
+	fp2e_sub2(rop->m_a, tmp1);
+	fp2e_add2(rop->m_a, tmp2);
+	fp2e_sub2(rop->m_a, tmp3);
+  fp2e_short_coeffred(rop->m_a);
+}
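+
+// For reference: writing op1 = a1*Y^2 + b1*Y + c1 and op2 = a2*Y^2 + b2*Y + c2
+// over F_{p^6} = F_{p^2}[Y]/(Y^3 - xi), the three products tmp3 = a1*a2,
+// tmp2 = b1*b2 and tmp1 = c1*c2 above recombine to
+//   rop->m_a = a1*c2 + a2*c1 + b1*b2
+//   rop->m_b = b1*c2 + b2*c1 + xi*a1*a2
+//   rop->m_c = c1*c2 + xi*(a1*b2 + a2*b1)
+// which is op1*op2 reduced with Y^3 = xi.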
+
+// Compute the double of the square of an fp6e, store result in rop; uses Chung-Hasan (CH-SQR3x in pairing-friendly fields)
+void fp6e_squaredouble(fp6e_t rop, const fp6e_t op)
+{
+	//fp6e_mul(rop, op, op); //XXX make faster!
+  fp2e_t s0, s1, s2, s3, s4, t;
+
+  fp2e_square(s0,op->m_c);
+
+  fp2e_add(t, op->m_a, op->m_c);
+
+  fp2e_add(s1, t, op->m_b);
+  fp2e_short_coeffred(s1);
+  fp2e_square(s1, s1);
+
+  fp2e_sub(s2, t, op->m_b);
+  fp2e_short_coeffred(s2);
+  fp2e_square(s2, s2);
+
+  fp2e_mul(s3, op->m_a, op->m_b);
+  fp2e_double(s3, s3);
+
+  fp2e_square(s4, op->m_a);
+
+  fp2e_mulxi(rop->m_c, s3);
+  fp2e_add(rop->m_c, rop->m_c, s0);
+  fp2e_double(rop->m_c, rop->m_c);
+  fp2e_short_coeffred(rop->m_c);
+
+  fp2e_mulxi(rop->m_b, s4);
+  fp2e_sub(rop->m_b, s3, rop->m_b);
+  fp2e_double(rop->m_b, rop->m_b);
+  fp2e_sub(rop->m_b, s1, rop->m_b);
+  fp2e_sub(rop->m_b, rop->m_b, s2);
+  fp2e_short_coeffred(rop->m_b);
+
+  fp2e_add(rop->m_a, s0, s4);
+  fp2e_double(rop->m_a, rop->m_a);
+  fp2e_sub(rop->m_a, s1, rop->m_a);
+  fp2e_add(rop->m_a, rop->m_a, s2);
+  fp2e_short_coeffred(rop->m_a);
+}
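+
+// In terms of the coefficients, with op = a*Y^2 + b*Y + c, the Chung-Hasan
+// recombination above returns twice the square of op:
+//   rop->m_a = 2*(2*a*c + b^2)
+//   rop->m_b = 2*(xi*a^2 + 2*b*c)
+//   rop->m_c = 2*(c^2 + 2*xi*a*b)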
+
+// Multiply with tau:
+void fp6e_multau(fp6e_t rop, const fp6e_t op)
+{
+  fp2e_t tmp1;
+  fp2e_set(tmp1, op->m_b);
+  fp2e_set(rop->m_b, op->m_c);
+  fp2e_mulxi(rop->m_c, op->m_a);
+  fp2e_set(rop->m_a, tmp1);
+}
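+
+// Multiplication by tau = Y is a rotation of the three coefficients with one
+// twist by xi: (a*Y^2 + b*Y + c)*Y = b*Y^2 + c*Y + xi*a under Y^3 = xi, which
+// is exactly the shuffle performed above.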
+
+void fp6e_mul_fpe(fp6e_t rop, const fp6e_t op1, const fpe_t op2)
+{
+	fp2e_mul_fpe(rop->m_a, op1->m_a, op2);
+	fp2e_mul_fpe(rop->m_b, op1->m_b, op2);
+	fp2e_mul_fpe(rop->m_c, op1->m_c, op2);
+}
+
+void fp6e_mul_fp2e(fp6e_t rop, const fp6e_t op1, const fp2e_t op2)
+{
+	fp2e_mul(rop->m_a, op1->m_a, op2);
+	fp2e_mul(rop->m_b, op1->m_b, op2);
+	fp2e_mul(rop->m_c, op1->m_c, op2);
+}
+
+// Multiply an fp6e by a short fp6e, store result in rop:
+// the short fp6e op2 has a2 = 0, i.e. op2 = b2*tau + c2.
+void fp6e_mul_shortfp6e(fp6e_t rop, const fp6e_t op1, const fp6e_t op2)
+{
+	fp2e_t tmp1, tmp2, tmp3, tmp4, tmp5; // Needed for intermediary values
+
+	fp2e_mul(tmp2, op1->m_b, op2->m_b); // tmp2 = b1*b2
+	fp2e_mul(tmp1, op1->m_c, op2->m_c); // tmp1 = c1*c2
+
+  fp2e_mul(tmp3, op1->m_a, op2->m_b);   // tmp3 = a1*b2
+	fp2e_mulxi(tmp3, tmp3);               // tmp3 = a1*b2*xi
+	fp2e_add(tmp5, tmp3, tmp1);       // tmp5 = c1*c2 + a1*b2*xi
+
+	fp2e_add(tmp4, op1->m_b, op1->m_c);   // tmp4 = b1+c1
+  //fp2e_short_coeffred(tmp4);
+	fp2e_add(tmp3, op2->m_b, op2->m_c);   // tmp3 = b2+c2
+  //fp2e_short_coeffred(tmp3);
+	fp2e_mul(rop->m_b, tmp4, tmp3);       // b3 = (b1+c1)*(b2+c2)
+	fp2e_sub2(rop->m_b, tmp1);
+	fp2e_sub2(rop->m_b, tmp2);   // b3 = b1*c2 + b2*c1
+  fp2e_short_coeffred(rop->m_b);
+
+  fp2e_mul(rop->m_a, op1->m_a, op2->m_c);   // a3 = a1*c2
+	fp2e_add2(rop->m_a, tmp2);   // a3 = a1*c2 + b1*b2
+    
+  fp2e_set(rop->m_c, tmp5);             // c3 =  c1*c2 + a1*b2*xi
+}
+
+void fp6e_invert(fp6e_t rop, const fp6e_t op)
+{
+	fp2e_t tmp1, tmp2, tmp3, tmp4, tmp5;  // Needed to store intermediary results
+
+	// See "Implementing cryptographic pairings"
+	fp2e_square(tmp1, op->m_c);
+	fp2e_mul(tmp5, op->m_a, op->m_b);
+	fp2e_mulxi(tmp5, tmp5);
+	fp2e_sub2(tmp1, tmp5); // A
+  fp2e_short_coeffred(tmp1);
+	
+	fp2e_square(tmp2, op->m_a);
+	fp2e_mulxi(tmp2, tmp2);
+	fp2e_mul(tmp5, op->m_b, op->m_c);
+	fp2e_sub2(tmp2, tmp5); // B
+  fp2e_short_coeffred(tmp2);
+
+	fp2e_square(tmp3, op->m_b);
+	fp2e_mul(tmp5, op->m_a, op->m_c);
+	fp2e_sub2(tmp3, tmp5); // C
+  //fp2e_short_coeffred(tmp3);
+
+	fp2e_mul(tmp4, tmp3, op->m_b);
+	fp2e_mulxi(tmp4, tmp4);
+	fp2e_mul(tmp5, tmp1, op->m_c);
+	fp2e_add2(tmp4, tmp5);
+	fp2e_mul(tmp5, tmp2, op->m_a);
+	fp2e_mulxi(tmp5, tmp5);
+	fp2e_add2(tmp4, tmp5); // F
+  fp2e_short_coeffred(tmp4);
+	
+	fp2e_invert(tmp4, tmp4);
+
+	fp2e_mul(rop->m_a, tmp3, tmp4);
+	fp2e_mul(rop->m_b, tmp2, tmp4);
+	fp2e_mul(rop->m_c, tmp1, tmp4);
+}
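+
+// Spelled out: with op = a*Y^2 + b*Y + c, the code above forms
+//   A = c^2 - xi*a*b         (tmp1)
+//   B = xi*a^2 - b*c         (tmp2)
+//   C = b^2 - a*c            (tmp3)
+//   F = c*A + xi*(a*B + b*C) (tmp4)
+// inverts the single F_{p^2} element F, and returns
+//   op^-1 = (C*Y^2 + B*Y + A) / F.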
+
+void fp6e_frobenius_p(fp6e_t rop, const fp6e_t op)
+{
+	fp6e_set(rop, op);
+	fp2e_conjugate(rop->m_a, rop->m_a);
+	fp2e_conjugate(rop->m_b, rop->m_b);
+	fp2e_conjugate(rop->m_c, rop->m_c);
+
+	fp2e_mul(rop->m_b, rop->m_b, bn_ypminus1);
+	fp2e_mul(rop->m_a, rop->m_a, bn_ypminus1_squ);
+}
+
+void fp6e_frobenius_p2(fp6e_t rop, const fp6e_t op)
+{
+	fp2e_set(rop->m_c, op->m_c);
+	fp2e_mul_fpe(rop->m_b, op->m_b, bn_zeta2);
+	fp2e_mul_fpe(rop->m_a, op->m_a, bn_zeta);
+}
+
+// Print the fp6e:
+void fp6e_print(FILE * outfile, const fp6e_t op)
+{
+	fprintf(outfile, "[");
+	fp2e_print(outfile, op->m_a);
+	fprintf(outfile, " * Y^2\n  + ");
+	fp2e_print(outfile, op->m_b);
+	fprintf(outfile, " * Y\n + ");
+	fp2e_print(outfile, op->m_c);
+	fprintf(outfile, "]");
+}
+

+ 85 - 0
dclxvi-20130329/fp6e.h

@@ -0,0 +1,85 @@
+/*
+ * File:   dclxvi-20130329/fp6e.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef FP6E_H
+#define FP6E_H
+
+#include "fp2e.h"
+
+// Elements from F_{p^6}= F_{p^2}[Y] / (Y^3 - xi)F_{p^2}[Y] are represented as aY^2 + bY + c 
+typedef struct fp6e_struct fp6e_struct_t;
+
+struct fp6e_struct
+{
+	fp2e_t m_a;
+	fp2e_t m_b;
+	fp2e_t m_c;
+};
+
+typedef fp6e_struct_t fp6e_t[1];
+
+void fp6e_short_coeffred(fp6e_t rop);
+
+// Set fp6e_t rop to given value:
+void fp6e_set(fp6e_t rop, const fp6e_t op);
+
+// Initialize an fp6e, set to value given in three fp2es
+void fp6e_set_fp2e(fp6e_t rop, const fp2e_t a, const fp2e_t b, const fp2e_t c);
+
+// Initialize an fp6e, set to value given in six strings
+void fp6e_set_str(fp6e_t rop, const char *a1, const char *a0, const char *b1, const char *b0, const char *c1, const char *c0);
+
+// Set rop to one:
+void fp6e_setone(fp6e_t rop);
+
+// Set rop to zero:
+void fp6e_setzero(fp6e_t rop);
+
+// Compare for equality:
+int fp6e_iseq(const fp6e_t op1, const fp6e_t op2);
+
+int fp6e_isone(const fp6e_t op);
+
+int fp6e_iszero(const fp6e_t op);
+
+void fp6e_cmov(fp6e_t rop, const fp6e_t op, int c);
+
+// Add two fp6e, store result in rop:
+void fp6e_add(fp6e_t rop, const fp6e_t op1, const fp6e_t op2);
+
+// Subtract op2 from op1, store result in rop:
+void fp6e_sub(fp6e_t rop, const fp6e_t op1, const fp6e_t op2);
+
+// Negate an fp6e
+void fp6e_neg(fp6e_t rop, const fp6e_t op);
+
+// Multiply two fp6e, store result in rop:
+void fp6e_mul(fp6e_t rop, const fp6e_t op1, const fp6e_t op2);
+
+// Compute the double of a square of an fp6e, store result in rop:
+void fp6e_squaredouble(fp6e_t rop, const fp6e_t op);
+
+// Multiply with tau:
+void fp6e_multau(fp6e_t rop, const fp6e_t op);
+
+void fp6e_mul_fpe(fp6e_t rop, const fp6e_t op1, const fpe_t op2);
+
+void fp6e_mul_fp2e(fp6e_t rop, const fp6e_t op1, const fp2e_t op2);
+
+// Multiply an fp6e by a short fp6e, store result in rop:
+// the short fp6e is given by 2 fp2e elements op2 = b2*tau + c2.
+void fp6e_mul_shortfp6e(fp6e_t rop, const fp6e_t op1, const fp6e_t op2);
+
+void fp6e_invert(fp6e_t rop, const fp6e_t op);
+
+void fp6e_frobenius_p(fp6e_t rop, const fp6e_t op);
+
+void fp6e_frobenius_p2(fp6e_t rop, const fp6e_t op);
+
+// Print the element to stdout:
+void fp6e_print(FILE *outfile, const fp6e_t op);
+
+#endif // ifndef FP6E_H

+ 263 - 0
dclxvi-20130329/fpe.c

@@ -0,0 +1,263 @@
+/*
+ * File:   dclxvi-20130329/fpe.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+
+#include <math.h> 
+#include <assert.h>
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+
+
+#include "mul.h" 
+extern "C" {	
+#include "fpe.h"
+} 
+#include "zout.hpp"
+
+// without mul.h, target "../obj/fpe_666.o" fails: error: 'coeffred_round_par' was not declared in this scope
+// without extern "C", target "bgn" fails: undefined reference to 'fpe_iszero'
+// without "fpe.h", target "../obj/fpe_666.o" fails: error: variable or field 'fpe_short_coeffred' declared void
+
+
+
+extern const scalar_t bn_pminus2;
+extern const double bn_v;
+extern const double bn_v6;
+
+void fpe_short_coeffred(fpe_t rop)
+{
+  mydouble carry11 = round(rop->v[11]/bn_v);
+  rop->v[11] = remround(rop->v[11],bn_v);
+  rop->v[0] = rop->v[0] - carry11;
+  rop->v[3] = rop->v[3] - carry11;
+  rop->v[6] = rop->v[6] - 4*carry11;
+  rop->v[9] = rop->v[9] - carry11;
+  mydouble carry0 = round(rop->v[0]/bn_v6);
+  mydouble carry1 = round(rop->v[1]/bn_v);
+  mydouble carry2 = round(rop->v[2]/bn_v);
+  mydouble carry3 = round(rop->v[3]/bn_v);
+  mydouble carry4 = round(rop->v[4]/bn_v);
+  mydouble carry5 = round(rop->v[5]/bn_v);
+  mydouble carry6 = round(rop->v[6]/bn_v6);
+  mydouble carry7 = round(rop->v[7]/bn_v);
+  mydouble carry8 = round(rop->v[8]/bn_v);
+  mydouble carry9 = round(rop->v[9]/bn_v);
+  mydouble carry10 = round(rop->v[10]/bn_v);
+  rop->v[0] = remround(rop->v[0],bn_v6);
+  rop->v[1] = remround(rop->v[1],bn_v);
+  rop->v[2] = remround(rop->v[2],bn_v);
+  rop->v[3] = remround(rop->v[3],bn_v);
+  rop->v[4] = remround(rop->v[4],bn_v);
+  rop->v[5] = remround(rop->v[5],bn_v);
+  rop->v[6] = remround(rop->v[6],bn_v6);
+  rop->v[7] = remround(rop->v[7],bn_v);
+  rop->v[8] = remround(rop->v[8],bn_v);
+  rop->v[9] = remround(rop->v[9],bn_v);
+  rop->v[10] = remround(rop->v[10],bn_v);
+  rop->v[1] += carry0;
+  rop->v[2] += carry1;
+  rop->v[3] += carry2;
+  rop->v[4] += carry3;
+  rop->v[5] += carry4;
+  rop->v[6] += carry5;
+  rop->v[7] += carry6;
+  rop->v[8] += carry7;
+  rop->v[9] += carry8;
+  rop->v[10] += carry9;
+  rop->v[11] += carry10;
+}
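fpe_short_coeffred splits every coefficient into a canonical digit plus a carry that is pushed into the next limb (positions 0 and 6 use the larger radix bn_v6). A self-contained toy version of one such carry step, assuming remround(x,m) behaves like the round-to-nearest remainder x - round(x/m)*m (the real code wraps this in the mydouble/checkdouble machinery):

#include <math.h>
#include <stdio.h>

/* assumed semantics of remround: remainder after rounding the quotient */
static double toy_remround(double x, double m) { return x - round(x / m) * m; }

int main(void)
{
  double v = 64.0;      /* stand-in for the limb radix bn_v             */
  double x = 1000.0;    /* an unreduced coefficient                     */

  double carry = round(x / v);        /* goes into the next limb        */
  double digit = toy_remround(x, v);  /* canonical digit, |digit|<=v/2  */

  /* digit + carry*v reconstructs x, so the element's value is unchanged */
  printf("x=%g -> digit=%g carry=%g (digit+carry*v=%g)\n",
         x, digit, carry, digit + carry * v);
  return 0;
}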
+
+// Set fpe_t rop to given value:
+void fpe_set(fpe_t rop, const fpe_t op)
+{
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = op->v[i];
+}
+
+/* Communicate the fact that the fpe is reduced (and that we don't know anything more about it) */
+void fpe_isreduced(fpe_t rop)
+{
+  setmax(rop->v[0],(long)bn_v6/2);
+  setmax(rop->v[6],(long)bn_v6/2);
+
+  setmax(rop->v[1],(long)bn_v/2);
+  setmax(rop->v[3],(long)bn_v/2);
+  setmax(rop->v[4],(long)bn_v/2);
+  setmax(rop->v[7],(long)bn_v/2);
+  setmax(rop->v[9],(long)bn_v/2);
+  setmax(rop->v[10],(long)bn_v/2);
+
+  //XXX: Change additive constant:
+  setmax(rop->v[2],(long)bn_v/2+2331);
+  setmax(rop->v[5],(long)bn_v/2+2331);
+  setmax(rop->v[8],(long)bn_v/2+2331);
+  setmax(rop->v[11],(long)bn_v/2+2331);
+}
+
+// Set fpe_t rop to value given in double array of length 12
+void fpe_set_doublearray(fpe_t rop, const mydouble op[12])
+{
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = op[i];
+}
+
+// Set rop to one
+void fpe_setone(fpe_t rop)
+{
+  int i;
+  for(i=1;i<12;i++)
+    rop->v[i] = 0.;
+  rop->v[0] = 1;
+}
+
+// Set rop to zero
+void fpe_setzero(fpe_t rop)
+{
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = 0.;
+}
+
+int fpe_iseq(const fpe_t op1, const fpe_t op2)
+{
+  fpe_t t;
+  fpe_sub(t, op1, op2);
+  return fpe_iszero(t);
+}
+
+int fpe_isone(const fpe_t op)
+{
+  fpe_t t;
+  int i;
+  for(i=1;i<12;i++)
+    t->v[i] = op->v[i];
+  t->v[0] = op->v[0] - 1.;
+  return fpe_iszero(t);
+}
+
+int fpe_iszero(const fpe_t op)
+{
+  fpe_t t;
+  double d;
+  int i;
+  unsigned long long tr = 0;
+  unsigned int differentbits=0;
+  for(i=0;i<12;i++)
+    t->v[i] = op->v[i];
+  coeffred_round_par(t->v);
+
+  //Constant-time comparison
+  double zero = 0.;
+  unsigned long long *zp = (unsigned long long *)&zero;
+  unsigned long long *tp;
+
+  for(i=0;i<12;i++)
+  {
+    d = todouble(t->v[i]);
+    tp = (unsigned long long *)&d;
+    tr |= (*tp ^ *zp);
+  }
+  for(i=0;i<8;i++)
+    differentbits |= i[(unsigned char*)&tr]; // same as ((unsigned char*)&tr)[i]: OR together all bytes of tr
+
+  return 1 & ((differentbits - 1) >> 8);
+}
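The tail of fpe_iszero is the usual branch-free comparison idiom: fold the bit patterns of all coefficients into one word, OR its bytes together, and turn "all bytes zero" into 1 with (differentbits - 1) >> 8. A standalone illustration of the same trick (all_zero is a name invented for this sketch; it assumes 8-byte doubles):

#include <stdio.h>
#include <string.h>

static int all_zero(const double *x, int n)
{
  unsigned long long tr = 0;
  unsigned int differentbits = 0;
  int i, j;

  for (i = 0; i < n; i++) {
    unsigned long long bits;
    memcpy(&bits, &x[i], sizeof bits);   /* bit pattern of the double      */
    tr |= bits;                          /* nonzero iff some x[i] != +0.0  */
  }
  for (j = 0; j < 8; j++)
    differentbits |= ((unsigned char *)&tr)[j];

  return 1 & ((differentbits - 1) >> 8); /* 1 iff every byte was zero */
}

int main(void)
{
  double a[3] = {0.0, 0.0, 0.0};
  double b[3] = {0.0, 1e-300, 0.0};
  printf("%d %d\n", all_zero(a, 3), all_zero(b, 3)); /* prints: 1 0 */
  return 0;
}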
+
+// Compute the negative of an fpe
+void fpe_neg(fpe_t rop, const fpe_t op)
+{
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = -op->v[i];
+}
+
+// Double an fpe:
+void fpe_double(fpe_t rop, const fpe_t op)
+{
+//printf("\n\n\nop=");  fpe_print(stdout,op);
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = op->v[i]*2;
+    //printf("%f\n",rop->v[i]);
+}
+
+// Triple an fpe:
+void fpe_triple(fpe_t rop, const fpe_t op)
+{
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = op->v[i]*3;
+}
+
+
+
+// Add two fpe, store result in rop:
+void fpe_add(fpe_t rop, const fpe_t op1, const fpe_t op2)
+{
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = op1->v[i] + op2->v[i];
+}
+
+// Subtract op2 from op1, store result in rop:
+void fpe_sub(fpe_t rop, const fpe_t op1, const fpe_t op2)
+{
+  int i;
+  for(i=0;i<12;i++)
+    rop->v[i] = op1->v[i] - op2->v[i];
+}
+
+// Multiply two fpe, store result in rop:
+#ifndef QHASM
+void fpe_mul_c(fpe_t rop, const fpe_t op1, const fpe_t op2)
+{
+	//debug(50);
+	mydouble h[24];
+	//abc;
+	polymul(h,op1->v,op2->v);
+	//xyz;
+	degred(h);
+	coeffred_round_par(h); 
+	int i;
+	for (i=0;i<12;i++)
+		rop->v[i] = h[i];
+}
+#endif
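fpe_mul_c delegates the real work to three steps from mul.c: a schoolbook polynomial multiplication of the two 12-coefficient vectors (polymul), a degree reduction back to 12 coefficients (degred), and the carry chain (coeffred_round_par); the hand-unrolled fpe_mul.s further down performs the same schoolbook pass in SSE2. A sketch of the first step only; toy_polymul is a made-up name, and it ignores the six-scaling of some partial products that the mixed-radix representation needs (visible as SIX_SIX in the assembly):

/* Schoolbook product of two degree-11 polynomials: 144 partial products
 * accumulated into coefficients h[0..22]; h[23] stays zero (the original
 * allocates mydouble h[24]). */
static void toy_polymul(double h[24], const double f[12], const double g[12])
{
  int i, j;
  for (i = 0; i < 24; i++) h[i] = 0.0;
  for (i = 0; i < 12; i++)
    for (j = 0; j < 12; j++)
      h[i + j] += f[i] * g[j];
}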
+
+// Square an fpe, store result in rop:
+void fpe_square(fpe_t rop, const fpe_t op)
+{
+  /* Not used during pairing computation */
+  fpe_mul(rop, op, op);
+}
+
+// Compute inverse of an fpe, store result in rop:
+void fpe_invert(fpe_t rop, const fpe_t op1)
+{
+  fpe_set(rop,op1);
+  int i;
+  for(i = 254; i >= 0; i--)
+  {
+    fpe_mul(rop,rop,rop);
+    if(scalar_getbit(bn_pminus2, i))
+      fpe_mul(rop,rop,op1);
+  }
+}
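fpe_invert is Fermat inversion: it raises op1 to p-2 by scanning the bits of bn_pminus2 from the top, squaring at every step and multiplying in op1 when the bit is set. The same left-to-right square-and-multiply pattern on ordinary 64-bit integers, with a prime small enough that the products fit (modpow/modmul are names made up for this sketch):

#include <stdint.h>
#include <stdio.h>

static uint64_t modmul(uint64_t a, uint64_t b, uint64_t m) { return (a * b) % m; }

static uint64_t modpow(uint64_t base, uint64_t exp, uint64_t m)
{
  uint64_t r = 1;
  int i;
  for (i = 63; i >= 0; i--) {          /* high bit to low bit          */
    r = modmul(r, r, m);               /* square every step            */
    if ((exp >> i) & 1)
      r = modmul(r, base, m);          /* multiply when the bit is set */
  }
  return r;
}

int main(void)
{
  uint64_t p = 1000003;                /* small prime, products fit in 64 bits */
  uint64_t a = 12345;
  uint64_t inv = modpow(a, p - 2, p);  /* Fermat: a^(p-2) = a^-1 mod p */
  printf("a * a^-1 mod p = %llu\n", (unsigned long long)(a * inv % p));
  return 0;
}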
+
+// Print the element to the given FILE:
+void fpe_print(FILE * outfile, const fpe_t op)
+{
+  int i;
+  //for(i=0;i<11;i++) fprintf(outfile, "%10lf, ", todouble(op->v[i]));
+  //fprintf(outfile, "%10lf", todouble(op->v[11]));
+  for(i=0;i<11;i++) fprintf(outfile, "%.0lf, ", todouble(op->v[i]));
+  fprintf(outfile, "%.0lf", todouble(op->v[11]));
+}

+ 105 - 0
dclxvi-20130329/fpe.h

@@ -0,0 +1,105 @@
+/*
+ * File:   dclxvi-20130329/fpe.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef FPE_H
+#define FPE_H
+
+
+#include <stdio.h>
+#include "mydouble.h"
+
+#ifdef BENCH
+unsigned long long int multpcycles; unsigned long long int nummultp;
+unsigned long long int nummultzerop;
+unsigned long long int nummultonep;
+unsigned long long int sqpcycles; unsigned long long int numsqp;
+unsigned long long invpcycles; unsigned long long numinvp;
+#endif
+
+typedef struct fpe_struct fpe_struct_t;
+
+struct fpe_struct
+{
+  mydouble v[12];
+} __attribute__ ((aligned (16)));
+
+typedef fpe_struct_t fpe_t[1];
+
+void fpe_short_coeffred(fpe_t rop);
+
+// Set fpe_t rop to given value:
+void fpe_set(fpe_t rop, const fpe_t op);
+
+/* Communicate the fact that the fpe is reduced (and that we don't know anything more about it) */
+void fpe_isreduced(fpe_t rop);
+
+// Set fpe_t rop to value given in bytearray -- inverse function to fpe_to_bytearray
+void fpe_set_bytearray(fpe_t rop, const unsigned char *op, size_t oplen);
+
+// Set fpe_t rop to value given in double array of length 12
+void fpe_set_doublearray(fpe_t rop, const mydouble op[12]);
+
+// Set rop to one
+void fpe_setone(fpe_t rop);
+
+// Set rop to zero
+void fpe_setzero(fpe_t rop);
+
+// Compare for equality:
+int fpe_iseq(const fpe_t op1, const fpe_t op2);
+
+// Is the element equal to 1:
+int fpe_isone(const fpe_t op);
+
+// Is the element equal to 0:
+int fpe_iszero(const fpe_t op);
+
+// Compute the negative of an fpe
+void fpe_neg(fpe_t rop, const fpe_t op);
+
+// Double an fpe:
+void fpe_double(fpe_t rop, const fpe_t op);
+
+// Triple an fpe:
+void fpe_triple(fpe_t rop, const fpe_t op);
+
+// Add two fpe, store result in rop:
+void fpe_add(fpe_t rop, const fpe_t op1, const fpe_t op2);
+
+// Subtract op2 from op1, store result in rop:
+void fpe_sub(fpe_t rop, const fpe_t op1, const fpe_t op2);
+
+#ifdef QHASM
+#define fpe_mul fpe_mul_qhasm
+#else
+#define fpe_mul fpe_mul_c
+#endif
+// Multiply two fpe, store result in rop:
+void fpe_mul(fpe_t rop, const fpe_t op1, const fpe_t op2);
+
+// Square an fpe, store result in rop:
+void fpe_square(fpe_t rop, const fpe_t op);
+
+// Compute inverse of an fpe, store result in rop:
+void fpe_invert(fpe_t rop, const fpe_t op1);
+
+// Print the element to the given FILE:
+void fpe_print(FILE * outfile, const fpe_t op);
+
+// Convert fpe into a bytearray
+void fpe_to_bytearray(unsigned char * rop, const fpe_t op);
+
+/*
+// Field constants
+fpe_t fpe_one;
+fpe_t zeta; // Third root of unity in F_p fulfilling Z^{p^2} = -zeta * Z
+fpe_t _1o3modp; // 1/3 \in \F_p
+// Two constants needed for the cometa-pairing computation
+fpe_t cometa_c0_const;
+fpe_t cometa_c1_const;
+*/
+
+#endif // ifndef FPE_H

BIN
dclxvi-20130329/fpe.h.gch


+ 4027 - 0
dclxvi-20130329/fpe_mul.s

@@ -0,0 +1,4027 @@
+# File:   dclxvi-20130329/fpe_mul.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: enter fpe_mul_qhasm
+.text
+.p2align 5
+.globl _fpe_mul_qhasm
+.globl fpe_mul_qhasm
+_fpe_mul_qhasm:
+fpe_mul_qhasm:
+mov %rsp,%r11
+and $31,%r11
+add $192,%r11
+sub %r11,%rsp
+
+# qhasm: int64 rop
+
+# qhasm: int64 op1
+
+# qhasm: int64 op2
+
+# qhasm: input rop
+
+# qhasm: input op1
+
+# qhasm: input op2
+
+# qhasm: stack1536 mystack
+
+# qhasm: int64 c1
+
+# qhasm: int64 c2
+
+# qhasm: int64 c3
+
+# qhasm: int64 c4
+
+# qhasm: int64 c5
+
+# qhasm: int64 c6
+
+# qhasm: int64 c7
+
+# qhasm: caller c1
+
+# qhasm: caller c2
+
+# qhasm: caller c3
+
+# qhasm: caller c4
+
+# qhasm: caller c5
+
+# qhasm: caller c6
+
+# qhasm: caller c7
+
+# qhasm: stack64 c1_stack
+
+# qhasm: stack64 c2_stack
+
+# qhasm: stack64 c3_stack
+
+# qhasm: stack64 c4_stack
+
+# qhasm: stack64 c5_stack
+
+# qhasm: stack64 c6_stack
+
+# qhasm: stack64 c7_stack
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 0yoff
+
+# qhasm: int6464 0r0
+
+# qhasm: int6464 0r1
+
+# qhasm: int6464 0r2
+
+# qhasm: int6464 0r3
+
+# qhasm: int6464 0r4
+
+# qhasm: int6464 0r5
+
+# qhasm: int6464 0r6
+
+# qhasm: int6464 0r7
+
+# qhasm: int6464 0r8
+
+# qhasm: int6464 0r9
+
+# qhasm: int6464 0r10
+
+# qhasm: int6464 0r11
+
+# qhasm: int6464 0r12
+
+# qhasm: int6464 0r13
+
+# qhasm: int6464 0r14
+
+# qhasm: int6464 0r15
+
+# qhasm: int6464 0r16
+
+# qhasm: int6464 0r17
+
+# qhasm: int6464 0r18
+
+# qhasm: int6464 0r19
+
+# qhasm: int6464 0r20
+
+# qhasm: int6464 0r21
+
+# qhasm: int6464 0r22
+
+# qhasm: int6464 0t0
+
+# qhasm: int6464 0t1
+
+# qhasm: int6464 0t2
+
+# qhasm: int6464 0t3
+
+# qhasm: int6464 0t4
+
+# qhasm: int6464 0t5
+
+# qhasm: int6464 0t6
+
+# qhasm: int6464 0t7
+
+# qhasm: int6464 0t8
+
+# qhasm: int6464 0t9
+
+# qhasm: int6464 0t10
+
+# qhasm: int6464 0t11
+
+# qhasm: int6464 0t12
+
+# qhasm: int6464 0t13
+
+# qhasm: int6464 0t14
+
+# qhasm: int6464 0t15
+
+# qhasm: int6464 0t16
+
+# qhasm: int6464 0t17
+
+# qhasm: int6464 0t18
+
+# qhasm: int6464 0t19
+
+# qhasm: int6464 0t20
+
+# qhasm: int6464 0t21
+
+# qhasm: int6464 0t22
+
+# qhasm: int6464 0ab0
+
+# qhasm: int6464 0ab1
+
+# qhasm: int6464 0ab2
+
+# qhasm: int6464 0ab3
+
+# qhasm: int6464 0ab4
+
+# qhasm: int6464 0ab5
+
+# qhasm: int6464 0ab6
+
+# qhasm: int6464 0ab7
+
+# qhasm: int6464 0ab8
+
+# qhasm: int6464 0ab9
+
+# qhasm: int6464 0ab10
+
+# qhasm: int6464 0ab11
+
+# qhasm: int6464 0ab0six
+
+# qhasm: int6464 0ab1six
+
+# qhasm: int6464 0ab2six
+
+# qhasm: int6464 0ab3six
+
+# qhasm: int6464 0ab4six
+
+# qhasm: int6464 0ab5six
+
+# qhasm: int6464 0ab6six
+
+# qhasm: int6464 0ab7six
+
+# qhasm: int6464 0ab8six
+
+# qhasm: int6464 0ab9six
+
+# qhasm: int6464 0ab10six
+
+# qhasm: int6464 0ab11six
+
+# qhasm: int64 0mysp
+
+# qhasm: 0mysp = &mystack
+# asm 1: leaq <mystack=stack1536#1,>0mysp=int64#4
+# asm 2: leaq <mystack=0(%rsp),>0mysp=%rcx
+leaq 0(%rsp),%rcx
+
+# qhasm: 0ab0[0] = *(float64 *)(op1 + 0)
+# asm 1: movlpd 0(<op1=int64#2),>0ab0=int6464#1
+# asm 2: movlpd 0(<op1=%rsi),>0ab0=%xmm0
+movlpd 0(%rsi),%xmm0
+
+# qhasm: 0t0 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t0=int6464#2
+# asm 2: movdqa <0ab0=%xmm0,>0t0=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: float6464 0t0[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t0=int6464#2
+# asm 2: mulsd 0(<op2=%rdx),<0t0=%xmm1
+mulsd 0(%rdx),%xmm1
+
+# qhasm: 0r0 =0t0
+# asm 1: movdqa <0t0=int6464#2,>0r0=int6464#2
+# asm 2: movdqa <0t0=%xmm1,>0r0=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: 0t1 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t1=int6464#3
+# asm 2: movdqa <0ab0=%xmm0,>0t1=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: float6464 0t1[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t1=int6464#3
+# asm 2: mulsd 8(<op2=%rdx),<0t1=%xmm2
+mulsd 8(%rdx),%xmm2
+
+# qhasm: 0r1 =0t1
+# asm 1: movdqa <0t1=int6464#3,>0r1=int6464#3
+# asm 2: movdqa <0t1=%xmm2,>0r1=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: 0t2 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t2=int6464#4
+# asm 2: movdqa <0ab0=%xmm0,>0t2=%xmm3
+movdqa %xmm0,%xmm3
+
+# qhasm: float6464 0t2[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t2=int6464#4
+# asm 2: mulsd 16(<op2=%rdx),<0t2=%xmm3
+mulsd 16(%rdx),%xmm3
+
+# qhasm: 0r2 =0t2
+# asm 1: movdqa <0t2=int6464#4,>0r2=int6464#4
+# asm 2: movdqa <0t2=%xmm3,>0r2=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: 0t3 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t3=int6464#5
+# asm 2: movdqa <0ab0=%xmm0,>0t3=%xmm4
+movdqa %xmm0,%xmm4
+
+# qhasm: float6464 0t3[0] *= *(float64  *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t3=int6464#5
+# asm 2: mulsd 24(<op2=%rdx),<0t3=%xmm4
+mulsd 24(%rdx),%xmm4
+
+# qhasm: 0r3 =0t3
+# asm 1: movdqa <0t3=int6464#5,>0r3=int6464#5
+# asm 2: movdqa <0t3=%xmm4,>0r3=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: 0t4 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t4=int6464#6
+# asm 2: movdqa <0ab0=%xmm0,>0t4=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm: float6464 0t4[0] *= *(float64  *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t4=int6464#6
+# asm 2: mulsd 32(<op2=%rdx),<0t4=%xmm5
+mulsd 32(%rdx),%xmm5
+
+# qhasm: 0r4 =0t4
+# asm 1: movdqa <0t4=int6464#6,>0r4=int6464#6
+# asm 2: movdqa <0t4=%xmm5,>0r4=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: 0t5 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t5=int6464#7
+# asm 2: movdqa <0ab0=%xmm0,>0t5=%xmm6
+movdqa %xmm0,%xmm6
+
+# qhasm: float6464 0t5[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t5=int6464#7
+# asm 2: mulsd 40(<op2=%rdx),<0t5=%xmm6
+mulsd 40(%rdx),%xmm6
+
+# qhasm: 0r5 =0t5
+# asm 1: movdqa <0t5=int6464#7,>0r5=int6464#7
+# asm 2: movdqa <0t5=%xmm6,>0r5=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: 0t6 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t6=int6464#8
+# asm 2: movdqa <0ab0=%xmm0,>0t6=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm: float6464 0t6[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t6=int6464#8
+# asm 2: mulsd 48(<op2=%rdx),<0t6=%xmm7
+mulsd 48(%rdx),%xmm7
+
+# qhasm: 0r6 =0t6
+# asm 1: movdqa <0t6=int6464#8,>0r6=int6464#8
+# asm 2: movdqa <0t6=%xmm7,>0r6=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: 0t7 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t7=int6464#9
+# asm 2: movdqa <0ab0=%xmm0,>0t7=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t7=int6464#9
+# asm 2: mulsd 56(<op2=%rdx),<0t7=%xmm8
+mulsd 56(%rdx),%xmm8
+
+# qhasm: 0r7 =0t7
+# asm 1: movdqa <0t7=int6464#9,>0r7=int6464#9
+# asm 2: movdqa <0t7=%xmm8,>0r7=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: 0t8 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t8=int6464#10
+# asm 2: movdqa <0ab0=%xmm0,>0t8=%xmm9
+movdqa %xmm0,%xmm9
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t8=int6464#10
+# asm 2: mulsd 64(<op2=%rdx),<0t8=%xmm9
+mulsd 64(%rdx),%xmm9
+
+# qhasm: 0r8 =0t8
+# asm 1: movdqa <0t8=int6464#10,>0r8=int6464#10
+# asm 2: movdqa <0t8=%xmm9,>0r8=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: 0t9 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t9=int6464#11
+# asm 2: movdqa <0ab0=%xmm0,>0t9=%xmm10
+movdqa %xmm0,%xmm10
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t9=int6464#11
+# asm 2: mulsd 72(<op2=%rdx),<0t9=%xmm10
+mulsd 72(%rdx),%xmm10
+
+# qhasm: 0r9 =0t9
+# asm 1: movdqa <0t9=int6464#11,>0r9=int6464#11
+# asm 2: movdqa <0t9=%xmm10,>0r9=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: 0t10 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t10=int6464#12
+# asm 2: movdqa <0ab0=%xmm0,>0t10=%xmm11
+movdqa %xmm0,%xmm11
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t10=int6464#12
+# asm 2: mulsd 80(<op2=%rdx),<0t10=%xmm11
+mulsd 80(%rdx),%xmm11
+
+# qhasm: 0r10 =0t10
+# asm 1: movdqa <0t10=int6464#12,>0r10=int6464#12
+# asm 2: movdqa <0t10=%xmm11,>0r10=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: 0t11 = 0ab0
+# asm 1: movdqa <0ab0=int6464#1,>0t11=int6464#1
+# asm 2: movdqa <0ab0=%xmm0,>0t11=%xmm0
+movdqa %xmm0,%xmm0
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t11=int6464#1
+# asm 2: mulsd 88(<op2=%rdx),<0t11=%xmm0
+mulsd 88(%rdx),%xmm0
+
+# qhasm: 0r11 =0t11
+# asm 1: movdqa <0t11=int6464#1,>0r11=int6464#1
+# asm 2: movdqa <0t11=%xmm0,>0r11=%xmm0
+movdqa %xmm0,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 0) = 0r0[0]
+# asm 1: movlpd <0r0=int6464#2,0(<0mysp=int64#4)
+# asm 2: movlpd <0r0=%xmm1,0(<0mysp=%rcx)
+movlpd %xmm1,0(%rcx)
+
+# qhasm: 0ab1[0] = *(float64 *)(op1 + 8)
+# asm 1: movlpd 8(<op1=int64#2),>0ab1=int6464#2
+# asm 2: movlpd 8(<op1=%rsi),>0ab1=%xmm1
+movlpd 8(%rsi),%xmm1
+
+# qhasm: 0ab1six = 0ab1
+# asm 1: movdqa <0ab1=int6464#2,>0ab1six=int6464#13
+# asm 2: movdqa <0ab1=%xmm1,>0ab1six=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: float6464 0ab1six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab1six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab1six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t1 = 0ab1
+# asm 1: movdqa <0ab1=int6464#2,>0t1=int6464#14
+# asm 2: movdqa <0ab1=%xmm1,>0t1=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0t1[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t1=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t1=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r1[0] +=0t1[0]
+# asm 1: addsd <0t1=int6464#14,<0r1=int6464#3
+# asm 2: addsd <0t1=%xmm13,<0r1=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t7 = 0ab1
+# asm 1: movdqa <0ab1=int6464#2,>0t7=int6464#2
+# asm 2: movdqa <0ab1=%xmm1,>0t7=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t7=int6464#2
+# asm 2: mulsd 48(<op2=%rdx),<0t7=%xmm1
+mulsd 48(%rdx),%xmm1
+
+# qhasm: float6464 0r7[0] +=0t7[0]
+# asm 1: addsd <0t7=int6464#2,<0r7=int6464#9
+# asm 2: addsd <0t7=%xmm1,<0r7=%xmm8
+addsd %xmm1,%xmm8
+
+# qhasm: 0t2 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t2=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t2=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t2[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t2=int6464#2
+# asm 2: mulsd 8(<op2=%rdx),<0t2=%xmm1
+mulsd 8(%rdx),%xmm1
+
+# qhasm: float6464 0r2[0] +=0t2[0]
+# asm 1: addsd <0t2=int6464#2,<0r2=int6464#4
+# asm 2: addsd <0t2=%xmm1,<0r2=%xmm3
+addsd %xmm1,%xmm3
+
+# qhasm: 0t3 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t3=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t3=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t3[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t3=int6464#2
+# asm 2: mulsd 16(<op2=%rdx),<0t3=%xmm1
+mulsd 16(%rdx),%xmm1
+
+# qhasm: float6464 0r3[0] +=0t3[0]
+# asm 1: addsd <0t3=int6464#2,<0r3=int6464#5
+# asm 2: addsd <0t3=%xmm1,<0r3=%xmm4
+addsd %xmm1,%xmm4
+
+# qhasm: 0t4 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t4=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t4=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t4[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t4=int6464#2
+# asm 2: mulsd 24(<op2=%rdx),<0t4=%xmm1
+mulsd 24(%rdx),%xmm1
+
+# qhasm: float6464 0r4[0] +=0t4[0]
+# asm 1: addsd <0t4=int6464#2,<0r4=int6464#6
+# asm 2: addsd <0t4=%xmm1,<0r4=%xmm5
+addsd %xmm1,%xmm5
+
+# qhasm: 0t5 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t5=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t5=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t5[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t5=int6464#2
+# asm 2: mulsd 32(<op2=%rdx),<0t5=%xmm1
+mulsd 32(%rdx),%xmm1
+
+# qhasm: float6464 0r5[0] +=0t5[0]
+# asm 1: addsd <0t5=int6464#2,<0r5=int6464#7
+# asm 2: addsd <0t5=%xmm1,<0r5=%xmm6
+addsd %xmm1,%xmm6
+
+# qhasm: 0t6 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t6=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t6=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t6[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t6=int6464#2
+# asm 2: mulsd 40(<op2=%rdx),<0t6=%xmm1
+mulsd 40(%rdx),%xmm1
+
+# qhasm: float6464 0r6[0] +=0t6[0]
+# asm 1: addsd <0t6=int6464#2,<0r6=int6464#8
+# asm 2: addsd <0t6=%xmm1,<0r6=%xmm7
+addsd %xmm1,%xmm7
+
+# qhasm: 0t8 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t8=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t8=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t8=int6464#2
+# asm 2: mulsd 56(<op2=%rdx),<0t8=%xmm1
+mulsd 56(%rdx),%xmm1
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#2,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm1,<0r8=%xmm9
+addsd %xmm1,%xmm9
+
+# qhasm: 0t9 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t9=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t9=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t9=int6464#2
+# asm 2: mulsd 64(<op2=%rdx),<0t9=%xmm1
+mulsd 64(%rdx),%xmm1
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#2,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm1,<0r9=%xmm10
+addsd %xmm1,%xmm10
+
+# qhasm: 0t10 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t10=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t10=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t10=int6464#2
+# asm 2: mulsd 72(<op2=%rdx),<0t10=%xmm1
+mulsd 72(%rdx),%xmm1
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#2,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm1,<0r10=%xmm11
+addsd %xmm1,%xmm11
+
+# qhasm: 0t11 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t11=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t11=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t11=int6464#2
+# asm 2: mulsd 80(<op2=%rdx),<0t11=%xmm1
+mulsd 80(%rdx),%xmm1
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#2,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm1,<0r11=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t12 = 0ab1six
+# asm 1: movdqa <0ab1six=int6464#13,>0t12=int6464#2
+# asm 2: movdqa <0ab1six=%xmm12,>0t12=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t12=int6464#2
+# asm 2: mulsd 88(<op2=%rdx),<0t12=%xmm1
+mulsd 88(%rdx),%xmm1
+
+# qhasm: 0r12 =0t12
+# asm 1: movdqa <0t12=int6464#2,>0r12=int6464#2
+# asm 2: movdqa <0t12=%xmm1,>0r12=%xmm1
+movdqa %xmm1,%xmm1
+
+# qhasm: *(float64 *)(0mysp + 8) = 0r1[0]
+# asm 1: movlpd <0r1=int6464#3,8(<0mysp=int64#4)
+# asm 2: movlpd <0r1=%xmm2,8(<0mysp=%rcx)
+movlpd %xmm2,8(%rcx)
+
+# qhasm: 0ab2[0] = *(float64 *)(op1 + 16)
+# asm 1: movlpd 16(<op1=int64#2),>0ab2=int6464#3
+# asm 2: movlpd 16(<op1=%rsi),>0ab2=%xmm2
+movlpd 16(%rsi),%xmm2
+
+# qhasm: 0ab2six = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0ab2six=int6464#13
+# asm 2: movdqa <0ab2=%xmm2,>0ab2six=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: float6464 0ab2six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab2six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab2six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t2 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0t2=int6464#14
+# asm 2: movdqa <0ab2=%xmm2,>0t2=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0t2[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t2=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t2=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r2[0] +=0t2[0]
+# asm 1: addsd <0t2=int6464#14,<0r2=int6464#4
+# asm 2: addsd <0t2=%xmm13,<0r2=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: 0t7 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0t7=int6464#14
+# asm 2: movdqa <0ab2=%xmm2,>0t7=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t7=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t7=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r7[0] +=0t7[0]
+# asm 1: addsd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addsd <0t7=%xmm13,<0r7=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0t8=int6464#14
+# asm 2: movdqa <0ab2=%xmm2,>0t8=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t8=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t8=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm13,<0r8=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: 0t13 = 0ab2
+# asm 1: movdqa <0ab2=int6464#3,>0t13=int6464#3
+# asm 2: movdqa <0ab2=%xmm2,>0t13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t13=int6464#3
+# asm 2: mulsd 88(<op2=%rdx),<0t13=%xmm2
+mulsd 88(%rdx),%xmm2
+
+# qhasm: 0r13 =0t13
+# asm 1: movdqa <0t13=int6464#3,>0r13=int6464#3
+# asm 2: movdqa <0t13=%xmm2,>0r13=%xmm2
+movdqa %xmm2,%xmm2
+
+# qhasm: 0t3 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t3=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t3=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t3[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t3=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t3=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r3[0] +=0t3[0]
+# asm 1: addsd <0t3=int6464#14,<0r3=int6464#5
+# asm 2: addsd <0t3=%xmm13,<0r3=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: 0t4 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t4=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t4=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t4[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t4=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t4=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r4[0] +=0t4[0]
+# asm 1: addsd <0t4=int6464#14,<0r4=int6464#6
+# asm 2: addsd <0t4=%xmm13,<0r4=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: 0t5 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t5=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t5[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t5=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t5=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r5[0] +=0t5[0]
+# asm 1: addsd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addsd <0t5=%xmm13,<0r5=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t6 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t6=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t6=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r6[0] +=0t6[0]
+# asm 1: addsd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addsd <0t6=%xmm13,<0r6=%xmm7
+addsd %xmm13,%xmm7
+
+# qhasm: 0t9 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t9=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t9=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t9=int6464#14
+# asm 2: mulsd 56(<op2=%rdx),<0t9=%xmm13
+mulsd 56(%rdx),%xmm13
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm13,<0r9=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t10=int6464#14
+# asm 2: mulsd 64(<op2=%rdx),<0t10=%xmm13
+mulsd 64(%rdx),%xmm13
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm13,<0r10=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab2six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 72(<op2=%rdx),<0t11=%xmm13
+mulsd 72(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t12 = 0ab2six
+# asm 1: movdqa <0ab2six=int6464#13,>0t12=int6464#13
+# asm 2: movdqa <0ab2six=%xmm12,>0t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t12=int6464#13
+# asm 2: mulsd 80(<op2=%rdx),<0t12=%xmm12
+mulsd 80(%rdx),%xmm12
+
+# qhasm: float6464 0r12[0] += 0t12[0]
+# asm 1: addsd <0t12=int6464#13,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm12,<0r12=%xmm1
+addsd %xmm12,%xmm1
+
+# qhasm: *(float64 *)(0mysp + 16) = 0r2[0]
+# asm 1: movlpd <0r2=int6464#4,16(<0mysp=int64#4)
+# asm 2: movlpd <0r2=%xmm3,16(<0mysp=%rcx)
+movlpd %xmm3,16(%rcx)
+
+# qhasm: 0ab3[0] = *(float64 *)(op1 + 24)
+# asm 1: movlpd 24(<op1=int64#2),>0ab3=int6464#4
+# asm 2: movlpd 24(<op1=%rsi),>0ab3=%xmm3
+movlpd 24(%rsi),%xmm3
+
+# qhasm: 0ab3six = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0ab3six=int6464#13
+# asm 2: movdqa <0ab3=%xmm3,>0ab3six=%xmm12
+movdqa %xmm3,%xmm12
+
+# qhasm: float6464 0ab3six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab3six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab3six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t3 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t3=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t3=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t3[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t3=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t3=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r3[0] +=0t3[0]
+# asm 1: addsd <0t3=int6464#14,<0r3=int6464#5
+# asm 2: addsd <0t3=%xmm13,<0r3=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: 0t7 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t7=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t7=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t7=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t7=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r7[0] +=0t7[0]
+# asm 1: addsd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addsd <0t7=%xmm13,<0r7=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t8=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t8=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t8=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t8=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm13,<0r8=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: 0t9 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t9=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t9=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t9=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t9=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm13,<0r9=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: 0t13 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t13=int6464#14
+# asm 2: movdqa <0ab3=%xmm3,>0t13=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t13=int6464#14
+# asm 2: mulsd 80(<op2=%rdx),<0t13=%xmm13
+mulsd 80(%rdx),%xmm13
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#14,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm13,<0r13=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t14 = 0ab3
+# asm 1: movdqa <0ab3=int6464#4,>0t14=int6464#4
+# asm 2: movdqa <0ab3=%xmm3,>0t14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t14=int6464#4
+# asm 2: mulsd 88(<op2=%rdx),<0t14=%xmm3
+mulsd 88(%rdx),%xmm3
+
+# qhasm: 0r14 =0t14
+# asm 1: movdqa <0t14=int6464#4,>0r14=int6464#4
+# asm 2: movdqa <0t14=%xmm3,>0r14=%xmm3
+movdqa %xmm3,%xmm3
+
+# qhasm: 0t4 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t4=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t4=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t4[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t4=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t4=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r4[0] +=0t4[0]
+# asm 1: addsd <0t4=int6464#14,<0r4=int6464#6
+# asm 2: addsd <0t4=%xmm13,<0r4=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: 0t5 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t5=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t5[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t5=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t5=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r5[0] +=0t5[0]
+# asm 1: addsd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addsd <0t5=%xmm13,<0r5=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t6 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t6=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t6=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r6[0] +=0t6[0]
+# asm 1: addsd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addsd <0t6=%xmm13,<0r6=%xmm7
+addsd %xmm13,%xmm7
+
+# qhasm: 0t10 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t10=int6464#14
+# asm 2: mulsd 56(<op2=%rdx),<0t10=%xmm13
+mulsd 56(%rdx),%xmm13
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm13,<0r10=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab3six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 64(<op2=%rdx),<0t11=%xmm13
+mulsd 64(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t12 = 0ab3six
+# asm 1: movdqa <0ab3six=int6464#13,>0t12=int6464#13
+# asm 2: movdqa <0ab3six=%xmm12,>0t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t12=int6464#13
+# asm 2: mulsd 72(<op2=%rdx),<0t12=%xmm12
+mulsd 72(%rdx),%xmm12
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#13,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm12,<0r12=%xmm1
+addsd %xmm12,%xmm1
+
+# qhasm: *(float64 *)(0mysp + 24) = 0r3[0]
+# asm 1: movlpd <0r3=int6464#5,24(<0mysp=int64#4)
+# asm 2: movlpd <0r3=%xmm4,24(<0mysp=%rcx)
+movlpd %xmm4,24(%rcx)
+
+# qhasm: 0ab4[0] = *(float64 *)(op1 + 32)
+# asm 1: movlpd 32(<op1=int64#2),>0ab4=int6464#5
+# asm 2: movlpd 32(<op1=%rsi),>0ab4=%xmm4
+movlpd 32(%rsi),%xmm4
+
+# qhasm: 0ab4six = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0ab4six=int6464#13
+# asm 2: movdqa <0ab4=%xmm4,>0ab4six=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 0ab4six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab4six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab4six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t4 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t4=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t4=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t4[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t4=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t4=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r4[0] +=0t4[0]
+# asm 1: addsd <0t4=int6464#14,<0r4=int6464#6
+# asm 2: addsd <0t4=%xmm13,<0r4=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: 0t7 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t7=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t7=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t7=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t7=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r7[0] +=0t7[0]
+# asm 1: addsd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addsd <0t7=%xmm13,<0r7=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t8=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t8=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t8=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t8=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm13,<0r8=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: 0t9 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t9=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t9=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t9=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t9=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm13,<0r9=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t10=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t10=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t10=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t10=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm13,<0r10=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: 0t13 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t13=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t13=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t13=int6464#14
+# asm 2: mulsd 72(<op2=%rdx),<0t13=%xmm13
+mulsd 72(%rdx),%xmm13
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#14,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm13,<0r13=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t14 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t14=int6464#14
+# asm 2: movdqa <0ab4=%xmm4,>0t14=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t14=int6464#14
+# asm 2: mulsd 80(<op2=%rdx),<0t14=%xmm13
+mulsd 80(%rdx),%xmm13
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#14,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm13,<0r14=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: 0t15 = 0ab4
+# asm 1: movdqa <0ab4=int6464#5,>0t15=int6464#5
+# asm 2: movdqa <0ab4=%xmm4,>0t15=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t15=int6464#5
+# asm 2: mulsd 88(<op2=%rdx),<0t15=%xmm4
+mulsd 88(%rdx),%xmm4
+
+# qhasm: 0r15 =0t15
+# asm 1: movdqa <0t15=int6464#5,>0r15=int6464#5
+# asm 2: movdqa <0t15=%xmm4,>0r15=%xmm4
+movdqa %xmm4,%xmm4
+
+# qhasm: 0t5 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>0t5=int6464#14
+# asm 2: movdqa <0ab4six=%xmm12,>0t5=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t5[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t5=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t5=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r5[0] +=0t5[0]
+# asm 1: addsd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addsd <0t5=%xmm13,<0r5=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t6 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab4six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t6=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t6=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r6[0] +=0t6[0]
+# asm 1: addsd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addsd <0t6=%xmm13,<0r6=%xmm7
+addsd %xmm13,%xmm7
+
+# qhasm: 0t11 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab4six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 56(<op2=%rdx),<0t11=%xmm13
+mulsd 56(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t12 = 0ab4six
+# asm 1: movdqa <0ab4six=int6464#13,>0t12=int6464#13
+# asm 2: movdqa <0ab4six=%xmm12,>0t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t12=int6464#13
+# asm 2: mulsd 64(<op2=%rdx),<0t12=%xmm12
+mulsd 64(%rdx),%xmm12
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#13,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm12,<0r12=%xmm1
+addsd %xmm12,%xmm1
+
+# qhasm: *(float64 *)(0mysp + 32) = 0r4[0]
+# asm 1: movlpd <0r4=int6464#6,32(<0mysp=int64#4)
+# asm 2: movlpd <0r4=%xmm5,32(<0mysp=%rcx)
+movlpd %xmm5,32(%rcx)
+
+# qhasm: 0ab5[0] = *(float64 *)(op1 + 40)
+# asm 1: movlpd 40(<op1=int64#2),>0ab5=int6464#6
+# asm 2: movlpd 40(<op1=%rsi),>0ab5=%xmm5
+movlpd 40(%rsi),%xmm5
+
+# qhasm: 0ab5six = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0ab5six=int6464#13
+# asm 2: movdqa <0ab5=%xmm5,>0ab5six=%xmm12
+movdqa %xmm5,%xmm12
+
+# qhasm: float6464 0ab5six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab5six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab5six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t5 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t5=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t5=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t5[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t5=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t5=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r5[0] +=0t5[0]
+# asm 1: addsd <0t5=int6464#14,<0r5=int6464#7
+# asm 2: addsd <0t5=%xmm13,<0r5=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t7 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t7=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t7=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t7=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t7=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r7[0] +=0t7[0]
+# asm 1: addsd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addsd <0t7=%xmm13,<0r7=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t8 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t8=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t8=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t8=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t8=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm13,<0r8=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: 0t9 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t9=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t9=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t9=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t9=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm13,<0r9=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t10=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t10=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t10=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t10=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm13,<0r10=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t11=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t11=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t11=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t13 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t13=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t13=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t13=int6464#14
+# asm 2: mulsd 64(<op2=%rdx),<0t13=%xmm13
+mulsd 64(%rdx),%xmm13
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#14,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm13,<0r13=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t14 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t14=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t14=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t14=int6464#14
+# asm 2: mulsd 72(<op2=%rdx),<0t14=%xmm13
+mulsd 72(%rdx),%xmm13
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#14,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm13,<0r14=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: 0t15 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t15=int6464#14
+# asm 2: movdqa <0ab5=%xmm5,>0t15=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t15=int6464#14
+# asm 2: mulsd 80(<op2=%rdx),<0t15=%xmm13
+mulsd 80(%rdx),%xmm13
+
+# qhasm: float6464 0r15[0] +=0t15[0]
+# asm 1: addsd <0t15=int6464#14,<0r15=int6464#5
+# asm 2: addsd <0t15=%xmm13,<0r15=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: 0t16 = 0ab5
+# asm 1: movdqa <0ab5=int6464#6,>0t16=int6464#6
+# asm 2: movdqa <0ab5=%xmm5,>0t16=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: float6464 0t16[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t16=int6464#6
+# asm 2: mulsd 88(<op2=%rdx),<0t16=%xmm5
+mulsd 88(%rdx),%xmm5
+
+# qhasm: 0r16 =0t16
+# asm 1: movdqa <0t16=int6464#6,>0r16=int6464#6
+# asm 2: movdqa <0t16=%xmm5,>0r16=%xmm5
+movdqa %xmm5,%xmm5
+
+# qhasm: 0t6 = 0ab5six
+# asm 1: movdqa <0ab5six=int6464#13,>0t6=int6464#14
+# asm 2: movdqa <0ab5six=%xmm12,>0t6=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t6[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t6=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t6=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r6[0] +=0t6[0]
+# asm 1: addsd <0t6=int6464#14,<0r6=int6464#8
+# asm 2: addsd <0t6=%xmm13,<0r6=%xmm7
+addsd %xmm13,%xmm7
+
+# qhasm: 0t12 = 0ab5six
+# asm 1: movdqa <0ab5six=int6464#13,>0t12=int6464#13
+# asm 2: movdqa <0ab5six=%xmm12,>0t12=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t12=int6464#13
+# asm 2: mulsd 56(<op2=%rdx),<0t12=%xmm12
+mulsd 56(%rdx),%xmm12
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#13,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm12,<0r12=%xmm1
+addsd %xmm12,%xmm1
+
+# qhasm: *(float64 *)(0mysp + 40) = 0r5[0]
+# asm 1: movlpd <0r5=int6464#7,40(<0mysp=int64#4)
+# asm 2: movlpd <0r5=%xmm6,40(<0mysp=%rcx)
+movlpd %xmm6,40(%rcx)
+
+# qhasm: 0ab6[0] = *(float64 *)(op1 + 48)
+# asm 1: movlpd 48(<op1=int64#2),>0ab6=int6464#7
+# asm 2: movlpd 48(<op1=%rsi),>0ab6=%xmm6
+movlpd 48(%rsi),%xmm6
+
+# qhasm: 0t6 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t6=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t6=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t6[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t6=int6464#13
+# asm 2: mulsd 0(<op2=%rdx),<0t6=%xmm12
+mulsd 0(%rdx),%xmm12
+
+# qhasm: float6464 0r6[0] +=0t6[0]
+# asm 1: addsd <0t6=int6464#13,<0r6=int6464#8
+# asm 2: addsd <0t6=%xmm12,<0r6=%xmm7
+addsd %xmm12,%xmm7
+
+# qhasm: 0t7 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t7=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t7=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t7=int6464#13
+# asm 2: mulsd 8(<op2=%rdx),<0t7=%xmm12
+mulsd 8(%rdx),%xmm12
+
+# qhasm: float6464 0r7[0] +=0t7[0]
+# asm 1: addsd <0t7=int6464#13,<0r7=int6464#9
+# asm 2: addsd <0t7=%xmm12,<0r7=%xmm8
+addsd %xmm12,%xmm8
+
+# qhasm: 0t8 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t8=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t8=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t8=int6464#13
+# asm 2: mulsd 16(<op2=%rdx),<0t8=%xmm12
+mulsd 16(%rdx),%xmm12
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#13,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm12,<0r8=%xmm9
+addsd %xmm12,%xmm9
+
+# qhasm: 0t9 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t9=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t9=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t9=int6464#13
+# asm 2: mulsd 24(<op2=%rdx),<0t9=%xmm12
+mulsd 24(%rdx),%xmm12
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#13,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm12,<0r9=%xmm10
+addsd %xmm12,%xmm10
+
+# qhasm: 0t10 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t10=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t10=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t10=int6464#13
+# asm 2: mulsd 32(<op2=%rdx),<0t10=%xmm12
+mulsd 32(%rdx),%xmm12
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#13,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm12,<0r10=%xmm11
+addsd %xmm12,%xmm11
+
+# qhasm: 0t11 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t11=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t11=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t11=int6464#13
+# asm 2: mulsd 40(<op2=%rdx),<0t11=%xmm12
+mulsd 40(%rdx),%xmm12
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#13,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm12,<0r11=%xmm0
+addsd %xmm12,%xmm0
+
+# qhasm: 0t12 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t12=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t12=int6464#13
+# asm 2: mulsd 48(<op2=%rdx),<0t12=%xmm12
+mulsd 48(%rdx),%xmm12
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#13,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm12,<0r12=%xmm1
+addsd %xmm12,%xmm1
+
+# qhasm: 0t13 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t13=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t13=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t13=int6464#13
+# asm 2: mulsd 56(<op2=%rdx),<0t13=%xmm12
+mulsd 56(%rdx),%xmm12
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#13,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm12,<0r13=%xmm2
+addsd %xmm12,%xmm2
+
+# qhasm: 0t14 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t14=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t14=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t14=int6464#13
+# asm 2: mulsd 64(<op2=%rdx),<0t14=%xmm12
+mulsd 64(%rdx),%xmm12
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#13,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm12,<0r14=%xmm3
+addsd %xmm12,%xmm3
+
+# qhasm: 0t15 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t15=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t15=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t15=int6464#13
+# asm 2: mulsd 72(<op2=%rdx),<0t15=%xmm12
+mulsd 72(%rdx),%xmm12
+
+# qhasm: float6464 0r15[0] +=0t15[0]
+# asm 1: addsd <0t15=int6464#13,<0r15=int6464#5
+# asm 2: addsd <0t15=%xmm12,<0r15=%xmm4
+addsd %xmm12,%xmm4
+
+# qhasm: 0t16 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t16=int6464#13
+# asm 2: movdqa <0ab6=%xmm6,>0t16=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm: float6464 0t16[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t16=int6464#13
+# asm 2: mulsd 80(<op2=%rdx),<0t16=%xmm12
+mulsd 80(%rdx),%xmm12
+
+# qhasm: float6464 0r16[0] +=0t16[0]
+# asm 1: addsd <0t16=int6464#13,<0r16=int6464#6
+# asm 2: addsd <0t16=%xmm12,<0r16=%xmm5
+addsd %xmm12,%xmm5
+
+# qhasm: 0t17 = 0ab6
+# asm 1: movdqa <0ab6=int6464#7,>0t17=int6464#7
+# asm 2: movdqa <0ab6=%xmm6,>0t17=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: float6464 0t17[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t17=int6464#7
+# asm 2: mulsd 88(<op2=%rdx),<0t17=%xmm6
+mulsd 88(%rdx),%xmm6
+
+# qhasm: 0r17 =0t17
+# asm 1: movdqa <0t17=int6464#7,>0r17=int6464#7
+# asm 2: movdqa <0t17=%xmm6,>0r17=%xmm6
+movdqa %xmm6,%xmm6
+
+# qhasm: *(float64 *)(0mysp + 48) = 0r6[0]
+# asm 1: movlpd <0r6=int6464#8,48(<0mysp=int64#4)
+# asm 2: movlpd <0r6=%xmm7,48(<0mysp=%rcx)
+movlpd %xmm7,48(%rcx)
+
+# qhasm: 0ab7[0] = *(float64 *)(op1 + 56)
+# asm 1: movlpd 56(<op1=int64#2),>0ab7=int6464#8
+# asm 2: movlpd 56(<op1=%rsi),>0ab7=%xmm7
+movlpd 56(%rsi),%xmm7
+
+# qhasm: 0ab7six = 0ab7
+# asm 1: movdqa <0ab7=int6464#8,>0ab7six=int6464#13
+# asm 2: movdqa <0ab7=%xmm7,>0ab7six=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 0ab7six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab7six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab7six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t7 = 0ab7
+# asm 1: movdqa <0ab7=int6464#8,>0t7=int6464#14
+# asm 2: movdqa <0ab7=%xmm7,>0t7=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0t7[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t7=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t7=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r7[0] +=0t7[0]
+# asm 1: addsd <0t7=int6464#14,<0r7=int6464#9
+# asm 2: addsd <0t7=%xmm13,<0r7=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t13 = 0ab7
+# asm 1: movdqa <0ab7=int6464#8,>0t13=int6464#8
+# asm 2: movdqa <0ab7=%xmm7,>0t13=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t13=int6464#8
+# asm 2: mulsd 48(<op2=%rdx),<0t13=%xmm7
+mulsd 48(%rdx),%xmm7
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#8,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm7,<0r13=%xmm2
+addsd %xmm7,%xmm2
+
+# qhasm: 0t8 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t8=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t8=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t8=int6464#8
+# asm 2: mulsd 8(<op2=%rdx),<0t8=%xmm7
+mulsd 8(%rdx),%xmm7
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#8,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm7,<0r8=%xmm9
+addsd %xmm7,%xmm9
+
+# qhasm: 0t9 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t9=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t9=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t9=int6464#8
+# asm 2: mulsd 16(<op2=%rdx),<0t9=%xmm7
+mulsd 16(%rdx),%xmm7
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#8,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm7,<0r9=%xmm10
+addsd %xmm7,%xmm10
+
+# qhasm: 0t10 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t10=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t10=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t10=int6464#8
+# asm 2: mulsd 24(<op2=%rdx),<0t10=%xmm7
+mulsd 24(%rdx),%xmm7
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#8,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm7,<0r10=%xmm11
+addsd %xmm7,%xmm11
+
+# qhasm: 0t11 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t11=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t11=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t11=int6464#8
+# asm 2: mulsd 32(<op2=%rdx),<0t11=%xmm7
+mulsd 32(%rdx),%xmm7
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#8,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm7,<0r11=%xmm0
+addsd %xmm7,%xmm0
+
+# qhasm: 0t12 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t12=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t12=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t12=int6464#8
+# asm 2: mulsd 40(<op2=%rdx),<0t12=%xmm7
+mulsd 40(%rdx),%xmm7
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#8,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm7,<0r12=%xmm1
+addsd %xmm7,%xmm1
+
+# qhasm: 0t14 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t14=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t14=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t14=int6464#8
+# asm 2: mulsd 56(<op2=%rdx),<0t14=%xmm7
+mulsd 56(%rdx),%xmm7
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#8,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm7,<0r14=%xmm3
+addsd %xmm7,%xmm3
+
+# qhasm: 0t15 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t15=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t15=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t15=int6464#8
+# asm 2: mulsd 64(<op2=%rdx),<0t15=%xmm7
+mulsd 64(%rdx),%xmm7
+
+# qhasm: float6464 0r15[0] +=0t15[0]
+# asm 1: addsd <0t15=int6464#8,<0r15=int6464#5
+# asm 2: addsd <0t15=%xmm7,<0r15=%xmm4
+addsd %xmm7,%xmm4
+
+# qhasm: 0t16 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t16=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t16=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t16[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t16=int6464#8
+# asm 2: mulsd 72(<op2=%rdx),<0t16=%xmm7
+mulsd 72(%rdx),%xmm7
+
+# qhasm: float6464 0r16[0] +=0t16[0]
+# asm 1: addsd <0t16=int6464#8,<0r16=int6464#6
+# asm 2: addsd <0t16=%xmm7,<0r16=%xmm5
+addsd %xmm7,%xmm5
+
+# qhasm: 0t17 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t17=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t17=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t17[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t17=int6464#8
+# asm 2: mulsd 80(<op2=%rdx),<0t17=%xmm7
+mulsd 80(%rdx),%xmm7
+
+# qhasm: float6464 0r17[0] +=0t17[0]
+# asm 1: addsd <0t17=int6464#8,<0r17=int6464#7
+# asm 2: addsd <0t17=%xmm7,<0r17=%xmm6
+addsd %xmm7,%xmm6
+
+# qhasm: 0t18 = 0ab7six
+# asm 1: movdqa <0ab7six=int6464#13,>0t18=int6464#8
+# asm 2: movdqa <0ab7six=%xmm12,>0t18=%xmm7
+movdqa %xmm12,%xmm7
+
+# qhasm: float6464 0t18[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t18=int6464#8
+# asm 2: mulsd 88(<op2=%rdx),<0t18=%xmm7
+mulsd 88(%rdx),%xmm7
+
+# qhasm: 0r18 =0t18
+# asm 1: movdqa <0t18=int6464#8,>0r18=int6464#8
+# asm 2: movdqa <0t18=%xmm7,>0r18=%xmm7
+movdqa %xmm7,%xmm7
+
+# qhasm: *(float64 *)(0mysp + 56) = 0r7[0]
+# asm 1: movlpd <0r7=int6464#9,56(<0mysp=int64#4)
+# asm 2: movlpd <0r7=%xmm8,56(<0mysp=%rcx)
+movlpd %xmm8,56(%rcx)
+
+# qhasm: 0ab8[0] = *(float64 *)(op1 + 64)
+# asm 1: movlpd 64(<op1=int64#2),>0ab8=int6464#9
+# asm 2: movlpd 64(<op1=%rsi),>0ab8=%xmm8
+movlpd 64(%rsi),%xmm8
+
+# qhasm: 0ab8six = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>0ab8six=int6464#13
+# asm 2: movdqa <0ab8=%xmm8,>0ab8six=%xmm12
+movdqa %xmm8,%xmm12
+
+# qhasm: float6464 0ab8six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab8six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab8six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t8 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>0t8=int6464#14
+# asm 2: movdqa <0ab8=%xmm8,>0t8=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0t8[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t8=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t8=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r8[0] +=0t8[0]
+# asm 1: addsd <0t8=int6464#14,<0r8=int6464#10
+# asm 2: addsd <0t8=%xmm13,<0r8=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: 0t13 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>0t13=int6464#14
+# asm 2: movdqa <0ab8=%xmm8,>0t13=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t13=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t13=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#14,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm13,<0r13=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t14 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>0t14=int6464#14
+# asm 2: movdqa <0ab8=%xmm8,>0t14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t14=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t14=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#14,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm13,<0r14=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: 0t19 = 0ab8
+# asm 1: movdqa <0ab8=int6464#9,>0t19=int6464#9
+# asm 2: movdqa <0ab8=%xmm8,>0t19=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: float6464 0t19[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t19=int6464#9
+# asm 2: mulsd 88(<op2=%rdx),<0t19=%xmm8
+mulsd 88(%rdx),%xmm8
+
+# qhasm: 0r19 =0t19
+# asm 1: movdqa <0t19=int6464#9,>0r19=int6464#9
+# asm 2: movdqa <0t19=%xmm8,>0r19=%xmm8
+movdqa %xmm8,%xmm8
+
+# qhasm: 0t9 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t9=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t9=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t9=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t9=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm13,<0r9=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: 0t10 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t10=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t10=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm13,<0r10=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t11=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t12 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t12=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t12=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t12=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#14,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm13,<0r12=%xmm1
+addsd %xmm13,%xmm1
+
+# qhasm: 0t15 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t15=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t15=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t15=int6464#14
+# asm 2: mulsd 56(<op2=%rdx),<0t15=%xmm13
+mulsd 56(%rdx),%xmm13
+
+# qhasm: float6464 0r15[0] +=0t15[0]
+# asm 1: addsd <0t15=int6464#14,<0r15=int6464#5
+# asm 2: addsd <0t15=%xmm13,<0r15=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: 0t16 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t16=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t16=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t16[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t16=int6464#14
+# asm 2: mulsd 64(<op2=%rdx),<0t16=%xmm13
+mulsd 64(%rdx),%xmm13
+
+# qhasm: float6464 0r16[0] +=0t16[0]
+# asm 1: addsd <0t16=int6464#14,<0r16=int6464#6
+# asm 2: addsd <0t16=%xmm13,<0r16=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: 0t17 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t17=int6464#14
+# asm 2: movdqa <0ab8six=%xmm12,>0t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t17[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t17=int6464#14
+# asm 2: mulsd 72(<op2=%rdx),<0t17=%xmm13
+mulsd 72(%rdx),%xmm13
+
+# qhasm: float6464 0r17[0] +=0t17[0]
+# asm 1: addsd <0t17=int6464#14,<0r17=int6464#7
+# asm 2: addsd <0t17=%xmm13,<0r17=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t18 = 0ab8six
+# asm 1: movdqa <0ab8six=int6464#13,>0t18=int6464#13
+# asm 2: movdqa <0ab8six=%xmm12,>0t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t18[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t18=int6464#13
+# asm 2: mulsd 80(<op2=%rdx),<0t18=%xmm12
+mulsd 80(%rdx),%xmm12
+
+# qhasm: float6464 0r18[0] +=0t18[0]
+# asm 1: addsd <0t18=int6464#13,<0r18=int6464#8
+# asm 2: addsd <0t18=%xmm12,<0r18=%xmm7
+addsd %xmm12,%xmm7
+
+# qhasm: *(float64 *)(0mysp + 64) = 0r8[0]
+# asm 1: movlpd <0r8=int6464#10,64(<0mysp=int64#4)
+# asm 2: movlpd <0r8=%xmm9,64(<0mysp=%rcx)
+movlpd %xmm9,64(%rcx)
+
+# qhasm: 0ab9[0] = *(float64 *)(op1 + 72)
+# asm 1: movlpd 72(<op1=int64#2),>0ab9=int6464#10
+# asm 2: movlpd 72(<op1=%rsi),>0ab9=%xmm9
+movlpd 72(%rsi),%xmm9
+
+# qhasm: 0ab9six = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0ab9six=int6464#13
+# asm 2: movdqa <0ab9=%xmm9,>0ab9six=%xmm12
+movdqa %xmm9,%xmm12
+
+# qhasm: float6464 0ab9six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab9six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab9six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t9 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0t9=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>0t9=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0t9[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t9=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t9=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r9[0] +=0t9[0]
+# asm 1: addsd <0t9=int6464#14,<0r9=int6464#11
+# asm 2: addsd <0t9=%xmm13,<0r9=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: 0t13 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0t13=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>0t13=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t13=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t13=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#14,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm13,<0r13=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t14 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0t14=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>0t14=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t14=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t14=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#14,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm13,<0r14=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: 0t15 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0t15=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>0t15=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t15=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t15=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r15[0] +=0t15[0]
+# asm 1: addsd <0t15=int6464#14,<0r15=int6464#5
+# asm 2: addsd <0t15=%xmm13,<0r15=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: 0t19 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0t19=int6464#14
+# asm 2: movdqa <0ab9=%xmm9,>0t19=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0t19[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t19=int6464#14
+# asm 2: mulsd 80(<op2=%rdx),<0t19=%xmm13
+mulsd 80(%rdx),%xmm13
+
+# qhasm: float6464 0r19[0] +=0t19[0]
+# asm 1: addsd <0t19=int6464#14,<0r19=int6464#9
+# asm 2: addsd <0t19=%xmm13,<0r19=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t20 = 0ab9
+# asm 1: movdqa <0ab9=int6464#10,>0t20=int6464#10
+# asm 2: movdqa <0ab9=%xmm9,>0t20=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: float6464 0t20[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t20=int6464#10
+# asm 2: mulsd 88(<op2=%rdx),<0t20=%xmm9
+mulsd 88(%rdx),%xmm9
+
+# qhasm: 0r20 =0t20
+# asm 1: movdqa <0t20=int6464#10,>0r20=int6464#10
+# asm 2: movdqa <0t20=%xmm9,>0r20=%xmm9
+movdqa %xmm9,%xmm9
+
+# qhasm: 0t10 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t10=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>0t10=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t10=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t10=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm13,<0r10=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: 0t11 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t11=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t12 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t12=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>0t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t12=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t12=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#14,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm13,<0r12=%xmm1
+addsd %xmm13,%xmm1
+
+# qhasm: 0t16 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t16=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>0t16=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t16[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t16=int6464#14
+# asm 2: mulsd 56(<op2=%rdx),<0t16=%xmm13
+mulsd 56(%rdx),%xmm13
+
+# qhasm: float6464 0r16[0] +=0t16[0]
+# asm 1: addsd <0t16=int6464#14,<0r16=int6464#6
+# asm 2: addsd <0t16=%xmm13,<0r16=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: 0t17 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t17=int6464#14
+# asm 2: movdqa <0ab9six=%xmm12,>0t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t17[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t17=int6464#14
+# asm 2: mulsd 64(<op2=%rdx),<0t17=%xmm13
+mulsd 64(%rdx),%xmm13
+
+# qhasm: float6464 0r17[0] +=0t17[0]
+# asm 1: addsd <0t17=int6464#14,<0r17=int6464#7
+# asm 2: addsd <0t17=%xmm13,<0r17=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t18 = 0ab9six
+# asm 1: movdqa <0ab9six=int6464#13,>0t18=int6464#13
+# asm 2: movdqa <0ab9six=%xmm12,>0t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t18[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t18=int6464#13
+# asm 2: mulsd 72(<op2=%rdx),<0t18=%xmm12
+mulsd 72(%rdx),%xmm12
+
+# qhasm: float6464 0r18[0] +=0t18[0]
+# asm 1: addsd <0t18=int6464#13,<0r18=int6464#8
+# asm 2: addsd <0t18=%xmm12,<0r18=%xmm7
+addsd %xmm12,%xmm7
+
+# qhasm: *(float64 *)(0mysp + 72) = 0r9[0]
+# asm 1: movlpd <0r9=int6464#11,72(<0mysp=int64#4)
+# asm 2: movlpd <0r9=%xmm10,72(<0mysp=%rcx)
+movlpd %xmm10,72(%rcx)
+
+# qhasm: 0ab10[0] = *(float64 *)(op1 + 80)
+# asm 1: movlpd 80(<op1=int64#2),>0ab10=int6464#11
+# asm 2: movlpd 80(<op1=%rsi),>0ab10=%xmm10
+movlpd 80(%rsi),%xmm10
+
+# qhasm: 0ab10six = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0ab10six=int6464#13
+# asm 2: movdqa <0ab10=%xmm10,>0ab10six=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 0ab10six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab10six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab10six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t10 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t10=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t10=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t10[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t10=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t10=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r10[0] +=0t10[0]
+# asm 1: addsd <0t10=int6464#14,<0r10=int6464#12
+# asm 2: addsd <0t10=%xmm13,<0r10=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: 0t13 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t13=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t13=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t13=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t13=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#14,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm13,<0r13=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t14 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t14=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t14=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t14=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t14=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#14,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm13,<0r14=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: 0t16 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t16=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t16=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t16[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t16=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t16=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r16[0] +=0t16[0]
+# asm 1: addsd <0t16=int6464#14,<0r16=int6464#6
+# asm 2: addsd <0t16=%xmm13,<0r16=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: 0t15 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t15=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t15=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t15=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r15[0] +=0t15[0]
+# asm 1: addsd <0t15=int6464#14,<0r15=int6464#5
+# asm 2: addsd <0t15=%xmm13,<0r15=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: 0t19 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t19=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t19=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t19[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t19=int6464#14
+# asm 2: mulsd 72(<op2=%rdx),<0t19=%xmm13
+mulsd 72(%rdx),%xmm13
+
+# qhasm: float6464 0r19[0] +=0t19[0]
+# asm 1: addsd <0t19=int6464#14,<0r19=int6464#9
+# asm 2: addsd <0t19=%xmm13,<0r19=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t20 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t20=int6464#14
+# asm 2: movdqa <0ab10=%xmm10,>0t20=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0t20[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t20=int6464#14
+# asm 2: mulsd 80(<op2=%rdx),<0t20=%xmm13
+mulsd 80(%rdx),%xmm13
+
+# qhasm: float6464 0r20[0] +=0t20[0]
+# asm 1: addsd <0t20=int6464#14,<0r20=int6464#10
+# asm 2: addsd <0t20=%xmm13,<0r20=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: 0t21 = 0ab10
+# asm 1: movdqa <0ab10=int6464#11,>0t21=int6464#11
+# asm 2: movdqa <0ab10=%xmm10,>0t21=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: float6464 0t21[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t21=int6464#11
+# asm 2: mulsd 88(<op2=%rdx),<0t21=%xmm10
+mulsd 88(%rdx),%xmm10
+
+# qhasm: 0r21 =0t21
+# asm 1: movdqa <0t21=int6464#11,>0r21=int6464#11
+# asm 2: movdqa <0t21=%xmm10,>0r21=%xmm10
+movdqa %xmm10,%xmm10
+
+# qhasm: 0t11 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>0t11=int6464#14
+# asm 2: movdqa <0ab10six=%xmm12,>0t11=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t11=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t12 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>0t12=int6464#14
+# asm 2: movdqa <0ab10six=%xmm12,>0t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t12=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t12=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#14,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm13,<0r12=%xmm1
+addsd %xmm13,%xmm1
+
+# qhasm: 0t17 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>0t17=int6464#14
+# asm 2: movdqa <0ab10six=%xmm12,>0t17=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t17[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t17=int6464#14
+# asm 2: mulsd 56(<op2=%rdx),<0t17=%xmm13
+mulsd 56(%rdx),%xmm13
+
+# qhasm: float6464 0r17[0] +=0t17[0]
+# asm 1: addsd <0t17=int6464#14,<0r17=int6464#7
+# asm 2: addsd <0t17=%xmm13,<0r17=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t18 = 0ab10six
+# asm 1: movdqa <0ab10six=int6464#13,>0t18=int6464#13
+# asm 2: movdqa <0ab10six=%xmm12,>0t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t18[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t18=int6464#13
+# asm 2: mulsd 64(<op2=%rdx),<0t18=%xmm12
+mulsd 64(%rdx),%xmm12
+
+# qhasm: float6464 0r18[0] +=0t18[0]
+# asm 1: addsd <0t18=int6464#13,<0r18=int6464#8
+# asm 2: addsd <0t18=%xmm12,<0r18=%xmm7
+addsd %xmm12,%xmm7
+
+# qhasm: *(float64 *)(0mysp + 80) = 0r10[0]
+# asm 1: movlpd <0r10=int6464#12,80(<0mysp=int64#4)
+# asm 2: movlpd <0r10=%xmm11,80(<0mysp=%rcx)
+movlpd %xmm11,80(%rcx)
+
+# qhasm: 0ab11[0] = *(float64 *)(op1 + 88)
+# asm 1: movlpd 88(<op1=int64#2),>0ab11=int6464#12
+# asm 2: movlpd 88(<op1=%rsi),>0ab11=%xmm11
+movlpd 88(%rsi),%xmm11
+
+# qhasm: 0ab11six = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0ab11six=int6464#13
+# asm 2: movdqa <0ab11=%xmm11,>0ab11six=%xmm12
+movdqa %xmm11,%xmm12
+
+# qhasm: float6464 0ab11six[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0ab11six=int6464#13
+# asm 2: mulsd SIX_SIX,<0ab11six=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: 0t11 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t11=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t11=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t11[0] *= *(float64 *)(op2 + 0)
+# asm 1: mulsd 0(<op2=int64#3),<0t11=int6464#14
+# asm 2: mulsd 0(<op2=%rdx),<0t11=%xmm13
+mulsd 0(%rdx),%xmm13
+
+# qhasm: float6464 0r11[0] +=0t11[0]
+# asm 1: addsd <0t11=int6464#14,<0r11=int6464#1
+# asm 2: addsd <0t11=%xmm13,<0r11=%xmm0
+addsd %xmm13,%xmm0
+
+# qhasm: 0t13 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t13=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t13=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t13[0] *= *(float64 *)(op2 + 16)
+# asm 1: mulsd 16(<op2=int64#3),<0t13=int6464#14
+# asm 2: mulsd 16(<op2=%rdx),<0t13=%xmm13
+mulsd 16(%rdx),%xmm13
+
+# qhasm: float6464 0r13[0] +=0t13[0]
+# asm 1: addsd <0t13=int6464#14,<0r13=int6464#3
+# asm 2: addsd <0t13=%xmm13,<0r13=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: 0t14 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t14=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t14=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t14[0] *= *(float64 *)(op2 + 24)
+# asm 1: mulsd 24(<op2=int64#3),<0t14=int6464#14
+# asm 2: mulsd 24(<op2=%rdx),<0t14=%xmm13
+mulsd 24(%rdx),%xmm13
+
+# qhasm: float6464 0r14[0] +=0t14[0]
+# asm 1: addsd <0t14=int6464#14,<0r14=int6464#4
+# asm 2: addsd <0t14=%xmm13,<0r14=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: 0t15 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t15=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t15=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t15[0] *= *(float64 *)(op2 + 32)
+# asm 1: mulsd 32(<op2=int64#3),<0t15=int6464#14
+# asm 2: mulsd 32(<op2=%rdx),<0t15=%xmm13
+mulsd 32(%rdx),%xmm13
+
+# qhasm: float6464 0r15[0] +=0t15[0]
+# asm 1: addsd <0t15=int6464#14,<0r15=int6464#5
+# asm 2: addsd <0t15=%xmm13,<0r15=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: 0t16 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t16=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t16=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t16[0] *= *(float64 *)(op2 + 40)
+# asm 1: mulsd 40(<op2=int64#3),<0t16=int6464#14
+# asm 2: mulsd 40(<op2=%rdx),<0t16=%xmm13
+mulsd 40(%rdx),%xmm13
+
+# qhasm: float6464 0r16[0] +=0t16[0]
+# asm 1: addsd <0t16=int6464#14,<0r16=int6464#6
+# asm 2: addsd <0t16=%xmm13,<0r16=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: 0t17 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t17=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t17=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t17[0] *= *(float64 *)(op2 + 48)
+# asm 1: mulsd 48(<op2=int64#3),<0t17=int6464#14
+# asm 2: mulsd 48(<op2=%rdx),<0t17=%xmm13
+mulsd 48(%rdx),%xmm13
+
+# qhasm: float6464 0r17[0] +=0t17[0]
+# asm 1: addsd <0t17=int6464#14,<0r17=int6464#7
+# asm 2: addsd <0t17=%xmm13,<0r17=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: 0t19 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t19=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t19=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t19[0] *= *(float64 *)(op2 + 64)
+# asm 1: mulsd 64(<op2=int64#3),<0t19=int6464#14
+# asm 2: mulsd 64(<op2=%rdx),<0t19=%xmm13
+mulsd 64(%rdx),%xmm13
+
+# qhasm: float6464 0r19[0] +=0t19[0]
+# asm 1: addsd <0t19=int6464#14,<0r19=int6464#9
+# asm 2: addsd <0t19=%xmm13,<0r19=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: 0t20 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t20=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t20=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t20[0] *= *(float64 *)(op2 + 72)
+# asm 1: mulsd 72(<op2=int64#3),<0t20=int6464#14
+# asm 2: mulsd 72(<op2=%rdx),<0t20=%xmm13
+mulsd 72(%rdx),%xmm13
+
+# qhasm: float6464 0r20[0] +=0t20[0]
+# asm 1: addsd <0t20=int6464#14,<0r20=int6464#10
+# asm 2: addsd <0t20=%xmm13,<0r20=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: 0t21 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t21=int6464#14
+# asm 2: movdqa <0ab11=%xmm11,>0t21=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0t21[0] *= *(float64 *)(op2 + 80)
+# asm 1: mulsd 80(<op2=int64#3),<0t21=int6464#14
+# asm 2: mulsd 80(<op2=%rdx),<0t21=%xmm13
+mulsd 80(%rdx),%xmm13
+
+# qhasm: float6464 0r21[0] +=0t21[0]
+# asm 1: addsd <0t21=int6464#14,<0r21=int6464#11
+# asm 2: addsd <0t21=%xmm13,<0r21=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: 0t22 = 0ab11
+# asm 1: movdqa <0ab11=int6464#12,>0t22=int6464#12
+# asm 2: movdqa <0ab11=%xmm11,>0t22=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: float6464 0t22[0] *= *(float64 *)(op2 + 88)
+# asm 1: mulsd 88(<op2=int64#3),<0t22=int6464#12
+# asm 2: mulsd 88(<op2=%rdx),<0t22=%xmm11
+mulsd 88(%rdx),%xmm11
+
+# qhasm: 0r22 =0t22
+# asm 1: movdqa <0t22=int6464#12,>0r22=int6464#12
+# asm 2: movdqa <0t22=%xmm11,>0r22=%xmm11
+movdqa %xmm11,%xmm11
+
+# qhasm: 0t12 = 0ab11six
+# asm 1: movdqa <0ab11six=int6464#13,>0t12=int6464#14
+# asm 2: movdqa <0ab11six=%xmm12,>0t12=%xmm13
+movdqa %xmm12,%xmm13
+
+# qhasm: float6464 0t12[0] *= *(float64 *)(op2 + 8)
+# asm 1: mulsd 8(<op2=int64#3),<0t12=int6464#14
+# asm 2: mulsd 8(<op2=%rdx),<0t12=%xmm13
+mulsd 8(%rdx),%xmm13
+
+# qhasm: float6464 0r12[0] +=0t12[0]
+# asm 1: addsd <0t12=int6464#14,<0r12=int6464#2
+# asm 2: addsd <0t12=%xmm13,<0r12=%xmm1
+addsd %xmm13,%xmm1
+
+# qhasm: 0t18 = 0ab11six
+# asm 1: movdqa <0ab11six=int6464#13,>0t18=int6464#13
+# asm 2: movdqa <0ab11six=%xmm12,>0t18=%xmm12
+movdqa %xmm12,%xmm12
+
+# qhasm: float6464 0t18[0] *= *(float64 *)(op2 + 56)
+# asm 1: mulsd 56(<op2=int64#3),<0t18=int6464#13
+# asm 2: mulsd 56(<op2=%rdx),<0t18=%xmm12
+mulsd 56(%rdx),%xmm12
+
+# qhasm: float6464 0r18[0] +=0t18[0]
+# asm 1: addsd <0t18=int6464#13,<0r18=int6464#8
+# asm 2: addsd <0t18=%xmm12,<0r18=%xmm7
+addsd %xmm12,%xmm7
+
+# qhasm: *(float64 *)(0mysp + 88) = 0r11[0]
+# asm 1: movlpd <0r11=int6464#1,88(<0mysp=int64#4)
+# asm 2: movlpd <0r11=%xmm0,88(<0mysp=%rcx)
+movlpd %xmm0,88(%rcx)
+
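+# At this point all 23 product coefficients exist: 0r0 .. 0r11 are parked in
+# the scratch area at 0mysp and 0r12 .. 0r22 still live in registers.  The
+# next phase folds the high coefficients back into the low ones: each low
+# coefficient is reloaded from 0mysp, the high coefficients are added or
+# subtracted after scaling by the small integer constants (TWO_TWO,
+# THREE_THREE, ..., THIRTY_THIRTY), and the reduced value is stored back.
+# (Orientation comment; the constants presumably encode the reduction
+# polynomial of the degree-12 representation and are taken as given here.)
+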
+# qhasm: 0r0[0] = *(float64 *)(0mysp + 0)
+# asm 1: movlpd 0(<0mysp=int64#4),>0r0=int6464#1
+# asm 2: movlpd 0(<0mysp=%rcx),>0r0=%xmm0
+movlpd 0(%rcx),%xmm0
+
+# qhasm: float6464 0r0[0] -= 0r12[0]
+# asm 1: subsd <0r12=int6464#2,<0r0=int6464#1
+# asm 2: subsd <0r12=%xmm1,<0r0=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t15 = 0r15
+# asm 1: movdqa <0r15=int6464#5,>0t15=int6464#13
+# asm 2: movdqa <0r15=%xmm4,>0t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 0t15[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t15=int6464#13
+# asm 2: mulsd SIX_SIX,<0t15=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: float6464 0r0[0] += 0t15[0]
+# asm 1: addsd <0t15=int6464#13,<0r0=int6464#1
+# asm 2: addsd <0t15=%xmm12,<0r0=%xmm0
+addsd %xmm12,%xmm0
+
+# qhasm: 0t18 = 0r18
+# asm 1: movdqa <0r18=int6464#8,>0t18=int6464#13
+# asm 2: movdqa <0r18=%xmm7,>0t18=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 0t18[0] *= TWO_TWO
+# asm 1: mulsd TWO_TWO,<0t18=int6464#13
+# asm 2: mulsd TWO_TWO,<0t18=%xmm12
+mulsd TWO_TWO,%xmm12
+
+# qhasm: float6464 0r0[0] -= 0t18[0]
+# asm 1: subsd <0t18=int6464#13,<0r0=int6464#1
+# asm 2: subsd <0t18=%xmm12,<0r0=%xmm0
+subsd %xmm12,%xmm0
+
+# qhasm: 0t21 = 0r21
+# asm 1: movdqa <0r21=int6464#11,>0t21=int6464#13
+# asm 2: movdqa <0r21=%xmm10,>0t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 0t21[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t21=int6464#13
+# asm 2: mulsd SIX_SIX,<0t21=%xmm12
+mulsd SIX_SIX,%xmm12
+
+# qhasm: float6464 0r0[0] -= 0t21[0]
+# asm 1: subsd <0t21=int6464#13,<0r0=int6464#1
+# asm 2: subsd <0t21=%xmm12,<0r0=%xmm0
+subsd %xmm12,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 0) = 0r0[0]
+# asm 1: movlpd <0r0=int6464#1,0(<0mysp=int64#4)
+# asm 2: movlpd <0r0=%xmm0,0(<0mysp=%rcx)
+movlpd %xmm0,0(%rcx)
+
+# qhasm: 0r3[0] = *(float64 *)(0mysp + 24)
+# asm 1: movlpd 24(<0mysp=int64#4),>0r3=int6464#1
+# asm 2: movlpd 24(<0mysp=%rcx),>0r3=%xmm0
+movlpd 24(%rcx),%xmm0
+
+# qhasm: float6464 0r3[0] -= 0r12[0]
+# asm 1: subsd <0r12=int6464#2,<0r3=int6464#1
+# asm 2: subsd <0r12=%xmm1,<0r3=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t15 = 0r15
+# asm 1: movdqa <0r15=int6464#5,>0t15=int6464#13
+# asm 2: movdqa <0r15=%xmm4,>0t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 0t15[0] *= FIVE_FIVE
+# asm 1: mulsd FIVE_FIVE,<0t15=int6464#13
+# asm 2: mulsd FIVE_FIVE,<0t15=%xmm12
+mulsd FIVE_FIVE,%xmm12
+
+# qhasm: float6464 0r3[0] += 0t15[0]
+# asm 1: addsd <0t15=int6464#13,<0r3=int6464#1
+# asm 2: addsd <0t15=%xmm12,<0r3=%xmm0
+addsd %xmm12,%xmm0
+
+# qhasm: float6464 0r3[0] -= 0r18[0]
+# asm 1: subsd <0r18=int6464#8,<0r3=int6464#1
+# asm 2: subsd <0r18=%xmm7,<0r3=%xmm0
+subsd %xmm7,%xmm0
+
+# qhasm: 0t21 = 0r21
+# asm 1: movdqa <0r21=int6464#11,>0t21=int6464#13
+# asm 2: movdqa <0r21=%xmm10,>0t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 0t21[0] *= EIGHT_EIGHT
+# asm 1: mulsd EIGHT_EIGHT,<0t21=int6464#13
+# asm 2: mulsd EIGHT_EIGHT,<0t21=%xmm12
+mulsd EIGHT_EIGHT,%xmm12
+
+# qhasm: float6464 0r3[0] -= 0t21[0]
+# asm 1: subsd <0t21=int6464#13,<0r3=int6464#1
+# asm 2: subsd <0t21=%xmm12,<0r3=%xmm0
+subsd %xmm12,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 24) = 0r3[0]
+# asm 1: movlpd <0r3=int6464#1,24(<0mysp=int64#4)
+# asm 2: movlpd <0r3=%xmm0,24(<0mysp=%rcx)
+movlpd %xmm0,24(%rcx)
+
+# qhasm: 0r6[0] = *(float64 *)(0mysp + 48)
+# asm 1: movlpd 48(<0mysp=int64#4),>0r6=int6464#1
+# asm 2: movlpd 48(<0mysp=%rcx),>0r6=%xmm0
+movlpd 48(%rcx),%xmm0
+
+# qhasm: 0t12 = 0r12
+# asm 1: movdqa <0r12=int6464#2,>0t12=int6464#13
+# asm 2: movdqa <0r12=%xmm1,>0t12=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: float6464 0t12[0] *= FOUR_FOUR
+# asm 1: mulsd FOUR_FOUR,<0t12=int6464#13
+# asm 2: mulsd FOUR_FOUR,<0t12=%xmm12
+mulsd FOUR_FOUR,%xmm12
+
+# qhasm: float6464 0r6[0] -= 0t12[0]
+# asm 1: subsd <0t12=int6464#13,<0r6=int6464#1
+# asm 2: subsd <0t12=%xmm12,<0r6=%xmm0
+subsd %xmm12,%xmm0
+
+# qhasm: 0t15 = 0r15
+# asm 1: movdqa <0r15=int6464#5,>0t15=int6464#13
+# asm 2: movdqa <0r15=%xmm4,>0t15=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm: float6464 0t15[0] *= EIGHTEEN_EIGHTEEN
+# asm 1: mulsd EIGHTEEN_EIGHTEEN,<0t15=int6464#13
+# asm 2: mulsd EIGHTEEN_EIGHTEEN,<0t15=%xmm12
+mulsd EIGHTEEN_EIGHTEEN,%xmm12
+
+# qhasm: float6464 0r6[0] += 0t15[0]
+# asm 1: addsd <0t15=int6464#13,<0r6=int6464#1
+# asm 2: addsd <0t15=%xmm12,<0r6=%xmm0
+addsd %xmm12,%xmm0
+
+# qhasm: 0t18 = 0r18
+# asm 1: movdqa <0r18=int6464#8,>0t18=int6464#13
+# asm 2: movdqa <0r18=%xmm7,>0t18=%xmm12
+movdqa %xmm7,%xmm12
+
+# qhasm: float6464 0t18[0] *= THREE_THREE
+# asm 1: mulsd THREE_THREE,<0t18=int6464#13
+# asm 2: mulsd THREE_THREE,<0t18=%xmm12
+mulsd THREE_THREE,%xmm12
+
+# qhasm: float6464 0r6[0] -= 0t18[0]
+# asm 1: subsd <0t18=int6464#13,<0r6=int6464#1
+# asm 2: subsd <0t18=%xmm12,<0r6=%xmm0
+subsd %xmm12,%xmm0
+
+# qhasm: 0t21 = 0r21
+# asm 1: movdqa <0r21=int6464#11,>0t21=int6464#13
+# asm 2: movdqa <0r21=%xmm10,>0t21=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm: float6464 0t21[0] *= THIRTY_THIRTY
+# asm 1: mulsd THIRTY_THIRTY,<0t21=int6464#13
+# asm 2: mulsd THIRTY_THIRTY,<0t21=%xmm12
+mulsd THIRTY_THIRTY,%xmm12
+
+# qhasm: float6464 0r6[0] -= 0t21[0]
+# asm 1: subsd <0t21=int6464#13,<0r6=int6464#1
+# asm 2: subsd <0t21=%xmm12,<0r6=%xmm0
+subsd %xmm12,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 48) = 0r6[0]
+# asm 1: movlpd <0r6=int6464#1,48(<0mysp=int64#4)
+# asm 2: movlpd <0r6=%xmm0,48(<0mysp=%rcx)
+movlpd %xmm0,48(%rcx)
+
+# qhasm: 0r9[0] = *(float64 *)(0mysp + 72)
+# asm 1: movlpd 72(<0mysp=int64#4),>0r9=int6464#1
+# asm 2: movlpd 72(<0mysp=%rcx),>0r9=%xmm0
+movlpd 72(%rcx),%xmm0
+
+# qhasm: float6464 0r9[0] -= 0r12[0]
+# asm 1: subsd <0r12=int6464#2,<0r9=int6464#1
+# asm 2: subsd <0r12=%xmm1,<0r9=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t15 = 0r15
+# asm 1: movdqa <0r15=int6464#5,>0t15=int6464#2
+# asm 2: movdqa <0r15=%xmm4,>0t15=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: float6464 0t15[0] *= TWO_TWO
+# asm 1: mulsd TWO_TWO,<0t15=int6464#2
+# asm 2: mulsd TWO_TWO,<0t15=%xmm1
+mulsd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r9[0] += 0t15[0]
+# asm 1: addsd <0t15=int6464#2,<0r9=int6464#1
+# asm 2: addsd <0t15=%xmm1,<0r9=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: float6464 0r9[0] += 0r18[0]
+# asm 1: addsd <0r18=int6464#8,<0r9=int6464#1
+# asm 2: addsd <0r18=%xmm7,<0r9=%xmm0
+addsd %xmm7,%xmm0
+
+# qhasm: 0t21 = 0r21
+# asm 1: movdqa <0r21=int6464#11,>0t21=int6464#2
+# asm 2: movdqa <0r21=%xmm10,>0t21=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: float6464 0t21[0] *= NINE_NINE
+# asm 1: mulsd NINE_NINE,<0t21=int6464#2
+# asm 2: mulsd NINE_NINE,<0t21=%xmm1
+mulsd NINE_NINE,%xmm1
+
+# qhasm: float6464 0r9[0] -= 0t21[0]
+# asm 1: subsd <0t21=int6464#2,<0r9=int6464#1
+# asm 2: subsd <0t21=%xmm1,<0r9=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 72) = 0r9[0]
+# asm 1: movlpd <0r9=int6464#1,72(<0mysp=int64#4)
+# asm 2: movlpd <0r9=%xmm0,72(<0mysp=%rcx)
+movlpd %xmm0,72(%rcx)
+
+# qhasm: 0r1[0] = *(float64 *)(0mysp + 8)
+# asm 1: movlpd 8(<0mysp=int64#4),>0r1=int6464#1
+# asm 2: movlpd 8(<0mysp=%rcx),>0r1=%xmm0
+movlpd 8(%rcx),%xmm0
+
+# qhasm: float6464 0r1[0] -= 0r13[0]
+# asm 1: subsd <0r13=int6464#3,<0r1=int6464#1
+# asm 2: subsd <0r13=%xmm2,<0r1=%xmm0
+subsd %xmm2,%xmm0
+
+# qhasm: float6464 0r1[0] += 0r16[0]
+# asm 1: addsd <0r16=int6464#6,<0r1=int6464#1
+# asm 2: addsd <0r16=%xmm5,<0r1=%xmm0
+addsd %xmm5,%xmm0
+
+# qhasm: 0t19 = 0r19
+# asm 1: movdqa <0r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <0r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19[0] *= TWO_TWO
+# asm 1: mulsd TWO_TWO,<0t19=int6464#2
+# asm 2: mulsd TWO_TWO,<0t19=%xmm1
+mulsd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r1[0] -= 0t19[0]
+# asm 1: subsd <0t19=int6464#2,<0r1=int6464#1
+# asm 2: subsd <0t19=%xmm1,<0r1=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: float6464 0r1[0] -= 0r22[0]
+# asm 1: subsd <0r22=int6464#12,<0r1=int6464#1
+# asm 2: subsd <0r22=%xmm11,<0r1=%xmm0
+subsd %xmm11,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 8) = 0r1[0]
+# asm 1: movlpd <0r1=int6464#1,8(<0mysp=int64#4)
+# asm 2: movlpd <0r1=%xmm0,8(<0mysp=%rcx)
+movlpd %xmm0,8(%rcx)
+
+# qhasm: 0r4[0] = *(float64 *)(0mysp + 32)
+# asm 1: movlpd 32(<0mysp=int64#4),>0r4=int6464#1
+# asm 2: movlpd 32(<0mysp=%rcx),>0r4=%xmm0
+movlpd 32(%rcx),%xmm0
+
+# qhasm: 0t13 = 0r13
+# asm 1: movdqa <0r13=int6464#3,>0t13=int6464#2
+# asm 2: movdqa <0r13=%xmm2,>0t13=%xmm1
+movdqa %xmm2,%xmm1
+
+# qhasm: float6464 0t13[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t13=int6464#2
+# asm 2: mulsd SIX_SIX,<0t13=%xmm1
+mulsd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r4[0] -= 0t13[0]
+# asm 1: subsd <0t13=int6464#2,<0r4=int6464#1
+# asm 2: subsd <0t13=%xmm1,<0r4=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t16 = 0r16
+# asm 1: movdqa <0r16=int6464#6,>0t16=int6464#2
+# asm 2: movdqa <0r16=%xmm5,>0t16=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 0t16[0] *= FIVE_FIVE
+# asm 1: mulsd FIVE_FIVE,<0t16=int6464#2
+# asm 2: mulsd FIVE_FIVE,<0t16=%xmm1
+mulsd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 0r4[0] += 0t16[0]
+# asm 1: addsd <0t16=int6464#2,<0r4=int6464#1
+# asm 2: addsd <0t16=%xmm1,<0r4=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t19 = 0r19
+# asm 1: movdqa <0r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <0r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19 *= SIX_SIX
+# asm 1: mulpd SIX_SIX,<0t19=int6464#2
+# asm 2: mulpd SIX_SIX,<0t19=%xmm1
+mulpd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r4[0] -= 0t19[0]
+# asm 1: subsd <0t19=int6464#2,<0r4=int6464#1
+# asm 2: subsd <0t19=%xmm1,<0r4=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t22 = 0r22
+# asm 1: movdqa <0r22=int6464#12,>0t22=int6464#2
+# asm 2: movdqa <0r22=%xmm11,>0t22=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 0t22[0] *= EIGHT_EIGHT
+# asm 1: mulsd EIGHT_EIGHT,<0t22=int6464#2
+# asm 2: mulsd EIGHT_EIGHT,<0t22=%xmm1
+mulsd EIGHT_EIGHT,%xmm1
+
+# qhasm: float6464 0r4[0] -= 0t22[0]
+# asm 1: subsd <0t22=int6464#2,<0r4=int6464#1
+# asm 2: subsd <0t22=%xmm1,<0r4=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 32) = 0r4[0]
+# asm 1: movlpd <0r4=int6464#1,32(<0mysp=int64#4)
+# asm 2: movlpd <0r4=%xmm0,32(<0mysp=%rcx)
+movlpd %xmm0,32(%rcx)
+
+# qhasm: 0r7[0] = *(float64 *)(0mysp + 56)
+# asm 1: movlpd 56(<0mysp=int64#4),>0r7=int6464#1
+# asm 2: movlpd 56(<0mysp=%rcx),>0r7=%xmm0
+movlpd 56(%rcx),%xmm0
+
+# qhasm: 0t13 = 0r13
+# asm 1: movdqa <0r13=int6464#3,>0t13=int6464#2
+# asm 2: movdqa <0r13=%xmm2,>0t13=%xmm1
+movdqa %xmm2,%xmm1
+
+# qhasm: float6464 0t13[0] *= FOUR_FOUR
+# asm 1: mulsd FOUR_FOUR,<0t13=int6464#2
+# asm 2: mulsd FOUR_FOUR,<0t13=%xmm1
+mulsd FOUR_FOUR,%xmm1
+
+# qhasm: float6464 0r7[0] -= 0t13[0]
+# asm 1: subsd <0t13=int6464#2,<0r7=int6464#1
+# asm 2: subsd <0t13=%xmm1,<0r7=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t16 = 0r16
+# asm 1: movdqa <0r16=int6464#6,>0t16=int6464#2
+# asm 2: movdqa <0r16=%xmm5,>0t16=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 0t16[0] *= THREE_THREE
+# asm 1: mulsd THREE_THREE,<0t16=int6464#2
+# asm 2: mulsd THREE_THREE,<0t16=%xmm1
+mulsd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r7[0] += 0t16[0]
+# asm 1: addsd <0t16=int6464#2,<0r7=int6464#1
+# asm 2: addsd <0t16=%xmm1,<0r7=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t19 = 0r19
+# asm 1: movdqa <0r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <0r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19[0] *= THREE_THREE
+# asm 1: mulsd THREE_THREE,<0t19=int6464#2
+# asm 2: mulsd THREE_THREE,<0t19=%xmm1
+mulsd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r7[0] -= 0t19[0]
+# asm 1: subsd <0t19=int6464#2,<0r7=int6464#1
+# asm 2: subsd <0t19=%xmm1,<0r7=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t22 = 0r22
+# asm 1: movdqa <0r22=int6464#12,>0t22=int6464#2
+# asm 2: movdqa <0r22=%xmm11,>0t22=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 0t22[0] *= FIVE_FIVE
+# asm 1: mulsd FIVE_FIVE,<0t22=int6464#2
+# asm 2: mulsd FIVE_FIVE,<0t22=%xmm1
+mulsd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 0r7[0] -= 0t22[0]
+# asm 1: subsd <0t22=int6464#2,<0r7=int6464#1
+# asm 2: subsd <0t22=%xmm1,<0r7=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 56) = 0r7[0]
+# asm 1: movlpd <0r7=int6464#1,56(<0mysp=int64#4)
+# asm 2: movlpd <0r7=%xmm0,56(<0mysp=%rcx)
+movlpd %xmm0,56(%rcx)
+
+# qhasm: 0r10[0] = *(float64 *)(0mysp + 80)
+# asm 1: movlpd 80(<0mysp=int64#4),>0r10=int6464#1
+# asm 2: movlpd 80(<0mysp=%rcx),>0r10=%xmm0
+movlpd 80(%rcx),%xmm0
+
+# qhasm: 0t13 = 0r13
+# asm 1: movdqa <0r13=int6464#3,>0t13=int6464#2
+# asm 2: movdqa <0r13=%xmm2,>0t13=%xmm1
+movdqa %xmm2,%xmm1
+
+# qhasm: float6464 0t13[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t13=int6464#2
+# asm 2: mulsd SIX_SIX,<0t13=%xmm1
+mulsd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r10[0] -= 0t13[0]
+# asm 1: subsd <0t13=int6464#2,<0r10=int6464#1
+# asm 2: subsd <0t13=%xmm1,<0r10=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t16 = 0r16
+# asm 1: movdqa <0r16=int6464#6,>0t16=int6464#2
+# asm 2: movdqa <0r16=%xmm5,>0t16=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: float6464 0t16[0] *= TWO_TWO
+# asm 1: mulsd TWO_TWO,<0t16=int6464#2
+# asm 2: mulsd TWO_TWO,<0t16=%xmm1
+mulsd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r10[0] += 0t16[0]
+# asm 1: addsd <0t16=int6464#2,<0r10=int6464#1
+# asm 2: addsd <0t16=%xmm1,<0r10=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t19 = 0r19
+# asm 1: movdqa <0r19=int6464#9,>0t19=int6464#2
+# asm 2: movdqa <0r19=%xmm8,>0t19=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: float6464 0t19[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t19=int6464#2
+# asm 2: mulsd SIX_SIX,<0t19=%xmm1
+mulsd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r10[0] += 0t19[0]
+# asm 1: addsd <0t19=int6464#2,<0r10=int6464#1
+# asm 2: addsd <0t19=%xmm1,<0r10=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t22 = 0r22
+# asm 1: movdqa <0r22=int6464#12,>0t22=int6464#2
+# asm 2: movdqa <0r22=%xmm11,>0t22=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: float6464 0t22[0] *= NINE_NINE
+# asm 1: mulsd NINE_NINE,<0t22=int6464#2
+# asm 2: mulsd NINE_NINE,<0t22=%xmm1
+mulsd NINE_NINE,%xmm1
+
+# qhasm: float6464 0r10[0] -= 0t22[0]
+# asm 1: subsd <0t22=int6464#2,<0r10=int6464#1
+# asm 2: subsd <0t22=%xmm1,<0r10=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 80) = 0r10[0]
+# asm 1: movlpd <0r10=int6464#1,80(<0mysp=int64#4)
+# asm 2: movlpd <0r10=%xmm0,80(<0mysp=%rcx)
+movlpd %xmm0,80(%rcx)
+
+# qhasm: 0r2[0] = *(float64 *)(0mysp + 16)
+# asm 1: movlpd 16(<0mysp=int64#4),>0r2=int6464#1
+# asm 2: movlpd 16(<0mysp=%rcx),>0r2=%xmm0
+movlpd 16(%rcx),%xmm0
+
+# qhasm: float6464 0r2[0] -= 0r14[0]
+# asm 1: subsd <0r14=int6464#4,<0r2=int6464#1
+# asm 2: subsd <0r14=%xmm3,<0r2=%xmm0
+subsd %xmm3,%xmm0
+
+# qhasm: float6464 0r2[0] += 0r17[0]
+# asm 1: addsd <0r17=int6464#7,<0r2=int6464#1
+# asm 2: addsd <0r17=%xmm6,<0r2=%xmm0
+addsd %xmm6,%xmm0
+
+# qhasm: 0t20 = 0r20
+# asm 1: movdqa <0r20=int6464#10,>0t20=int6464#2
+# asm 2: movdqa <0r20=%xmm9,>0t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 0t20[0] *= TWO_TWO
+# asm 1: mulsd TWO_TWO,<0t20=int6464#2
+# asm 2: mulsd TWO_TWO,<0t20=%xmm1
+mulsd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r2[0] -= 0t20[0]
+# asm 1: subsd <0t20=int6464#2,<0r2=int6464#1
+# asm 2: subsd <0t20=%xmm1,<0r2=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 16) = 0r2[0]
+# asm 1: movlpd <0r2=int6464#1,16(<0mysp=int64#4)
+# asm 2: movlpd <0r2=%xmm0,16(<0mysp=%rcx)
+movlpd %xmm0,16(%rcx)
+
+# qhasm: 0r5[0] = *(float64 *)(0mysp + 40)
+# asm 1: movlpd 40(<0mysp=int64#4),>0r5=int6464#1
+# asm 2: movlpd 40(<0mysp=%rcx),>0r5=%xmm0
+movlpd 40(%rcx),%xmm0
+
+# qhasm: 0t14 = 0r14
+# asm 1: movdqa <0r14=int6464#4,>0t14=int6464#2
+# asm 2: movdqa <0r14=%xmm3,>0t14=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 0t14[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t14=int6464#2
+# asm 2: mulsd SIX_SIX,<0t14=%xmm1
+mulsd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r5[0] -= 0t14[0]
+# asm 1: subsd <0t14=int6464#2,<0r5=int6464#1
+# asm 2: subsd <0t14=%xmm1,<0r5=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t17 = 0r17
+# asm 1: movdqa <0r17=int6464#7,>0t17=int6464#2
+# asm 2: movdqa <0r17=%xmm6,>0t17=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 0t17[0] *= FIVE_FIVE
+# asm 1: mulsd FIVE_FIVE,<0t17=int6464#2
+# asm 2: mulsd FIVE_FIVE,<0t17=%xmm1
+mulsd FIVE_FIVE,%xmm1
+
+# qhasm: float6464 0r5[0] += 0t17[0]
+# asm 1: addsd <0t17=int6464#2,<0r5=int6464#1
+# asm 2: addsd <0t17=%xmm1,<0r5=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t20 = 0r20
+# asm 1: movdqa <0r20=int6464#10,>0t20=int6464#2
+# asm 2: movdqa <0r20=%xmm9,>0t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 0t20[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t20=int6464#2
+# asm 2: mulsd SIX_SIX,<0t20=%xmm1
+mulsd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r5[0] -= 0t20[0]
+# asm 1: subsd <0t20=int6464#2,<0r5=int6464#1
+# asm 2: subsd <0t20=%xmm1,<0r5=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 40) = 0r5[0]
+# asm 1: movlpd <0r5=int6464#1,40(<0mysp=int64#4)
+# asm 2: movlpd <0r5=%xmm0,40(<0mysp=%rcx)
+movlpd %xmm0,40(%rcx)
+
+# qhasm: 0r8[0] = *(float64 *)(0mysp + 64)
+# asm 1: movlpd 64(<0mysp=int64#4),>0r8=int6464#1
+# asm 2: movlpd 64(<0mysp=%rcx),>0r8=%xmm0
+movlpd 64(%rcx),%xmm0
+
+# qhasm: 0t14 = 0r14
+# asm 1: movdqa <0r14=int6464#4,>0t14=int6464#2
+# asm 2: movdqa <0r14=%xmm3,>0t14=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 0t14[0] *= FOUR_FOUR
+# asm 1: mulsd FOUR_FOUR,<0t14=int6464#2
+# asm 2: mulsd FOUR_FOUR,<0t14=%xmm1
+mulsd FOUR_FOUR,%xmm1
+
+# qhasm: float6464 0r8[0] -= 0t14[0]
+# asm 1: subsd <0t14=int6464#2,<0r8=int6464#1
+# asm 2: subsd <0t14=%xmm1,<0r8=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t17 = 0r17
+# asm 1: movdqa <0r17=int6464#7,>0t17=int6464#2
+# asm 2: movdqa <0r17=%xmm6,>0t17=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 0t17[0] *= THREE_THREE
+# asm 1: mulsd THREE_THREE,<0t17=int6464#2
+# asm 2: mulsd THREE_THREE,<0t17=%xmm1
+mulsd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r8[0] += 0t17[0]
+# asm 1: addsd <0t17=int6464#2,<0r8=int6464#1
+# asm 2: addsd <0t17=%xmm1,<0r8=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t20 = 0r20
+# asm 1: movdqa <0r20=int6464#10,>0t20=int6464#2
+# asm 2: movdqa <0r20=%xmm9,>0t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 0t20[0] *= THREE_THREE
+# asm 1: mulsd THREE_THREE,<0t20=int6464#2
+# asm 2: mulsd THREE_THREE,<0t20=%xmm1
+mulsd THREE_THREE,%xmm1
+
+# qhasm: float6464 0r8[0] -= 0t20[0]
+# asm 1: subsd <0t20=int6464#2,<0r8=int6464#1
+# asm 2: subsd <0t20=%xmm1,<0r8=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 64) = 0r8[0]
+# asm 1: movlpd <0r8=int6464#1,64(<0mysp=int64#4)
+# asm 2: movlpd <0r8=%xmm0,64(<0mysp=%rcx)
+movlpd %xmm0,64(%rcx)
+
+# qhasm: 0r11[0] = *(float64 *)(0mysp + 88)
+# asm 1: movlpd 88(<0mysp=int64#4),>0r11=int6464#1
+# asm 2: movlpd 88(<0mysp=%rcx),>0r11=%xmm0
+movlpd 88(%rcx),%xmm0
+
+# qhasm: 0t14 = 0r14
+# asm 1: movdqa <0r14=int6464#4,>0t14=int6464#2
+# asm 2: movdqa <0r14=%xmm3,>0t14=%xmm1
+movdqa %xmm3,%xmm1
+
+# qhasm: float6464 0t14[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t14=int6464#2
+# asm 2: mulsd SIX_SIX,<0t14=%xmm1
+mulsd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r11[0] -= 0t14[0]
+# asm 1: subsd <0t14=int6464#2,<0r11=int6464#1
+# asm 2: subsd <0t14=%xmm1,<0r11=%xmm0
+subsd %xmm1,%xmm0
+
+# qhasm: 0t17 = 0r17
+# asm 1: movdqa <0r17=int6464#7,>0t17=int6464#2
+# asm 2: movdqa <0r17=%xmm6,>0t17=%xmm1
+movdqa %xmm6,%xmm1
+
+# qhasm: float6464 0t17[0] *= TWO_TWO
+# asm 1: mulsd TWO_TWO,<0t17=int6464#2
+# asm 2: mulsd TWO_TWO,<0t17=%xmm1
+mulsd TWO_TWO,%xmm1
+
+# qhasm: float6464 0r11[0] += 0t17[0]
+# asm 1: addsd <0t17=int6464#2,<0r11=int6464#1
+# asm 2: addsd <0t17=%xmm1,<0r11=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: 0t20 = 0r20
+# asm 1: movdqa <0r20=int6464#10,>0t20=int6464#2
+# asm 2: movdqa <0r20=%xmm9,>0t20=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm: float6464 0t20[0] *= SIX_SIX
+# asm 1: mulsd SIX_SIX,<0t20=int6464#2
+# asm 2: mulsd SIX_SIX,<0t20=%xmm1
+mulsd SIX_SIX,%xmm1
+
+# qhasm: float6464 0r11[0] += 0t20[0]
+# asm 1: addsd <0t20=int6464#2,<0r11=int6464#1
+# asm 2: addsd <0t20=%xmm1,<0r11=%xmm0
+addsd %xmm1,%xmm0
+
+# qhasm: *(float64 *)(0mysp + 88) = 0r11[0]
+# asm 1: movlpd <0r11=int6464#1,88(<0mysp=int64#4)
+# asm 2: movlpd <0r11=%xmm0,88(<0mysp=%rcx)
+movlpd %xmm0,88(%rcx)
+
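+# Final phase: reload the reduced coefficients r0 .. r11 from 0mysp and run a
+# carry chain over them so that each coefficient is brought back towards its
+# expected range before the result is written out.
+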
+# qhasm: int6464 0round
+
+# qhasm: int6464 0carry
+
+# qhasm: int6464 1t6
+
+# qhasm: r0[0] = *(float64 *)(0mysp + 0)
+# asm 1: movlpd 0(<0mysp=int64#4),>r0=int6464#1
+# asm 2: movlpd 0(<0mysp=%rcx),>r0=%xmm0
+movlpd 0(%rcx),%xmm0
+
+# qhasm: r1[0] = *(float64 *)(0mysp + 8)
+# asm 1: movlpd 8(<0mysp=int64#4),>r1=int6464#2
+# asm 2: movlpd 8(<0mysp=%rcx),>r1=%xmm1
+movlpd 8(%rcx),%xmm1
+
+# qhasm: r2[0] = *(float64 *)(0mysp + 16)
+# asm 1: movlpd 16(<0mysp=int64#4),>r2=int6464#3
+# asm 2: movlpd 16(<0mysp=%rcx),>r2=%xmm2
+movlpd 16(%rcx),%xmm2
+
+# qhasm: r3[0] = *(float64 *)(0mysp + 24)
+# asm 1: movlpd 24(<0mysp=int64#4),>r3=int6464#4
+# asm 2: movlpd 24(<0mysp=%rcx),>r3=%xmm3
+movlpd 24(%rcx),%xmm3
+
+# qhasm: r4[0] = *(float64 *)(0mysp + 32)
+# asm 1: movlpd 32(<0mysp=int64#4),>r4=int6464#5
+# asm 2: movlpd 32(<0mysp=%rcx),>r4=%xmm4
+movlpd 32(%rcx),%xmm4
+
+# qhasm: r5[0] = *(float64 *)(0mysp + 40)
+# asm 1: movlpd 40(<0mysp=int64#4),>r5=int6464#6
+# asm 2: movlpd 40(<0mysp=%rcx),>r5=%xmm5
+movlpd 40(%rcx),%xmm5
+
+# qhasm: r6[0] = *(float64 *)(0mysp + 48)
+# asm 1: movlpd 48(<0mysp=int64#4),>r6=int6464#7
+# asm 2: movlpd 48(<0mysp=%rcx),>r6=%xmm6
+movlpd 48(%rcx),%xmm6
+
+# qhasm: r7[0] = *(float64 *)(0mysp + 56)
+# asm 1: movlpd 56(<0mysp=int64#4),>r7=int6464#8
+# asm 2: movlpd 56(<0mysp=%rcx),>r7=%xmm7
+movlpd 56(%rcx),%xmm7
+
+# qhasm: r8[0] = *(float64 *)(0mysp + 64)
+# asm 1: movlpd 64(<0mysp=int64#4),>r8=int6464#9
+# asm 2: movlpd 64(<0mysp=%rcx),>r8=%xmm8
+movlpd 64(%rcx),%xmm8
+
+# qhasm: r9[0] = *(float64 *)(0mysp + 72)
+# asm 1: movlpd 72(<0mysp=int64#4),>r9=int6464#10
+# asm 2: movlpd 72(<0mysp=%rcx),>r9=%xmm9
+movlpd 72(%rcx),%xmm9
+
+# qhasm: r10[0] = *(float64 *)(0mysp + 80)
+# asm 1: movlpd 80(<0mysp=int64#4),>r10=int6464#11
+# asm 2: movlpd 80(<0mysp=%rcx),>r10=%xmm10
+movlpd 80(%rcx),%xmm10
+
+# qhasm: r11[0] = *(float64 *)(0mysp + 88)
+# asm 1: movlpd 88(<0mysp=int64#4),>r11=int6464#12
+# asm 2: movlpd 88(<0mysp=%rcx),>r11=%xmm11
+movlpd 88(%rcx),%xmm11
+
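+# Each carry step below appears to use the standard double-precision rounding
+# trick: carry = r[i] * VINV, then adding and subtracting the large constant
+# ROUND_ROUND forces carry to a nearby integer; the carry is added into the
+# next coefficient (or folded across several coefficients at the wrap-around
+# from r11), and carry * V is subtracted from r[i].  ROUND_ROUND, VINV_VINV,
+# V_V, V6INV_V6INV and V6_V6 are packed double constants defined elsewhere in
+# the source; their exact values are assumed, not restated here.
+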
+# qhasm: 0round = ROUND_ROUND
+# asm 1: movdqa ROUND_ROUND,<0round=int6464#13
+# asm 2: movdqa ROUND_ROUND,<0round=%xmm12
+movdqa ROUND_ROUND,%xmm12
+
+# qhasm: 0carry = r1
+# asm 1: movdqa <r1=int6464#2,>0carry=int6464#14
+# asm 2: movdqa <r1=%xmm1,>0carry=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r2[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r2=int6464#3
+# asm 2: addsd <0carry=%xmm13,<r2=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r1[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r1=int6464#2
+# asm 2: subsd <0carry=%xmm13,<r1=%xmm1
+subsd %xmm13,%xmm1
+
+# qhasm: 0carry = r4
+# asm 1: movdqa <r4=int6464#5,>0carry=int6464#14
+# asm 2: movdqa <r4=%xmm4,>0carry=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r5[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r5=int6464#6
+# asm 2: addsd <0carry=%xmm13,<r5=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r4[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r4=int6464#5
+# asm 2: subsd <0carry=%xmm13,<r4=%xmm4
+subsd %xmm13,%xmm4
+
+# qhasm: 0carry = r7
+# asm 1: movdqa <r7=int6464#8,>0carry=int6464#14
+# asm 2: movdqa <r7=%xmm7,>0carry=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r8[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r8=int6464#9
+# asm 2: addsd <0carry=%xmm13,<r8=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r7[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r7=int6464#8
+# asm 2: subsd <0carry=%xmm13,<r7=%xmm7
+subsd %xmm13,%xmm7
+
+# qhasm: 0carry = r10
+# asm 1: movdqa <r10=int6464#11,>0carry=int6464#14
+# asm 2: movdqa <r10=%xmm10,>0carry=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r11[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r11=int6464#12
+# asm 2: addsd <0carry=%xmm13,<r11=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r10[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r10=int6464#11
+# asm 2: subsd <0carry=%xmm13,<r10=%xmm10
+subsd %xmm13,%xmm10
+
+# qhasm: 0carry = r2
+# asm 1: movdqa <r2=int6464#3,>0carry=int6464#14
+# asm 2: movdqa <r2=%xmm2,>0carry=%xmm13
+movdqa %xmm2,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r3[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r3=int6464#4
+# asm 2: addsd <0carry=%xmm13,<r3=%xmm3
+addsd %xmm13,%xmm3
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r2[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r2=int6464#3
+# asm 2: subsd <0carry=%xmm13,<r2=%xmm2
+subsd %xmm13,%xmm2
+
+# qhasm: 0carry = r5
+# asm 1: movdqa <r5=int6464#6,>0carry=int6464#14
+# asm 2: movdqa <r5=%xmm5,>0carry=%xmm13
+movdqa %xmm5,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r6[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r6=int6464#7
+# asm 2: addsd <0carry=%xmm13,<r6=%xmm6
+addsd %xmm13,%xmm6
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r5[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r5=int6464#6
+# asm 2: subsd <0carry=%xmm13,<r5=%xmm5
+subsd %xmm13,%xmm5
+
+# qhasm: 0carry = r8
+# asm 1: movdqa <r8=int6464#9,>0carry=int6464#14
+# asm 2: movdqa <r8=%xmm8,>0carry=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r9[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r9=int6464#10
+# asm 2: addsd <0carry=%xmm13,<r9=%xmm9
+addsd %xmm13,%xmm9
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r8[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r8=int6464#9
+# asm 2: subsd <0carry=%xmm13,<r8=%xmm8
+subsd %xmm13,%xmm8
+
+# qhasm: 0carry = r11
+# asm 1: movdqa <r11=int6464#12,>0carry=int6464#14
+# asm 2: movdqa <r11=%xmm11,>0carry=%xmm13
+movdqa %xmm11,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r0[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r0=int6464#1
+# asm 2: subsd <0carry=%xmm13,<r0=%xmm0
+subsd %xmm13,%xmm0
+
+# qhasm: float6464 r3[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r3=int6464#4
+# asm 2: subsd <0carry=%xmm13,<r3=%xmm3
+subsd %xmm13,%xmm3
+
+# qhasm: 1t6 = 0carry
+# asm 1: movdqa <0carry=int6464#14,>1t6=int6464#15
+# asm 2: movdqa <0carry=%xmm13,>1t6=%xmm14
+movdqa %xmm13,%xmm14
+
+# qhasm: float6464 1t6[0] *= FOUR_FOUR
+# asm 1: mulsd FOUR_FOUR,<1t6=int6464#15
+# asm 2: mulsd FOUR_FOUR,<1t6=%xmm14
+mulsd FOUR_FOUR,%xmm14
+
+# qhasm: float6464 r6[0] -= 1t6[0]
+# asm 1: subsd <1t6=int6464#15,<r6=int6464#7
+# asm 2: subsd <1t6=%xmm14,<r6=%xmm6
+subsd %xmm14,%xmm6
+
+# qhasm: float6464 r9[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r9=int6464#10
+# asm 2: subsd <0carry=%xmm13,<r9=%xmm9
+subsd %xmm13,%xmm9
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r11[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r11=int6464#12
+# asm 2: subsd <0carry=%xmm13,<r11=%xmm11
+subsd %xmm13,%xmm11
+
+# qhasm: 0carry = r0
+# asm 1: movdqa <r0=int6464#1,>0carry=int6464#14
+# asm 2: movdqa <r0=%xmm0,>0carry=%xmm13
+movdqa %xmm0,%xmm13
+
+# qhasm: float6464 0carry[0] *= V6INV_V6INV
+# asm 1: mulsd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulsd V6INV_V6INV,<0carry=%xmm13
+mulsd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r1[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r1=int6464#2
+# asm 2: addsd <0carry=%xmm13,<r1=%xmm1
+addsd %xmm13,%xmm1
+
+# qhasm: float6464 0carry[0] *= V6_V6
+# asm 1: mulsd V6_V6,<0carry=int6464#14
+# asm 2: mulsd V6_V6,<0carry=%xmm13
+mulsd V6_V6,%xmm13
+
+# qhasm: float6464 r0[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r0=int6464#1
+# asm 2: subsd <0carry=%xmm13,<r0=%xmm0
+subsd %xmm13,%xmm0
+
+# qhasm: 0carry = r3
+# asm 1: movdqa <r3=int6464#4,>0carry=int6464#14
+# asm 2: movdqa <r3=%xmm3,>0carry=%xmm13
+movdqa %xmm3,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r4[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r4=int6464#5
+# asm 2: addsd <0carry=%xmm13,<r4=%xmm4
+addsd %xmm13,%xmm4
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r3[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r3=int6464#4
+# asm 2: subsd <0carry=%xmm13,<r3=%xmm3
+subsd %xmm13,%xmm3
+
+# qhasm: 0carry = r6
+# asm 1: movdqa <r6=int6464#7,>0carry=int6464#14
+# asm 2: movdqa <r6=%xmm6,>0carry=%xmm13
+movdqa %xmm6,%xmm13
+
+# qhasm: float6464 0carry[0] *= V6INV_V6INV
+# asm 1: mulsd V6INV_V6INV,<0carry=int6464#14
+# asm 2: mulsd V6INV_V6INV,<0carry=%xmm13
+mulsd V6INV_V6INV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r7[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r7=int6464#8
+# asm 2: addsd <0carry=%xmm13,<r7=%xmm7
+addsd %xmm13,%xmm7
+
+# qhasm: float6464 0carry[0] *= V6_V6
+# asm 1: mulsd V6_V6,<0carry=int6464#14
+# asm 2: mulsd V6_V6,<0carry=%xmm13
+mulsd V6_V6,%xmm13
+
+# qhasm: float6464 r6[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r6=int6464#7
+# asm 2: subsd <0carry=%xmm13,<r6=%xmm6
+subsd %xmm13,%xmm6
+
+# qhasm: 0carry = r9
+# asm 1: movdqa <r9=int6464#10,>0carry=int6464#14
+# asm 2: movdqa <r9=%xmm9,>0carry=%xmm13
+movdqa %xmm9,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r10[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r10=int6464#11
+# asm 2: addsd <0carry=%xmm13,<r10=%xmm10
+addsd %xmm13,%xmm10
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r9[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r9=int6464#10
+# asm 2: subsd <0carry=%xmm13,<r9=%xmm9
+subsd %xmm13,%xmm9
+
+# qhasm: 0carry = r1
+# asm 1: movdqa <r1=int6464#2,>0carry=int6464#14
+# asm 2: movdqa <r1=%xmm1,>0carry=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r2[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r2=int6464#3
+# asm 2: addsd <0carry=%xmm13,<r2=%xmm2
+addsd %xmm13,%xmm2
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r1[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r1=int6464#2
+# asm 2: subsd <0carry=%xmm13,<r1=%xmm1
+subsd %xmm13,%xmm1
+
+# qhasm: 0carry = r4
+# asm 1: movdqa <r4=int6464#5,>0carry=int6464#14
+# asm 2: movdqa <r4=%xmm4,>0carry=%xmm13
+movdqa %xmm4,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r5[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r5=int6464#6
+# asm 2: addsd <0carry=%xmm13,<r5=%xmm5
+addsd %xmm13,%xmm5
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r4[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r4=int6464#5
+# asm 2: subsd <0carry=%xmm13,<r4=%xmm4
+subsd %xmm13,%xmm4
+
+# qhasm: 0carry = r7
+# asm 1: movdqa <r7=int6464#8,>0carry=int6464#14
+# asm 2: movdqa <r7=%xmm7,>0carry=%xmm13
+movdqa %xmm7,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r8[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r8=int6464#9
+# asm 2: addsd <0carry=%xmm13,<r8=%xmm8
+addsd %xmm13,%xmm8
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r7[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r7=int6464#8
+# asm 2: subsd <0carry=%xmm13,<r7=%xmm7
+subsd %xmm13,%xmm7
+
+# qhasm: 0carry = r10
+# asm 1: movdqa <r10=int6464#11,>0carry=int6464#14
+# asm 2: movdqa <r10=%xmm10,>0carry=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm: float6464 0carry[0] *= VINV_VINV
+# asm 1: mulsd VINV_VINV,<0carry=int6464#14
+# asm 2: mulsd VINV_VINV,<0carry=%xmm13
+mulsd VINV_VINV,%xmm13
+
+# qhasm: float6464 0carry[0] += 0round[0]
+# asm 1: addsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: addsd <0round=%xmm12,<0carry=%xmm13
+addsd %xmm12,%xmm13
+
+# qhasm: float6464 0carry[0] -= 0round[0]
+# asm 1: subsd <0round=int6464#13,<0carry=int6464#14
+# asm 2: subsd <0round=%xmm12,<0carry=%xmm13
+subsd %xmm12,%xmm13
+
+# qhasm: float6464 r11[0] += 0carry[0]
+# asm 1: addsd <0carry=int6464#14,<r11=int6464#12
+# asm 2: addsd <0carry=%xmm13,<r11=%xmm11
+addsd %xmm13,%xmm11
+
+# qhasm: float6464 0carry[0] *= V_V
+# asm 1: mulsd V_V,<0carry=int6464#14
+# asm 2: mulsd V_V,<0carry=%xmm13
+mulsd V_V,%xmm13
+
+# qhasm: float6464 r10[0] -= 0carry[0]
+# asm 1: subsd <0carry=int6464#14,<r10=int6464#11
+# asm 2: subsd <0carry=%xmm13,<r10=%xmm10
+subsd %xmm13,%xmm10
+
+# qhasm: *(float64 *)(rop +  0) = r0[0]
+# asm 1: movlpd <r0=int6464#1,0(<rop=int64#1)
+# asm 2: movlpd <r0=%xmm0,0(<rop=%rdi)
+movlpd %xmm0,0(%rdi)
+
+# qhasm: *(float64 *)(rop +  8) = r1[0]
+# asm 1: movlpd <r1=int6464#2,8(<rop=int64#1)
+# asm 2: movlpd <r1=%xmm1,8(<rop=%rdi)
+movlpd %xmm1,8(%rdi)
+
+# qhasm: *(float64 *)(rop + 16) = r2[0]
+# asm 1: movlpd <r2=int6464#3,16(<rop=int64#1)
+# asm 2: movlpd <r2=%xmm2,16(<rop=%rdi)
+movlpd %xmm2,16(%rdi)
+
+# qhasm: *(float64 *)(rop + 24) = r3[0]
+# asm 1: movlpd <r3=int6464#4,24(<rop=int64#1)
+# asm 2: movlpd <r3=%xmm3,24(<rop=%rdi)
+movlpd %xmm3,24(%rdi)
+
+# qhasm: *(float64 *)(rop + 32) = r4[0]
+# asm 1: movlpd <r4=int6464#5,32(<rop=int64#1)
+# asm 2: movlpd <r4=%xmm4,32(<rop=%rdi)
+movlpd %xmm4,32(%rdi)
+
+# qhasm: *(float64 *)(rop + 40) = r5[0]
+# asm 1: movlpd <r5=int6464#6,40(<rop=int64#1)
+# asm 2: movlpd <r5=%xmm5,40(<rop=%rdi)
+movlpd %xmm5,40(%rdi)
+
+# qhasm: *(float64 *)(rop + 48) = r6[0]
+# asm 1: movlpd <r6=int6464#7,48(<rop=int64#1)
+# asm 2: movlpd <r6=%xmm6,48(<rop=%rdi)
+movlpd %xmm6,48(%rdi)
+
+# qhasm: *(float64 *)(rop + 56) = r7[0]
+# asm 1: movlpd <r7=int6464#8,56(<rop=int64#1)
+# asm 2: movlpd <r7=%xmm7,56(<rop=%rdi)
+movlpd %xmm7,56(%rdi)
+
+# qhasm: *(float64 *)(rop + 64) = r8[0]
+# asm 1: movlpd <r8=int6464#9,64(<rop=int64#1)
+# asm 2: movlpd <r8=%xmm8,64(<rop=%rdi)
+movlpd %xmm8,64(%rdi)
+
+# qhasm: *(float64 *)(rop + 72) = r9[0]
+# asm 1: movlpd <r9=int6464#10,72(<rop=int64#1)
+# asm 2: movlpd <r9=%xmm9,72(<rop=%rdi)
+movlpd %xmm9,72(%rdi)
+
+# qhasm: *(float64 *)(rop + 80) = r10[0]
+# asm 1: movlpd <r10=int6464#11,80(<rop=int64#1)
+# asm 2: movlpd <r10=%xmm10,80(<rop=%rdi)
+movlpd %xmm10,80(%rdi)
+
+# qhasm: *(float64 *)(rop + 88) = r11[0]
+# asm 1: movlpd <r11=int6464#12,88(<rop=int64#1)
+# asm 2: movlpd <r11=%xmm11,88(<rop=%rdi)
+movlpd %xmm11,88(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 313 - 0
dclxvi-20130329/gmp_convert.c

@@ -0,0 +1,313 @@
+/*
+ * File:   dclxvi-20130329/gmp_convert.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <gmp.h>
+//#include "parameters.h"
+
+
+//before
+//#include "fpe.h"
+//#include "fp2e.h"
+//#include "fp6e.h"
+//#include "fp12e.h"
+
+
+//AFTER
+#include "mul.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+#include "fp12e.h"
+} 
+
+extern const double bn_v;
+extern const double bn_v6;
+extern const char * bn_pstr;
+
+#ifndef CHECK
+static long long ftoll(double arg)
+{
+  return (long long)arg;
+}
+#endif
+
+void fp2mpz(mpz_t rop, const fpe_t op)
+{
+  mpz_t v, vp, t, p;
+  mpz_init_set_ui(v, (long)bn_v);
+  mpz_init_set_str(p, bn_pstr, 10);
+  mpz_init(t);
+  mpz_init(vp);
+
+  mpz_set_si(rop, ftoll(op->v[0]));
+  mpz_set_si(t,ftoll(op->v[1]));
+  mpz_mul(t,t,v);
+  mpz_mul_si(t,t,6);
+  mpz_mul(vp,v,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[2]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,6);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[3]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,6);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[4]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,6);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[5]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,6);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[6]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,6);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[7]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,36);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[8]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,36);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[9]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,36);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[10]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,36);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_set_si(t,ftoll(op->v[11]));
+  mpz_mul(t,t,vp);
+  mpz_mul_si(t,t,36);
+  mpz_mul(vp,vp,v);
+  mpz_add(rop, rop, t);
+  mpz_mod(rop, rop, p);
+  mpz_clear(v);
+  mpz_clear(p);
+  mpz_clear(t);
+  mpz_clear(vp);
+}
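+
+/* Note on the representation (as can be read off fp2mpz above and mpz2fp below):
+   with V = bn_v, the coefficient vector v[0..11] encodes the field element
+     x = v[0] + 6*V*v[1] + 6*V^2*v[2] + 6*V^3*v[3] + 6*V^4*v[4] + 6*V^5*v[5]
+              + 6*V^6*v[6] + 36*V^7*v[7] + 36*V^8*v[8] + 36*V^9*v[9]
+              + 36*V^10*v[10] + 36*V^11*v[11]   (mod p).
+   fp2mpz evaluates this sum with GMP; mpz2fp inverts it by successive
+   divisions by 6*V (positions 0 and 6) and by V (all other positions). */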
+
+void mpz2fp(fpe_t rop, const mpz_t op)
+{
+  mpz_t t, r;
+  mpz_init(r);
+  mpz_init_set(t, op);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v6);
+  rop->v[0] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[1] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[2] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[3] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[4] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[5] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v6);
+  rop->v[6] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[7] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[8] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[9] = mpz_get_d(r);
+  mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[10] = mpz_get_d(r);
+  //mpz_tdiv_qr_ui(t, r, t, (long)bn_v);
+  rop->v[11] = mpz_get_d(t);
+
+  setmax(rop->v[0],(long)bn_v6/2);
+  setmax(rop->v[6],(long)bn_v6/2);
+
+  setmax(rop->v[1],(long)bn_v/2);
+  setmax(rop->v[3],(long)bn_v/2);
+  setmax(rop->v[4],(long)bn_v/2);
+  setmax(rop->v[7],(long)bn_v/2);
+  setmax(rop->v[9],(long)bn_v/2);
+  setmax(rop->v[10],(long)bn_v/2);
+
+  setmax(rop->v[2],(long)bn_v);
+  setmax(rop->v[5],(long)bn_v);
+  setmax(rop->v[8],(long)bn_v);
+  setmax(rop->v[11],(long)bn_v);
+  mpz_clear(r);
+  mpz_clear(t);
+}
+
+void fpe_out_str(FILE *outfile, const fpe_t op)
+{
+  mpz_t t;
+  mpz_init(t);
+  fp2mpz(t, op);
+  mpz_out_str(outfile, 10, t);
+  mpz_clear(t);
+}
+
+void fp2e_out_str(FILE *outfile, const fp2e_t op)
+{
+  fpe_t a, b;
+  mpz_t ma, mb;
+  mpz_init(ma);
+  mpz_init(mb);
+  int i;
+  for(i=0;i<12;i++)
+  {
+    b->v[i] = op->v[2*i];
+    a->v[i] = op->v[2*i+1];
+  }
+  fp2mpz(ma, a);
+  fp2mpz(mb, b);
+  fprintf(outfile, "(");
+  mpz_out_str(outfile, 10, ma);
+  fprintf(outfile, "*X + ");
+  mpz_out_str(outfile, 10, mb);
+  fprintf(outfile, ")");
+  mpz_clear(ma);
+  mpz_clear(mb);
+}
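+
+/* As the unpacking above shows, an fp2e_t stores the element a*X + b with the
+   two Fp components interleaved coefficient-wise:
+   v[0]=b[0], v[1]=a[0], v[2]=b[1], v[3]=a[1], ..., v[22]=b[11], v[23]=a[11]. */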
+
+void fp6e_out_str(FILE *outfile, const fp6e_t op)
+{
+	fprintf(outfile, "[");
+	fp2e_out_str(outfile, op->m_a);
+	fprintf(outfile, " * Y^2 + ");
+	fp2e_out_str(outfile, op->m_b);
+	fprintf(outfile, " * Y + ");
+	fp2e_out_str(outfile, op->m_c);
+	fprintf(outfile, "]");
+}
+
+void fp12e_out_str(FILE *outfile, const fp12e_t op)
+{
+	fp6e_out_str(outfile, op->m_a);
+	fprintf(outfile, " * Z + ");
+	fp6e_out_str(outfile, op->m_b);
+}
+
+/*
+void fp12e_out_magma(FILE *outfile, const fp12e_t op)
+{
+  fpe_t f[12];
+  fp2e_to_2fpe(f[10],f[11],(op->m_a)->m_a);
+  fp2e_to_2fpe(f[8],f[9],(op->m_a)->m_b);
+  fp2e_to_2fpe(f[6],f[7],(op->m_a)->m_c);
+  fp2e_to_2fpe(f[4],f[5],(op->m_b)->m_a);
+  fp2e_to_2fpe(f[2],f[3],(op->m_b)->m_b);
+  fp2e_to_2fpe(f[0],f[1],(op->m_b)->m_c);
+  int i;
+  fprintf(outfile, "[");
+  for(i=0;i<11;i++)
+  {
+    fprintf(outfile, "Fp!");
+    fpe_out_str(outfile, f[i]); 
+    fprintf(outfile,", ");
+  }
+  fprintf(outfile, "Fp!");
+  fpe_out_str(outfile, f[11]);
+  fprintf(outfile, "]");
+}
+*/
+
+int fp12e_iseq_gmp(const fp12e_t op1, const fp12e_t op2)
+{
+  fpe_t a[12];
+  fp2e_to_2fpe(a[10],a[11],(op1->m_a)->m_a);
+  fp2e_to_2fpe(a[8],a[9],(op1->m_a)->m_b);
+  fp2e_to_2fpe(a[6],a[7],(op1->m_a)->m_c);
+  fp2e_to_2fpe(a[4],a[5],(op1->m_b)->m_a);
+  fp2e_to_2fpe(a[2],a[3],(op1->m_b)->m_b);
+  fp2e_to_2fpe(a[0],a[1],(op1->m_b)->m_c);
+
+  fpe_t b[12];
+  fp2e_to_2fpe(b[10],b[11],(op2->m_a)->m_a);
+  fp2e_to_2fpe(b[8],b[9],(op2->m_a)->m_b);
+  fp2e_to_2fpe(b[6],b[7],(op2->m_a)->m_c);
+  fp2e_to_2fpe(b[4],b[5],(op2->m_b)->m_a);
+  fp2e_to_2fpe(b[2],b[3],(op2->m_b)->m_b);
+  fp2e_to_2fpe(b[0],b[1],(op2->m_b)->m_c);
+
+  int i;
+  int ret = 1;
+  mpz_t at, bt;
+  mpz_init(at);
+  mpz_init(bt);
+  for(i=0;i<12;i++)
+  {
+    fp2mpz(at, a[i]);
+    fp2mpz(bt, b[i]);
+    if(mpz_cmp(at,bt)) { ret = 0; break; } /* no early return, so at/bt are always cleared */
+  }
+  mpz_clear(at);
+  mpz_clear(bt);
+  return ret;
+}
+
+int fp12e_iszero_gmp(const fp12e_t op)
+{
+  fpe_t a[12];
+  fp2e_to_2fpe(a[10],a[11],(op->m_a)->m_a);
+  fp2e_to_2fpe(a[8],a[9],(op->m_a)->m_b);
+  fp2e_to_2fpe(a[6],a[7],(op->m_a)->m_c);
+  fp2e_to_2fpe(a[4],a[5],(op->m_b)->m_a);
+  fp2e_to_2fpe(a[2],a[3],(op->m_b)->m_b);
+  fp2e_to_2fpe(a[0],a[1],(op->m_b)->m_c);
+
+  int i;
+  mpz_t at;
+  mpz_init(at);
+  int ret = 1;
+  for(i=0;i<12;i++)
+  {
+    fp2mpz(at, a[i]);
+    if(mpz_cmp_ui(at,0)) ret = 0;
+  }
+  mpz_clear(at);
+  return ret;
+}
+
+
+int fp12e_isone_gmp(const fp12e_t op)
+{
+  fpe_t a[12];
+  fp2e_to_2fpe(a[10],a[11],(op->m_a)->m_a);
+  fp2e_to_2fpe(a[8],a[9],(op->m_a)->m_b);
+  fp2e_to_2fpe(a[6],a[7],(op->m_a)->m_c);
+  fp2e_to_2fpe(a[4],a[5],(op->m_b)->m_a);
+  fp2e_to_2fpe(a[2],a[3],(op->m_b)->m_b);
+  fp2e_to_2fpe(a[0],a[1],(op->m_b)->m_c);
+
+  int i;
+  mpz_t at;
+  mpz_init(at);
+  int ret = 1;
+  for(i=1;i<12;i++)
+  {
+    fp2mpz(at, a[i]);
+    if(mpz_cmp_ui(at,0)) ret = 0;
+  }
+  fp2mpz(at, a[0]);
+  if(mpz_cmp_ui(at,1)) ret = 0;
+  mpz_clear(at);
+  return ret;
+}

+ 35 - 0
dclxvi-20130329/gmp_convert.h

@@ -0,0 +1,35 @@
+/*
+ * File:   dclxvi-20130329/gmp_convert.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef GMP_CONVERT_H
+#define GMP_CONVERT_H
+
+#include <stdio.h>
+#include <gmp.h>
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+#include "fp12e.h"
+
+void fp2mpz(mpz_t rop, const fpe_t op);
+
+void mpz2fp(fpe_t rop, const mpz_t op);
+
+void fpe_out_str(FILE *outfile, const fpe_t op);
+
+void fp2e_out_str(FILE *outfile, const fp2e_t op);
+
+void fp6e_out_str(FILE *outfile, const fp6e_t op);
+
+void fp12e_out_str(FILE *outfile, const fp12e_t op);
+
+//void fp12e_out_magma(FILE *outfile, const fp12e_t op);
+
+int fp12e_iseq_gmp(const fp12e_t op1, const fp12e_t op2);
+int fp12e_iszero_gmp(const fp12e_t op);
+int fp12e_isone_gmp(const fp12e_t op);
+
+#endif

+ 480 - 0
dclxvi-20130329/heap_rootreplaced.s

@@ -0,0 +1,480 @@
+# File:   dclxvi-20130329/heap_rootreplaced.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
+
+# qhasm: int64 hp
+
+# qhasm: int64 hlen
+
+# qhasm: int64 sp
+
+# qhasm: int64 pp
+
+# qhasm: input hp
+
+# qhasm: input hlen
+
+# qhasm: input sp
+
+# qhasm: int64 prc
+
+# qhasm: int64 plc
+
+# qhasm: int64 pc
+
+# qhasm: int64 d
+
+# qhasm: int64 spp
+
+# qhasm: int64 sprc
+
+# qhasm: int64 spc
+
+# qhasm: int64 c0
+
+# qhasm: int64 c1
+
+# qhasm: int64 c2
+
+# qhasm: int64 c3
+
+# qhasm: int64 t0
+
+# qhasm: int64 t1
+
+# qhasm: int64 t2
+
+# qhasm: int64 t3
+
+# qhasm: int64 p0
+
+# qhasm: int64 p1
+
+# qhasm: int64 p2
+
+# qhasm: int64 p3
+
+# qhasm:   int64 caller1
+
+# qhasm:   int64 caller2
+
+# qhasm:   int64 caller3
+
+# qhasm:   int64 caller4
+
+# qhasm:   int64 caller5
+
+# qhasm:   int64 caller6
+
+# qhasm:   int64 caller7
+
+# qhasm:   caller caller1
+
+# qhasm:   caller caller2
+
+# qhasm:   caller caller3
+
+# qhasm:   caller caller4
+
+# qhasm:   caller caller5
+
+# qhasm:   caller caller6
+
+# qhasm:   caller caller7
+
+# qhasm:   stack64 caller1_stack
+
+# qhasm:   stack64 caller2_stack
+
+# qhasm:   stack64 caller3_stack
+
+# qhasm:   stack64 caller4_stack
+
+# qhasm:   stack64 caller5_stack
+
+# qhasm:   stack64 caller6_stack
+
+# qhasm:   stack64 caller7_stack
+
+# qhasm: enter heap_rootreplaced
+.text
+.p2align 5
+.globl _heap_rootreplaced
+.globl heap_rootreplaced
+_heap_rootreplaced:
+heap_rootreplaced:
+mov %rsp,%r11
+and $31,%r11
+add $64,%r11
+sub %r11,%rsp
+
+# qhasm: caller1_stack = caller1
+# asm 1: movq <caller1=int64#9,>caller1_stack=stack64#1
+# asm 2: movq <caller1=%r11,>caller1_stack=0(%rsp)
+movq %r11,0(%rsp)
+
+# qhasm: caller2_stack = caller2
+# asm 1: movq <caller2=int64#10,>caller2_stack=stack64#2
+# asm 2: movq <caller2=%r12,>caller2_stack=8(%rsp)
+movq %r12,8(%rsp)
+
+# qhasm: caller3_stack = caller3
+# asm 1: movq <caller3=int64#11,>caller3_stack=stack64#3
+# asm 2: movq <caller3=%r13,>caller3_stack=16(%rsp)
+movq %r13,16(%rsp)
+
+# qhasm: caller4_stack = caller4
+# asm 1: movq <caller4=int64#12,>caller4_stack=stack64#4
+# asm 2: movq <caller4=%r14,>caller4_stack=24(%rsp)
+movq %r14,24(%rsp)
+
+# qhasm: caller5_stack = caller5
+# asm 1: movq <caller5=int64#13,>caller5_stack=stack64#5
+# asm 2: movq <caller5=%r15,>caller5_stack=32(%rsp)
+movq %r15,32(%rsp)
+
+# qhasm: caller6_stack = caller6
+# asm 1: movq <caller6=int64#14,>caller6_stack=stack64#6
+# asm 2: movq <caller6=%rbx,>caller6_stack=40(%rsp)
+movq %rbx,40(%rsp)
+
+# qhasm: caller7_stack = caller7
+# asm 1: movq <caller7=int64#15,>caller7_stack=stack64#7
+# asm 2: movq <caller7=%rbp,>caller7_stack=48(%rsp)
+movq %rbp,48(%rsp)
+
+# qhasm: pp = 0
+# asm 1: mov  $0,>pp=int64#4
+# asm 2: mov  $0,>pp=%rcx
+mov  $0,%rcx
+
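+# Overview: the caller has just replaced the scalar at the root of the heap.
+# The code below first walks the root all the way down, swapping it at every
+# level with the larger of its two children (siftdownloop), and then bubbles
+# it back up (siftuploop) until its parent is at least as large again, which
+# restores the max-heap property.
+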
+# qhasm: siftdownloop:
+._siftdownloop:
+
+# qhasm: prc = pp
+# asm 1: mov  <pp=int64#4,>prc=int64#5
+# asm 2: mov  <pp=%rcx,>prc=%r8
+mov  %rcx,%r8
+
+# qhasm: prc *= 2
+# asm 1: imulq  $2,<prc=int64#5,>prc=int64#5
+# asm 2: imulq  $2,<prc=%r8,>prc=%r8
+imulq  $2,%r8,%r8
+
+# qhasm: pc = prc
+# asm 1: mov  <prc=int64#5,>pc=int64#6
+# asm 2: mov  <prc=%r8,>pc=%r9
+mov  %r8,%r9
+
+# qhasm: prc += 2
+# asm 1: add  $2,<prc=int64#5
+# asm 2: add  $2,<prc=%r8
+add  $2,%r8
+
+# qhasm: pc += 1
+# asm 1: add  $1,<pc=int64#6
+# asm 2: add  $1,<pc=%r9
+add  $1,%r9
+
+# qhasm: unsigned>? hlen - prc
+# asm 1: cmp  <prc=int64#5,<hlen=int64#2
+# asm 2: cmp  <prc=%r8,<hlen=%rsi
+cmp  %r8,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto siftuploop if !unsigned>
+jbe ._siftuploop
+
+# qhasm: sprc = *(uint64 *)(hp + prc * 8)
+# asm 1: movq   (<hp=int64#1,<prc=int64#5,8),>sprc=int64#7
+# asm 2: movq   (<hp=%rdi,<prc=%r8,8),>sprc=%rax
+movq   (%rdi,%r8,8),%rax
+
+# qhasm: sprc <<= 5
+# asm 1: shl  $5,<sprc=int64#7
+# asm 2: shl  $5,<sprc=%rax
+shl  $5,%rax
+
+# qhasm: sprc += sp
+# asm 1: add  <sp=int64#3,<sprc=int64#7
+# asm 2: add  <sp=%rdx,<sprc=%rax
+add  %rdx,%rax
+
+# qhasm: spc = *(uint64 *)(hp + pc * 8)
+# asm 1: movq   (<hp=int64#1,<pc=int64#6,8),>spc=int64#8
+# asm 2: movq   (<hp=%rdi,<pc=%r9,8),>spc=%r10
+movq   (%rdi,%r9,8),%r10
+
+# qhasm: spc <<= 5
+# asm 1: shl  $5,<spc=int64#8
+# asm 2: shl  $5,<spc=%r10
+shl  $5,%r10
+
+# qhasm: spc += sp
+# asm 1: add  <sp=int64#3,<spc=int64#8
+# asm 2: add  <sp=%rdx,<spc=%r10
+add  %rdx,%r10
+
+# qhasm: c0 = *(uint64 *)(spc +  0)
+# asm 1: movq   0(<spc=int64#8),>c0=int64#9
+# asm 2: movq   0(<spc=%r10),>c0=%r11
+movq   0(%r10),%r11
+
+# qhasm: c1 = *(uint64 *)(spc +  8)
+# asm 1: movq   8(<spc=int64#8),>c1=int64#10
+# asm 2: movq   8(<spc=%r10),>c1=%r12
+movq   8(%r10),%r12
+
+# qhasm: c2 = *(uint64 *)(spc + 16)
+# asm 1: movq   16(<spc=int64#8),>c2=int64#11
+# asm 2: movq   16(<spc=%r10),>c2=%r13
+movq   16(%r10),%r13
+
+# qhasm: c3 = *(uint64 *)(spc + 24)
+# asm 1: movq   24(<spc=int64#8),>c3=int64#12
+# asm 2: movq   24(<spc=%r10),>c3=%r14
+movq   24(%r10),%r14
+
+# qhasm: carry? c0 -= *(uint64 *)(sprc +  0)
+# asm 1: subq 0(<sprc=int64#7),<c0=int64#9
+# asm 2: subq 0(<sprc=%rax),<c0=%r11
+subq 0(%rax),%r11
+
+# qhasm: carry? c1 -= *(uint64 *)(sprc +  8) - carry
+# asm 1: sbbq 8(<sprc=int64#7),<c1=int64#10
+# asm 2: sbbq 8(<sprc=%rax),<c1=%r12
+sbbq 8(%rax),%r12
+
+# qhasm: carry? c2 -= *(uint64 *)(sprc + 16) - carry
+# asm 1: sbbq 16(<sprc=int64#7),<c2=int64#11
+# asm 2: sbbq 16(<sprc=%rax),<c2=%r13
+sbbq 16(%rax),%r13
+
+# qhasm: carry? c3 -= *(uint64 *)(sprc + 24) - carry
+# asm 1: sbbq 24(<sprc=int64#7),<c3=int64#12
+# asm 2: sbbq 24(<sprc=%rax),<c3=%r14
+sbbq 24(%rax),%r14
+
+# qhasm: pc = prc if carry
+# asm 1: cmovc <prc=int64#5,<pc=int64#6
+# asm 2: cmovc <prc=%r8,<pc=%r9
+cmovc %r8,%r9
+
+# qhasm: spc = sprc if carry
+# asm 1: cmovc <sprc=int64#7,<spc=int64#8
+# asm 2: cmovc <sprc=%rax,<spc=%r10
+cmovc %rax,%r10
+
+# qhasm: spc -= sp
+# asm 1: sub  <sp=int64#3,<spc=int64#8
+# asm 2: sub  <sp=%rdx,<spc=%r10
+sub  %rdx,%r10
+
+# qhasm: (uint64) spc >>= 5
+# asm 1: shr  $5,<spc=int64#8
+# asm 2: shr  $5,<spc=%r10
+shr  $5,%r10
+
+# qhasm: spp = *(uint64 *)(hp + pp * 8)
+# asm 1: movq   (<hp=int64#1,<pp=int64#4,8),>spp=int64#5
+# asm 2: movq   (<hp=%rdi,<pp=%rcx,8),>spp=%r8
+movq   (%rdi,%rcx,8),%r8
+
+# qhasm: *(uint64 *)(hp + pp * 8) = spc
+# asm 1: movq  <spc=int64#8,(<hp=int64#1,<pp=int64#4,8)
+# asm 2: movq  <spc=%r10,(<hp=%rdi,<pp=%rcx,8)
+movq  %r10,(%rdi,%rcx,8)
+
+# qhasm: *(uint64 *)(hp + pc * 8) = spp
+# asm 1: movq  <spp=int64#5,(<hp=int64#1,<pc=int64#6,8)
+# asm 2: movq  <spp=%r8,(<hp=%rdi,<pc=%r9,8)
+movq  %r8,(%rdi,%r9,8)
+
+# qhasm: pp = pc
+# asm 1: mov  <pc=int64#6,>pp=int64#4
+# asm 2: mov  <pc=%r9,>pp=%rcx
+mov  %r9,%rcx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto siftdownloop
+jmp ._siftdownloop
+
+# qhasm: siftuploop:
+._siftuploop:
+
+# qhasm: pc = pp
+# asm 1: mov  <pp=int64#4,>pc=int64#2
+# asm 2: mov  <pp=%rcx,>pc=%rsi
+mov  %rcx,%rsi
+
+# qhasm: pp -= 1
+# asm 1: sub  $1,<pp=int64#4
+# asm 2: sub  $1,<pp=%rcx
+sub  $1,%rcx
+
+# qhasm: (uint64) pp >>= 1
+# asm 1: shr  $1,<pp=int64#4
+# asm 2: shr  $1,<pp=%rcx
+shr  $1,%rcx
+
+# qhasm: unsigned>? pc - 0 
+# asm 1: cmp  $0,<pc=int64#2
+# asm 2: cmp  $0,<pc=%rsi
+cmp  $0,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto end if !unsigned>
+jbe ._end
+
+# qhasm: spp = *(uint64 *)(hp + pp * 8)
+# asm 1: movq   (<hp=int64#1,<pp=int64#4,8),>spp=int64#5
+# asm 2: movq   (<hp=%rdi,<pp=%rcx,8),>spp=%r8
+movq   (%rdi,%rcx,8),%r8
+
+# qhasm: spc = *(uint64 *)(hp + pc * 8)
+# asm 1: movq   (<hp=int64#1,<pc=int64#2,8),>spc=int64#6
+# asm 2: movq   (<hp=%rdi,<pc=%rsi,8),>spc=%r9
+movq   (%rdi,%rsi,8),%r9
+
+# qhasm: spp <<= 5
+# asm 1: shl  $5,<spp=int64#5
+# asm 2: shl  $5,<spp=%r8
+shl  $5,%r8
+
+# qhasm: spc <<= 5
+# asm 1: shl  $5,<spc=int64#6
+# asm 2: shl  $5,<spc=%r9
+shl  $5,%r9
+
+# qhasm: spc += sp
+# asm 1: add  <sp=int64#3,<spc=int64#6
+# asm 2: add  <sp=%rdx,<spc=%r9
+add  %rdx,%r9
+
+# qhasm: spp += sp
+# asm 1: add  <sp=int64#3,<spp=int64#5
+# asm 2: add  <sp=%rdx,<spp=%r8
+add  %rdx,%r8
+
+# qhasm: c0 = *(uint64 *)(spc +  0)
+# asm 1: movq   0(<spc=int64#6),>c0=int64#7
+# asm 2: movq   0(<spc=%r9),>c0=%rax
+movq   0(%r9),%rax
+
+# qhasm: c1 = *(uint64 *)(spc +  8)
+# asm 1: movq   8(<spc=int64#6),>c1=int64#8
+# asm 2: movq   8(<spc=%r9),>c1=%r10
+movq   8(%r9),%r10
+
+# qhasm: c2 = *(uint64 *)(spc + 16)
+# asm 1: movq   16(<spc=int64#6),>c2=int64#9
+# asm 2: movq   16(<spc=%r9),>c2=%r11
+movq   16(%r9),%r11
+
+# qhasm: c3 = *(uint64 *)(spc + 24)
+# asm 1: movq   24(<spc=int64#6),>c3=int64#10
+# asm 2: movq   24(<spc=%r9),>c3=%r12
+movq   24(%r9),%r12
+
+# qhasm: carry? c0 -= *(uint64 *)(spp +  0)
+# asm 1: subq 0(<spp=int64#5),<c0=int64#7
+# asm 2: subq 0(<spp=%r8),<c0=%rax
+subq 0(%r8),%rax
+
+# qhasm: carry? c1 -= *(uint64 *)(spp +  8) - carry
+# asm 1: sbbq 8(<spp=int64#5),<c1=int64#8
+# asm 2: sbbq 8(<spp=%r8),<c1=%r10
+sbbq 8(%r8),%r10
+
+# qhasm: carry? c2 -= *(uint64 *)(spp + 16) - carry
+# asm 1: sbbq 16(<spp=int64#5),<c2=int64#9
+# asm 2: sbbq 16(<spp=%r8),<c2=%r11
+sbbq 16(%r8),%r11
+
+# qhasm: carry? c3 -= *(uint64 *)(spp + 24) - carry
+# asm 1: sbbq 24(<spp=int64#5),<c3=int64#10
+# asm 2: sbbq 24(<spp=%r8),<c3=%r12
+sbbq 24(%r8),%r12
+# comment:fp stack unchanged by jump
+
+# qhasm: goto end if carry
+jc ._end
+
+# qhasm: spc -= sp
+# asm 1: sub  <sp=int64#3,<spc=int64#6
+# asm 2: sub  <sp=%rdx,<spc=%r9
+sub  %rdx,%r9
+
+# qhasm: (uint64) spc >>= 5
+# asm 1: shr  $5,<spc=int64#6
+# asm 2: shr  $5,<spc=%r9
+shr  $5,%r9
+
+# qhasm: spp -= sp
+# asm 1: sub  <sp=int64#3,<spp=int64#5
+# asm 2: sub  <sp=%rdx,<spp=%r8
+sub  %rdx,%r8
+
+# qhasm: (uint64) spp >>= 5
+# asm 1: shr  $5,<spp=int64#5
+# asm 2: shr  $5,<spp=%r8
+shr  $5,%r8
+
+# qhasm: *(uint64 *)(hp + pp * 8) = spc
+# asm 1: movq  <spc=int64#6,(<hp=int64#1,<pp=int64#4,8)
+# asm 2: movq  <spc=%r9,(<hp=%rdi,<pp=%rcx,8)
+movq  %r9,(%rdi,%rcx,8)
+
+# qhasm: *(uint64 *)(hp + pc * 8) = spp
+# asm 1: movq  <spp=int64#5,(<hp=int64#1,<pc=int64#2,8)
+# asm 2: movq  <spp=%r8,(<hp=%rdi,<pc=%rsi,8)
+movq  %r8,(%rdi,%rsi,8)
+# comment:fp stack unchanged by jump
+
+# qhasm: goto siftuploop
+jmp ._siftuploop
+
+# qhasm: end:
+._end:
+
+# qhasm: caller1 = caller1_stack
+# asm 1: movq <caller1_stack=stack64#1,>caller1=int64#9
+# asm 2: movq <caller1_stack=0(%rsp),>caller1=%r11
+movq 0(%rsp),%r11
+
+# qhasm: caller2 = caller2_stack
+# asm 1: movq <caller2_stack=stack64#2,>caller2=int64#10
+# asm 2: movq <caller2_stack=8(%rsp),>caller2=%r12
+movq 8(%rsp),%r12
+
+# qhasm: caller3 = caller3_stack
+# asm 1: movq <caller3_stack=stack64#3,>caller3=int64#11
+# asm 2: movq <caller3_stack=16(%rsp),>caller3=%r13
+movq 16(%rsp),%r13
+
+# qhasm: caller4 = caller4_stack
+# asm 1: movq <caller4_stack=stack64#4,>caller4=int64#12
+# asm 2: movq <caller4_stack=24(%rsp),>caller4=%r14
+movq 24(%rsp),%r14
+
+# qhasm: caller5 = caller5_stack
+# asm 1: movq <caller5_stack=stack64#5,>caller5=int64#13
+# asm 2: movq <caller5_stack=32(%rsp),>caller5=%r15
+movq 32(%rsp),%r15
+
+# qhasm: caller6 = caller6_stack
+# asm 1: movq <caller6_stack=stack64#6,>caller6=int64#14
+# asm 2: movq <caller6_stack=40(%rsp),>caller6=%rbx
+movq 40(%rsp),%rbx
+
+# qhasm: caller7 = caller7_stack
+# asm 1: movq <caller7_stack=stack64#7,>caller7=int64#15
+# asm 2: movq <caller7_stack=48(%rsp),>caller7=%rbp
+movq 48(%rsp),%rbp
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 56 - 0
dclxvi-20130329/index_heap.c

@@ -0,0 +1,56 @@
+/*
+ * File:   dclxvi-20130329/index_heap.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <assert.h>
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+#include "index_heap.h"
+
+static void heap_push(unsigned long long *h, unsigned long long *hlen, unsigned long long elem, scalar_t *s)
+{
+  /* Move up towards the root */
+  /* XXX: Check size of hlen, whether cast to signed value is ok */
+  signed long long pos = *hlen;
+  signed long long ppos = (pos-1)/2;
+  unsigned long long t;
+  h[*hlen] = elem;
+  while(pos > 0)
+  {
+    if(scalar_lt_vartime(s[h[ppos]], s[h[pos]]))
+    {
+      t = h[ppos];
+      h[ppos] = h[pos];
+      h[pos] = t;
+      pos = ppos;
+      ppos = (pos-1)/2;
+    }
+    else break;
+  } 
+  (*hlen)++;
+}
+
+/* caller's responsibility to ensure hlen>=5 and hlen odd (see asserts below) */
+void heap_init(unsigned long long *h, unsigned long long hlen, scalar_t *s)
+{
+  assert(hlen>=5); 
+  assert(hlen&1);
+  h[0] = 0;
+  unsigned long long i=1;
+  while(i<hlen)
+    heap_push(h, &i, i, s);
+}
+
+/* Put the largest value in the heap in max1, the second largest in max2 */
+void heap_get2max(unsigned long long *h, unsigned long long *max1, unsigned long long *max2, scalar_t *s)
+{
+  *max1 = h[0];
+  *max2 = h[1];
+  if(scalar_lt_vartime(s[h[1]],s[h[2]]))
+    *max2 = h[2];
+}
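+
+/* Illustrative sketch (not part of the original library): the heap above is
+   keyed on the scalars, largest on top, and is meant to drive a Bos-Coster
+   style multi-scalar multiplication.  One step could look as follows,
+   assuming scalar_sub_nored computes r = x - y and that the caller itself
+   combines the corresponding points (adding the point at index max1 into the
+   point at index max2). */
+static void example_boscoster_step(unsigned long long *h, unsigned long long hlen, scalar_t *s)
+{
+  unsigned long long max1, max2;
+  heap_get2max(h, &max1, &max2, s);            /* indices of the two largest scalars */
+  scalar_sub_nored(s[max1], s[max1], s[max2]); /* s[max1] -= s[max2], no modular reduction */
+  heap_rootreplaced(h, hlen, s);               /* sift the decreased root back into place */
+}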

+ 30 - 0
dclxvi-20130329/index_heap.h

@@ -0,0 +1,30 @@
+/*
+ * File:   dclxvi-20130329/index_heap.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef INDEX_HEAP_H
+#define INDEX_HEAP_H
+
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+
+#ifdef __cplusplus
+ extern "C" {
+#endif 
+
+void heap_init(unsigned long long *h, unsigned long long hlen, scalar_t *s);
+
+void heap_get2max(unsigned long long *h, unsigned long long *max1, unsigned long long *max2, scalar_t *s);
+
+void heap_rootreplaced(unsigned long long *h, unsigned long long hlen, scalar_t *s);
+
+#ifdef __cplusplus
+ }
+#endif 
+
+#endif

+ 184 - 0
dclxvi-20130329/linefunction.c

@@ -0,0 +1,184 @@
+/*
+ * File:   dclxvi-20130329/linefunction.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+//BEFORE
+//#include "fp2e.h"
+//#include "curvepoint_fp.h"
+//#include "twistpoint_fp2.h"
+
+
+//AFTER
+#include "mul.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+} 
+#include "curvepoint_fp.h"
+#include "twistpoint_fp2.h"
+
+
+#ifdef N_OPS
+  unsigned long long linefunction_addctr; 
+  unsigned long long linefunction_doublectr; 
+#endif
+
+void linefunction_add_ate(
+        fp2e_t rop11, 
+        fp2e_t rop12, 
+        fp2e_t rop13, 
+        twistpoint_fp2_t rop2, 
+        const twistpoint_fp2_t op1, 
+        const twistpoint_fp2_t op2, 
+        const curvepoint_fp_t op3,
+        const fp2e_t r2 // r2 = y^2, see "Faster Computation of Tate Pairings" 
+        )
+
+{
+#ifdef N_OPS
+  linefunction_addctr++; 
+#endif
+    fp2e_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9, tmp10; // Temporary variables needed for intermediate results
+
+    fp2e_mul(tmp0, op2->m_x, op1->m_t); /* tmp0 = B = x2 * T1  = x2z1^2*/
+
+    fp2e_add(tmp1, op2->m_y, op1->m_z);
+    //fp2e_short_coeffred(tmp1);
+    fp2e_square(tmp1, tmp1);
+    fp2e_sub2(tmp1, r2);
+    fp2e_sub2(tmp1, op1->m_t);
+    //fp2e_short_coeffred(tmp1);
+    fp2e_mul(tmp1, tmp1, op1->m_t); /* tmp1 = D = ((y2 + Z1)^2 - R2 - T1)T1  = 2y2z1^3 */
+
+    fp2e_sub(tmp2, tmp0, op1->m_x); /* tmp2 = H = B - X1  = x2z1^2 - x1*/
+    //fp2e_short_coeffred(tmp2);
+
+    fp2e_square(tmp3, tmp2); /* tmp3 = I = H^2  = (x2z1^2 - x1)^2*/
+
+    fp2e_double(tmp4, tmp3); 
+    fp2e_double2(tmp4); /* tmp4 = E = 4I = 4(x2z1^2 - x1)^2*/
+    fp2e_short_coeffred(tmp4);
+
+    fp2e_mul(tmp5, tmp2, tmp4); /* tmp5 = J = HE =  4(x2z1^2 - x1)(x2z1^2 - x1)^2*/
+
+    fp2e_sub(tmp6, tmp1, op1->m_y); 
+    fp2e_sub2(tmp6, op1->m_y); /* tmp6 = r = 2(D - 2Y1) = (2y2z1^3 - 2y1)*/
+    fp2e_short_coeffred(tmp6);
+    
+    fp2e_mul(tmp9, tmp6, op2->m_x); /* Needed later: tmp9 = x2(2y2z1^3 - 2y1)*/
+
+    fp2e_mul(tmp7, op1->m_x, tmp4); /* tmp7 = V = X1*E = 4x1(x2z1^2 - x1)^2*/
+
+    fp2e_square(rop2->m_x, tmp6);
+    fp2e_sub2(rop2->m_x, tmp5);
+    fp2e_sub2(rop2->m_x, tmp7);
+    fp2e_sub2(rop2->m_x, tmp7); /* X3 = r^2 - J - 2V = (2y2z1^3 - 2y1)^2 - 4(x2z1^2 - x1)(x2z1^2 - x1)^2 - 8x1(x2z1^2 - x1)^2*/
+    fp2e_short_coeffred(rop2->m_x);
+
+    fp2e_add(rop2->m_z, op1->m_z, tmp2);
+    fp2e_short_coeffred(rop2->m_z);
+    fp2e_square(rop2->m_z, rop2->m_z);
+    fp2e_sub2(rop2->m_z, op1->m_t);
+    fp2e_sub2(rop2->m_z, tmp3); /* Z3 = (z1 + H)^2 - T1 - I  = 2z1(x2z1^2 - x1) */
+    fp2e_short_coeffred(rop2->m_z);
+    
+    fp2e_add(tmp10, op2->m_y, rop2->m_z); /* Needed later: tmp10 = y2 + z3*/
+    //fp2e_short_coeffred(tmp10);
+
+    fp2e_sub(tmp8, tmp7, rop2->m_x);
+    //fp2e_short_coeffred(tmp8);
+    fp2e_mul(tmp8, tmp8, tmp6);
+    fp2e_mul(tmp0, op1->m_y, tmp5);
+    fp2e_double2(tmp0);
+    fp2e_sub(rop2->m_y, tmp8, tmp0); /* Y3 = r(V - X3) - 2Y1*J = (2y2z1^3 - 2y1)(4x1(x2z1^2 - x1)^2 - x3) - 8y1(x2z1^2 - x1)(x2z1^2 - x1)^2*/
+    fp2e_short_coeffred(rop2->m_y);
+
+    fp2e_square(rop2->m_t, rop2->m_z); /* T3 = Z3^2 */
+
+    fp2e_square(tmp10, tmp10); /* tmp10 = (y2 + z3)^2 */
+    fp2e_sub2(tmp10, r2);
+    fp2e_sub2(tmp10, rop2->m_t); 
+    //fp2e_short_coeffred(tmp10);
+    fp2e_double2(tmp9);
+    fp2e_sub(rop11, tmp9, tmp10); /* tmp9 = 4x2(y2z1^3 - y1) - 2z3y2 */
+    fp2e_short_coeffred(rop11);
+
+    fp2e_mul_fpe(tmp10, rop2->m_z, op3->m_y); /* tmp10 = z3y_Q */
+    fp2e_double(rop13, tmp10);
+    //fp2e_short_coeffred(rop13);
+
+    fp2e_neg(tmp6, tmp6);
+    fp2e_mul_fpe(tmp1, tmp6, op3->m_x);
+    fp2e_double(rop12, tmp1);
+    fp2e_short_coeffred(rop12);
+}
+
+void linefunction_double_ate(fp2e_t rop11, fp2e_t rop12, fp2e_t rop13, twistpoint_fp2_t rop2, const twistpoint_fp2_t op1, const curvepoint_fp_t op3)
+{
+#ifdef N_OPS
+  linefunction_doublectr++; 
+#endif
+    fp2e_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp7, dummy; // Temporary variables needed for intermediate results
+
+    fp2e_square(tmp0, op1->m_x); /* tmp0 = A = X1^2 = x1^2 */
+    fp2e_square(tmp1, op1->m_y); /* tmp1 = B = Y1^2 = y1^2 */
+
+    fp2e_square(tmp2, tmp1); /* tmp2 = C = B^2 = y1^4 */
+
+    fp2e_add(tmp3, op1->m_x, tmp1);
+    //fp2e_short_coeffred(tmp3);
+    fp2e_square(tmp3, tmp3);
+    fp2e_sub2(tmp3, tmp0);
+    fp2e_sub2(tmp3, tmp2);
+    fp2e_double2(tmp3); /* tmp3 = D = 2(X1 + B)^2 - A - C) = 4x1y1^2 */
+
+    fp2e_triple(tmp4, tmp0); /* tmp4 = E = 3A = 3x1^2 */
+    fp2e_short_coeffred(tmp4);
+    
+    fp2e_add(tmp7, tmp4, op1->m_x); /* Needed later */
+    //fp2e_short_coeffred(tmp7);
+
+    fp2e_square(tmp5, tmp4); /* tmp5 = G = E^2 = 9x1^4 */
+
+    fp2e_sub(rop2->m_x, tmp5, tmp3);
+    fp2e_sub2(rop2->m_x, tmp3); /* X3 = G - 2D = 9x1^4 - 8x1y1^2 */
+    fp2e_short_coeffred(rop2->m_x);
+
+    fp2e_add(rop2->m_z, op1->m_y, op1->m_z);
+    //fp2e_short_coeffred(rop2->m_z);
+    fp2e_square(rop2->m_z, rop2->m_z);
+    fp2e_sub2(rop2->m_z, tmp1);
+    fp2e_sub2(rop2->m_z, op1->m_t); /* Z3 = (Y1 + Z1)^2 - B - T1 = 2y1z1; */
+    fp2e_short_coeffred(rop2->m_z);
+
+    fp2e_sub(rop2->m_y, tmp3, rop2->m_x);
+    fp2e_short_coeffred(rop2->m_y);
+    fp2e_mul(rop2->m_y, rop2->m_y, tmp4); 
+    fp2e_double(dummy, tmp2);
+    fp2e_double2(dummy);
+    fp2e_double2(dummy);
+    fp2e_sub2(rop2->m_y, dummy); /* Y3 = E(D - X3) - 8C = 3x1^2(4x1y1^2 - X3) - 8y1^4 */
+    fp2e_short_coeffred(rop2->m_y);
+
+    fp2e_mul(tmp3, tmp4, op1->m_t);
+    fp2e_double2(tmp3);
+    fp2e_neg(tmp3, tmp3);
+    fp2e_mul_fpe(rop12, tmp3, op3->m_x); /* tmp3 = -6x1^2z1^2 * x_Q */
+
+    fp2e_square(tmp7, tmp7);
+    fp2e_sub2(tmp7, tmp0);
+    fp2e_sub2(tmp7, tmp5);
+    fp2e_double(dummy, tmp1);
+    fp2e_double2(dummy);
+    fp2e_sub(rop11, tmp7, dummy); /* tmp7 = 6x1^3 - 4y1^2 */
+    fp2e_short_coeffred(rop11);
+
+    fp2e_mul(tmp0, rop2->m_z, op1->m_t);
+    fp2e_double2(tmp0);
+    fp2e_mul_fpe(rop13, tmp0, op3->m_y);
+    
+    fp2e_square(rop2->m_t, rop2->m_z); 
+}
+

+ 36 - 0
dclxvi-20130329/linefunction.h

@@ -0,0 +1,36 @@
+/*
+ * File:   dclxvi-20130329/linefunction.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef LINEFUNCTION_H
+#define LINEFUNCTION_H
+
+#include "fp2e.h"
+#include "curvepoint_fp.h"
+#include "twistpoint_fp2.h"
+
+void linefunction_add_ate(
+        fp2e_t rop11, 
+        fp2e_t rop12, 
+        fp2e_t rop13, 
+        twistpoint_fp2_t rop2, 
+        const twistpoint_fp2_t op1, 
+        const twistpoint_fp2_t op2, 
+        const curvepoint_fp_t op3,
+        const fp2e_struct_t *r2 // r2 = y^2, see "Faster Computation of Tate Pairings" 
+        );
+
+
+
+void linefunction_double_ate(
+        fp2e_t rop11, 
+        fp2e_t rop12, 
+        fp2e_t rop13, 
+        twistpoint_fp2_t rop2, 
+        const twistpoint_fp2_t op1, 
+        const curvepoint_fp_t op3
+        );
+
+#endif

+ 132 - 0
dclxvi-20130329/mul.c

@@ -0,0 +1,132 @@
+/*
+ * File:   dclxvi-20130329/mul.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <math.h>
+#include "mul.h"
+#include "mydouble.h"
+#include "zout.hpp"
+
+extern const double bn_v;
+extern const double bn_v6;
+
+void polymul(mydouble *h, const mydouble *f, const mydouble *g)
+{
+	mydouble t[24];
+	//debug(421);	
+	//zout(f[0].v,g[0].v,f[0].mmax,g[0].mmax);
+	//if (f[0].mmax>3*5604099)	zout(f[0].mmax);	
+
+	t[0] = f[0]*g[0];
+	//debug(422);
+	t[1] = f[0]*g[1] + f[1]*g[0];
+	t[2] = 6*f[1]*g[1] + (f[0]*g[2] + f[2]*g[0]);
+	t[3] = (f[1]*g[2] + f[2]*g[1])*6 + (f[0]*g[3] + f[3]*g[0]);
+	t[4] = (f[1]*g[3] + f[2]*g[2] + f[3]*g[1])*6 + (f[0]*g[4] + f[4]*g[0]);
+	t[5] = (f[1]*g[4] + f[2]*g[3] + f[3]*g[2] + f[4]*g[1])*6 + (f[0]*g[5] + f[5]*g[0]);
+	t[6] = (f[1]*g[5] + f[2]*g[4] + f[3]*g[3] + f[4]*g[2] + f[5]*g[1])*6 + f[0]*g[6] + f[6]*g[0];
+	t[7] = (f[0]*g[7] + f[1]*g[6] + f[2]*g[5] + f[3]*g[4] + f[4]*g[3] + f[5]*g[2] + f[6]*g[1] + f[7]*g[0]);
+	t[8] = (f[1]*g[7] + f[7]*g[1])*6 + (f[0]*g[8] + f[2]*g[6] + f[3]*g[5] + f[4]*g[4] + f[5]*g[3] + f[6]*g[2] + f[8]*g[0]);
+	t[9] = (f[1]*g[8] + f[2]*g[7] + f[7]*g[2] + f[8]*g[1])*6 + (f[0]*g[9] + f[3]*g[6] + f[4]*g[5] + f[5]*g[4] + f[6]*g[3] + f[9]*g[0]);
+	t[10] = (f[1]*g[9] + f[2]*g[8] + f[3]*g[7] + f[7]*g[3] + f[8]*g[2] + f[9]*g[1])*6 + (f[0]*g[10] + f[4]*g[6] + f[5]*g[5] + f[6]*g[4] + f[10]*g[0]);
+	
+	t[11] = (f[1]*g[10] + f[2]*g[9] + f[3]*g[8] + f[4]*g[7] + f[7]*g[4] + f[8]*g[3] + f[9]*g[2] + f[10]*g[1])*6 + (f[0]*g[11] + f[5]*g[6] + f[6]*g[5] + f[11]*g[0]);
+	t[12] = (f[1]*g[11] + f[2]*g[10] + f[3]*g[9] + f[4]*g[8] + f[5]*g[7] + f[7]*g[5] + f[8]*g[4] + f[9]*g[3] + f[10]*g[2] + f[11]*g[1])*6 + f[6]*g[6];
+	t[13] = (f[2]*g[11] + f[3]*g[10] + f[4]*g[9] + f[5]*g[8] + f[6]*g[7] + f[7]*g[6] + f[8]*g[5] + f[9]*g[4] + f[10]*g[3] + f[11]*g[2]);
+	t[14] = f[7]*g[7]*6 + (f[3]*g[11] + f[4]*g[10] + f[5]*g[9] + f[6]*g[8] + f[8]*g[6] + f[9]*g[5] + f[10]*g[4] + f[11]*g[3]);
+	t[15] = (f[7]*g[8] + f[8]*g[7])*6 + (f[4]*g[11] + f[5]*g[10] + f[6]*g[9] + f[9]*g[6] + f[10]*g[5] + f[11]*g[4]);
+	t[16] = (f[7]*g[9] + f[8]*g[8] + f[9]*g[7])*6 + (f[5]*g[11] + f[6]*g[10] + f[10]*g[6] + f[11]*g[5]);
+	t[17] = (f[7]*g[10] + f[8]*g[9] + f[9]*g[8] + f[10]*g[7])*6 + (f[6]*g[11] + f[11]*g[6]);
+	t[18] = (f[7]*g[11] + f[8]*g[10] + f[9]*g[9] + f[10]*g[8] + f[11]*g[7])*6;
+	t[19] = (f[8]*g[11] + f[9]*g[10] + f[10]*g[9] + f[11]*g[8]);
+	t[20] = (f[9]*g[11] + f[10]*g[10] + f[11]*g[9]);
+	t[21] = (f[10]*g[11] + f[11]*g[10]);
+	t[22] = f[11]*g[11];
+	
+	int i;
+	for(i=0;i<23;i++)
+	h[i]=t[i];
+}
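+
+/* The above is plain schoolbook multiplication of two 12-coefficient operands
+   into a 23-coefficient product; the scattered factors of 6 compensate for
+   the unequal weights (1, 6*V^i, 36*V^i, cf. gmp_convert.c) carried by the
+   coefficients, so that every product term ends up in the weight convention
+   of its target position. */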
+
+
+void degred(mydouble *h)
+{
+    h[0] = h[0] - h[12] + 6*h[15] - 2*h[18] - 6*h[21];
+    h[1] = h[1] - h[13] + h[16] - 2*h[19] - h[22];
+    h[2] = h[2] - h[14] + h[17] - 2*h[20];
+    h[3] = h[3] - h[12] + 5*h[15] - h[18] - 8*h[21];
+    h[4] = h[4] - 6*h[13] + 5*h[16] - 6*h[19] - 8*h[22];
+    h[5] = h[5] - 6*h[14] + 5*h[17] - 6*h[20];
+    h[6] = h[6] - 4*h[12] + 18*h[15] - 3*h[18];
+    h[6] -= 30*h[21];
+    h[7] = h[7] - 4*h[13] + 3*h[16] - 3*h[19] - 5*h[22];
+    h[8] = h[8] - 4*h[14] + 3*h[17] - 3*h[20];
+    h[9] = h[9] - h[12] + 2*h[15] + h[18] - 9*h[21];
+    h[10] = h[10] - 6*h[13] + 2*h[16] + 6*h[19] - 9*h[22];
+    h[11] = h[11] - 6*h[14] + 2*h[17] + 6*h[20];
+}
+
+void coeffred_round_par(mydouble *h)
+{
+    mydouble carry = 0;    
+    
+    carry = round(h[1]/bn_v);
+    h[1] = remround(h[1],bn_v);
+    h[2] += carry;
+    carry = round(h[4]/bn_v);
+    h[4] = remround(h[4],bn_v);
+    h[5] += carry;
+    carry = round(h[7]/bn_v);
+    h[7] = remround(h[7],bn_v);
+    h[8] += carry;
+    carry = round(h[10]/bn_v);
+    h[10] = remround(h[10],bn_v);
+    h[11] += carry;
+    
+    carry = round(h[2]/bn_v);
+    h[2] = remround(h[2],bn_v);
+    h[3] += carry;
+    carry = round(h[5]/bn_v);
+    h[5] = remround(h[5],bn_v);
+    h[6] += carry;
+    carry = round(h[8]/bn_v);
+    h[8] = remround(h[8],bn_v);
+    h[9] += carry;
+    carry = round(h[11]/bn_v);
+    h[11] = remround(h[11],bn_v);
+    
+    h[0] = h[0] - carry;
+    h[3] = h[3] - carry;
+    h[6] = h[6] - 4*carry;
+    h[9] = h[9] - carry;
+
+    carry = round(h[0]/bn_v6);    // h0 = 2^53 - 1
+    h[0] = remround(h[0],bn_v6);  // carry = (2^53-1)/6v = 763549741
+    h[1] += carry;                // h1 = v+763549741 = 765515821
+    carry = round(h[3]/bn_v);     // h3 = 2^53 - 1
+    h[3] = remround(h[3],bn_v);   // carry = (2^53-1)/v = 4581298449
+    h[4] += carry;                // h4 = v + 4581298449 = 4583264529
+    carry = round(h[6]/bn_v6);
+    h[6] = remround(h[6],bn_v6);
+    h[7] += carry;
+    carry = round(h[9]/bn_v);
+    h[9] = remround(h[9],bn_v);
+    h[10] += carry;
+
+    carry = round(h[1]/bn_v);    // carry = 765515821/v = 389
+    h[1] = remround(h[1],bn_v);
+    h[2] += carry;
+    carry = round(h[4]/bn_v);    // carry = 4583264529/v = 2331
+    h[4] = remround(h[4],bn_v);
+    h[5] += carry;
+    carry = round(h[7]/bn_v);
+    h[7] = remround(h[7],bn_v);
+    h[8] += carry;
+    carry = round(h[10]/bn_v);
+    h[10] = remround(h[10],bn_v);
+    h[11] += carry;
+}
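+
+/* Carry chain of coeffred_round_par: each coefficient is reduced with
+   remround to a small centered remainder modulo bn_v (modulo bn_v6 = 6*bn_v
+   at positions 0 and 6) and the rounded quotient is added to the next
+   coefficient; the carry coming out of h[11] is folded back into h[0], h[3],
+   h[9] (once each) and h[6] (four times), all with negative sign. */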
+
+

+ 16 - 0
dclxvi-20130329/mul.h

@@ -0,0 +1,16 @@
+/*
+ * File:   dclxvi-20130329/mul.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef MUL_H
+#define MUL_H
+
+#include "mydouble.h"
+
+void polymul(mydouble *h, const mydouble *f, const mydouble *g);
+void degred(mydouble *h);
+void coeffred_round_par(mydouble *h);
+
+#endif

+ 18 - 0
dclxvi-20130329/mydouble.c

@@ -0,0 +1,18 @@
+/*
+ * File:   dclxvi-20130329/mydouble.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include "mydouble.h"
+
+#ifndef CHECK
+#include <math.h>
+
+double remround(double a, double d)
+{
+  double carry = round(a/d);
+  return a - carry*d;
+}
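+
+/* remround(a,d) subtracts from a the multiple of d nearest to a, i.e. it
+   returns a remainder in [-d/2, d/2]; for example
+   remround(10,3) = 10 - 3*3 =  1  and  remround(11,3) = 11 - 4*3 = -1. */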
+
+#endif

+ 20 - 0
dclxvi-20130329/mydouble.h

@@ -0,0 +1,20 @@
+/*
+ * File:   dclxvi-20130329/mydouble.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef MYDOUBLE_H
+#define MYDOUBLE_H
+
+#ifdef CHECK
+	#include "checkdouble.h"
+	#define mydouble CheckDouble
+#else
+	#define mydouble double
+	#define setmax(x,y)
+	#define todouble(x) x
+	double remround(double a, double d);
+#endif
+
+#endif

+ 137 - 0
dclxvi-20130329/optate.c

@@ -0,0 +1,137 @@
+/*
+ * File:   dclxvi-20130329/optate.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+
+
+
+//BEFORE
+//#include "fp2e.h"
+//#include "fp6e.h"
+//#include "fp12e.h"
+//AFTER
+#include "mul.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+#include "fp12e.h"
+} 
+#include "curvepoint_fp.h"
+#include "twistpoint_fp2.h"
+#include "linefunction.h"
+#include "optate.h"
+#include "final_expo.h"
+#include "zout.hpp"
+//#include "parameters.h"
+
+extern const unsigned long bn_naflen_6uplus2;
+extern const scalar_t bn_6uplus2;
+extern const fpe_t bn_zeta2;
+extern const fp2e_t bn_z2p;
+extern const fp2e_t bn_z3p;
+extern const signed char bn_6uplus2_naf[66];
+
+void optate_miller(fp12e_t rop, const twistpoint_fp2_t op1, const curvepoint_fp_t op2)
+{
+    // op1 and op2 are assumed to be in affine coordinates!
+    twistpoint_fp2_t q1, q2;//, q3;
+    fp12e_setone(rop);
+
+    fp2e_t dummy1, dummy2, dummy3;
+    fp2e_t tfp2e1, tfp2e2;
+
+    twistpoint_fp2_t r, t, mop1;
+    twistpoint_fp2_set(r, op1);
+    twistpoint_fp2_neg(mop1, op1);
+    fp2e_setone(r->m_t); /* As r has to be in affine coordinates this is ok */
+    fp2e_setone(t->m_t); /* As t has to be in affine coordinates this is ok */
+
+    fp2e_t r2;
+    fp2e_square(r2, op1->m_y);
+
+    unsigned int i;
+    /*
+    for(i = bn_bitlen_6uplus2 - 1; i > 0; i--) 
+    {
+        linefunction_double_ate(dummy1, dummy2, dummy3, r, r, op2);
+        if(i != bn_bitlen_6uplus2 -1) fp12e_square(rop, rop);
+        fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+
+        if (scalar_getbit(bn_6uplus2, i - 1))
+        {
+            linefunction_add_ate(dummy1, dummy2, dummy3, r, r, op1, op2, r2);
+            fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+        }
+    }
+    */
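+    /* The NAF digits of 6u+2 are processed most-significant first: the top
+       digit is covered by starting from r = op1, every remaining digit costs
+       one doubling step, and a digit of +1 (resp. -1) additionally mixes in
+       op1 (resp. -op1).  For a small example, 7 has NAF digits 1,0,0,-1,
+       i.e. 7 = 8 - 1. */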
+    for(i = bn_naflen_6uplus2-1; i > 0; i--) 
+    {
+        linefunction_double_ate(dummy1, dummy2, dummy3, r, r, op2);
+        if(i != bn_naflen_6uplus2 -1) fp12e_square(rop, rop);
+        fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+
+        if (bn_6uplus2_naf[i-1]==1)
+        {
+            linefunction_add_ate(dummy1, dummy2, dummy3, r, r, op1, op2, r2);
+            fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+        }
+        if (bn_6uplus2_naf[i-1]==-1)
+        {
+            linefunction_add_ate(dummy1, dummy2, dummy3, r, r, mop1, op2, r2);
+            fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+        }
+    }
+
+
+    /* Compute Q2 */
+    fp2e_mul_fpe(tfp2e1, op1->m_x, bn_zeta2);
+    twistpoint_fp2_affineset_fp2e(q2, tfp2e1, op1->m_y); 
+
+    /* Compute Q1 */
+    fp2e_set(tfp2e1, op1->m_x);
+    fp2e_conjugate(tfp2e1, tfp2e1);
+    fp2e_mul(tfp2e1, tfp2e1, bn_z2p);
+    /*
+    printf("\n");
+    fp2e_print(stdout, bn_z2p);
+    printf("\n");
+    */
+    fp2e_set(tfp2e2, op1->m_y);
+    fp2e_conjugate(tfp2e2, tfp2e2);
+    fp2e_mul(tfp2e2, tfp2e2, bn_z3p);
+    twistpoint_fp2_affineset_fp2e(q1, tfp2e1, tfp2e2);
+
+    /* Compute Q3 */
+    //fp2e_mul_fpe(tfp2e3, tfp2e1, bn_zeta2);
+    //fp2e_neg(tfp2e2, tfp2e2);
+    //twistpoint_fp2_affineset_fp2e(q3, tfp2e3, tfp2e2);
+
+    /* Remaining line functions */
+    fp2e_square(r2, q1->m_y);
+    linefunction_add_ate(dummy1, dummy2, dummy3, t, r, q1, op2, r2);
+    fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+    
+    fp2e_square(r2, q2->m_y);
+    linefunction_add_ate(dummy1, dummy2, dummy3, t, t, q2, op2, r2);
+    fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+    
+    //fp2e_square(r2, q3->m_y);
+    //linefunction_add_ate(dummy1, dummy2, dummy3, t, t, q3, op2, r2);
+    //fp12e_mul_line(rop, rop, dummy1, dummy2, dummy3);
+}
+
+void optate(fp12e_t rop, const twistpoint_fp2_t op1, const curvepoint_fp_t op2)
+{
+  int retone;
+  fp12e_t d;
+  fp12e_setone(d);
+  optate_miller(rop, op1, op2);
+  final_expo(rop);
+  retone  = fp2e_iszero(op1->m_z);
+  retone |= fpe_iszero(op2->m_z);
+  fp12e_cmov(rop, d, retone);
+}

+ 17 - 0
dclxvi-20130329/optate.h

@@ -0,0 +1,17 @@
+/*
+ * File:   dclxvi-20130329/optate.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef OPTATE_H
+#define OPTATE_H
+
+#include "curvepoint_fp.h"
+#include "twistpoint_fp2.h"
+#include "fp12e.h"
+
+void optate(fp12e_t rop, const twistpoint_fp2_t op1, const curvepoint_fp_t op2);
+void optate_miller(fp12e_t rop, const twistpoint_fp2_t op1, const curvepoint_fp_t op2);
+
+#endif

+ 83 - 0
dclxvi-20130329/parameters.c

@@ -0,0 +1,83 @@
+//#ifndef NEW_PARAMETERS
+
+
+
+/*
+ * File:   dclxvi-20130329/parameters.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include "fpe.h"
+#include "fp2e.h"
+#include "fp6e.h"
+#include "fp12e.h"
+#include "curvepoint_fp.h"
+#include "twistpoint_fp2.h"
+#include "scalar.h"
+//#include "scalar_512.h"
+#ifdef __cplusplus
+#define EXTERN extern
+#else
+#define EXTERN 
+#endif
+
+//EXTERN const scalar_t bn_6uplus2 =  {0x1EC817A18A131208ULL,2,0,0};
+#define BN_6UPLUS2_NAFLEN 66
+EXTERN const unsigned long bn_naflen_6uplus2 = BN_6UPLUS2_NAFLEN;
+EXTERN const signed char bn_6uplus2_naf[BN_6UPLUS2_NAFLEN] = {0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, -1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, -1, 0, 1, 0, 0, 0, 1, 0, -1, 0, 0, 0, -1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, -1, 0, -1, 0, 0, 0, 0, 1, 0, 0, 0, 1};
+//EXTERN const scalar_t bn_u =        {0x5BBC1015F02AC17DULL, 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL};
+EXTERN const scalar_t bn_n       = {0x1A2EF45B57AC7261ULL, 0x2E8D8E12F82B3924ULL, 0xAA6FECB86184DC21ULL, 0x8FB501E34AA387F9ULL};
+EXTERN const scalar_t bn_pminus2 = {0x185CAC6C5E089665ULL, 0xEE5B88D120B5B59EULL, 0xAA6FECB86184DC21ULL, 0x8FB501E34AA387F9ULL};
+
+//EXTERN const unsigned long bn_u_bitsize = 63;
+
+EXTERN const double bn_v = 1868033.;
+EXTERN const double bn_v6 = 11208198.;
+const char * bn_pstr = "65000549695646603732796438742359905742825358107623003571877145026864184071783";
+EXTERN const scalar_t bn_v_scalar = {1868033,0,0,0};
+
+EXTERN const fpe_t bn_zeta       = {{{ -5604098, -934016, -934016, 2, 0, 0, -5604096, -934016, -934016, 1, 0, 0}}};  /* zeta   */
+EXTERN const fpe_t bn_zeta2      = {{{ 5604097, 934016, 934016, -2, 0, 0, -5604102, -934016, -934016, 0, 0, 0}}};  /* zeta^2 */
+
+EXTERN const curvepoint_fp_t bn_curvegen = {{{{{1.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.}}}, 
+                                      {{{ -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}},
+                                      {{{1.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.}}},
+                                      {{{0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.,0.}}}}};                                                       
+
+EXTERN const twistpoint_fp2_t bn_twistgen = {{{{{490313, 4260028, -821156, -818020, 106592, -171108, 757738, 545601, 597403,
+                       366066, -270886, -169528, 3101279, 2043941, -726481, 382478, -650880, -891316,
+                       -13923, 327200, -110487, 473555, -7301, 608340}}},
+                                              {{{-4628877, 3279202, 431044, 459682, -606446, -924615, -927454, 90760, 13692,
+                                                                       -225706, -430013, -373196, 3004032, 4097571, 380900, 919715, -640623, -402833,
+                                                                       -729700, -163786, -332478, -440873, 510935, 593941}}},
+                                              {{{1.,0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0.}}},
+                                              {{{0.,0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0., 0.,0.,0.,0.,0.,0.}}}}};
+
+EXTERN const fp2e_t bn_z2p          = {{{-3981901, -4468327, 248857, -740622, 900229, -562222,
+                         260246, -632491, -928317, -38527, 838674, 36774, -2702081, 3668149,
+                                                -873042, 304894, 876721, 213663,
+                                                                       562599, -128685, -325465, 518143, 457851, 750024 }}}; /* Z^(2p) */
+EXTERN const fp2e_t bn_z3p          = {{{-1220868, -3662603, -18020, -54060, 771971, 447880,
+                         -925219, -907622, 808438, 557280, -170086, -510257, -548011, -1644029,
+                                                332930, -869243, -918612,
+                                                                       -887802, -656367, -101068, 599384, -69882, -756823, -402435 }}}; /* Z^(3p) */
+
+EXTERN const fp2e_t bn_ypminus1 = {{{-3981901, -4468327, 248857, -740622, 900229,
+                         -562222, 260246, -632491, -928317, -38527, 838674, 36774, -2702081,
+                                                3668149, -873042, 304894, 876721,
+                                                                       213663, 562599, -128685, -325465, 518143, 457851, 750024 }}}; // Y^{p-1} lies in F_{p^2}
+EXTERN const fp2e_t bn_zpminus1 = {{{-127312, 512442, -137362, 859841, -693124, 477483,
+                         -456715, 571378, -391523, 771884, -684646, 729153, 4294836, 3621570,
+                                                -839768, -538090, -213833,
+                                                                       -814642, -240945, -172644, 308331, -116810, 574718, 249147 }}}; // Z^{p-1}, lies in F_{p^2}
+
+EXTERN const fp2e_t bn_ypminus1_squ = {{{1555911, 5331252, -776828, 226463,
+                         691213, -261413, -410662, -394138, -432410, -178831, -475754,
+                                                92316, -5497403, -1697028, 207147, -413437,
+                                                                       -291878, 77064, 214666, 415072, -853656, 644193, 622068, 571473 }}}; // (Y^{p-1})^2 i F_{p^2}
+
+#undef EXTERN
+
+
+//#endif

File diff suppressed because it is too large
+ 40 - 0
dclxvi-20130329/parameters.h


+ 111 - 0
dclxvi-20130329/scalar.c

@@ -0,0 +1,111 @@
+/*
+ * File:   dclxvi-20130329/scalar.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "scalar.h"
+
+#ifndef NEW_PARAMETERS
+
+void scalar_setrandom(scalar_t rop, const scalar_t bound)
+{
+  int i;
+  FILE *urand = fopen("/dev/urandom", "r");
+  if (urand == NULL)
+  {
+    fprintf(stderr, "Could not open device file /dev/urandom\n");
+    exit(1);
+  }
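+  /* Rejection sampling: fill rop with 32 random bytes and retry until the
+     value is strictly below bound, so the result is uniform in [0, bound). */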
+  do
+  {
+    for(i=0;i<32;i++) 
+      ((unsigned char*)rop)[i] = fgetc(urand);
+  }
+  while(!scalar_lt_vartime(rop,bound));
+  fclose(urand);
+}
+
+void scalar_set_lluarray(scalar_t rop, unsigned long long v[4])
+{
+  int i;
+  for(i=0;i<4;i++) rop[i] = v[i];
+}
+
+int scalar_getbit(const scalar_t s, unsigned int pos)
+{
+  assert(pos < 256);
+  return (s[pos >> 6] >> (pos & 0x3f)) & 1;
+}
+
+// Returns the position of the most significant set bit
+int scalar_scanb(const scalar_t s)
+{
+  int i;
+  unsigned int pos = 0;
+  for(i=255;i>0;i--)
+    if(scalar_getbit(s,i) && pos == 0) pos = i;
+  return pos;
+}
+
+int scalar_iszero_vartime(const scalar_t s)
+{
+  return ((s[0] | s[1] | s[2] | s[3]) == 0);
+}
+
+void scalar_window4(signed char r[65], const scalar_t s)
+{
+  char carry;
+  int i;
+  for(i=0;i<16;i++)
+    r[i] = (s[0] >> (4*i)) & 15;
+  for(i=0;i<16;i++)
+    r[i+16] = (s[1] >> (4*i)) & 15;
+  for(i=0;i<16;i++)
+    r[i+32] = (s[2] >> (4*i)) & 15;
+  for(i=0;i<16;i++)
+    r[i+48] = (s[3] >> (4*i)) & 15;
+
+  /* Making it signed */
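+  /* Convert the base-16 digits from {0,...,15} into signed digits in
+     {-8,...,7}: a digit >= 8 has 16 subtracted and a carry of 1 propagated
+     into the next digit.  Negative digits are later handled by negating the
+     precomputed point, which keeps the lookup table small. */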
+  carry = 0;
+  for(i=0;i<64;i++)
+  {
+    r[i] += carry;
+    r[i+1] += r[i] >> 4;
+    r[i] &= 15;
+    carry = r[i] >> 3;
+    r[i] -= carry << 4;
+  }
+  r[64] = carry;
+}
+
+// Returns 1 if a < b, 0 otherwise
+int scalar_lt_vartime(const scalar_t a, const scalar_t b)
+{
+  if(a[3] < b[3]) return 1;
+  if(a[3] > b[3]) return 0;
+  if(a[2] < b[2]) return 1;
+  if(a[2] > b[2]) return 0;
+  if(a[1] < b[1]) return 1;
+  if(a[1] > b[1]) return 0;
+  if(a[0] < b[0]) return 1;
+  if(a[0] > b[0]) return 0;
+  return 0;
+}
+
+void scalar_print(FILE *fh, const scalar_t t)
+{
+  int i;
+  fprintf(fh, "{0x%llx,\t", t[0]);
+  for(i=1;i<=2;i++)
+  {
+	  fprintf(fh, "0x%llx,\t", t[i]);
+  }
+  fprintf(fh, "0x%llx};\n", t[3]);  
+
+}
+
+#endif

+ 45 - 0
dclxvi-20130329/scalar.h

@@ -0,0 +1,45 @@
+/*
+ * File:   dclxvi-20130329/scalar.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef SCALAR_H
+#define SCALAR_H
+#include <stdio.h>
+
+#ifdef __cplusplus
+ extern "C" {
+#endif 
+
+#ifndef NEW_PARAMETERS
+
+typedef unsigned long long scalar_t[4] ;
+
+void scalar_sub_nored(scalar_t r, scalar_t x, scalar_t y);
+
+void scalar_setrandom(scalar_t rop, const scalar_t bound);
+
+
+void scalar_set_lluarray(scalar_t rop, unsigned long long v[4]);
+
+int scalar_getbit(const scalar_t s, unsigned int pos);
+
+// Returns the position of the most significant set bit
+int scalar_scanb(const scalar_t s);
+
+int scalar_iszero_vartime(const scalar_t s);
+
+void scalar_window4(signed char r[65], const scalar_t s);
+
+int scalar_lt_vartime(const scalar_t a, const scalar_t b);
+
+void scalar_print(FILE *fh, const scalar_t t);
+
+#endif
+
+#ifdef __cplusplus
+ }
+#endif 
+
+#endif

+ 146 - 0
dclxvi-20130329/scalar_sub_nored.s

@@ -0,0 +1,146 @@
+# File:   dclxvi-20130329/scalar_sub_nored.s
+# Author: Ruben Niederhagen, Peter Schwabe
+# Public Domain
+
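+# Computes r = x - y on 256-bit scalars stored as four 64-bit limbs, using a
+# sub/sbb borrow chain.  No modular reduction is performed (hence "nored");
+# any final borrow is discarded.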
+
+# qhasm: int64 rp
+
+# qhasm: int64 xp
+
+# qhasm: int64 yp
+
+# qhasm: input rp
+
+# qhasm: input xp
+
+# qhasm: input yp
+
+# qhasm: int64 r0
+
+# qhasm: int64 r1
+
+# qhasm: int64 r2
+
+# qhasm: int64 r3
+
+# qhasm: int64 t0
+
+# qhasm: int64 t1
+
+# qhasm: int64 t2
+
+# qhasm: int64 t3
+
+# qhasm:   int64 caller1
+
+# qhasm:   int64 caller2
+
+# qhasm:   int64 caller3
+
+# qhasm:   int64 caller4
+
+# qhasm:   int64 caller5
+
+# qhasm:   int64 caller6
+
+# qhasm:   int64 caller7
+
+# qhasm:   caller caller1
+
+# qhasm:   caller caller2
+
+# qhasm:   caller caller3
+
+# qhasm:   caller caller4
+
+# qhasm:   caller caller5
+
+# qhasm:   caller caller6
+
+# qhasm:   caller caller7
+
+# qhasm:   stack64 caller4_stack
+
+# qhasm:   stack64 caller5_stack
+
+# qhasm:   stack64 caller6_stack
+
+# qhasm:   stack64 caller7_stack
+
+# qhasm: enter scalar_sub_nored
+.text
+.p2align 5
+.globl _scalar_sub_nored
+.globl scalar_sub_nored
+_scalar_sub_nored:
+scalar_sub_nored:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: r0 = *(uint64 *)(xp +  0)
+# asm 1: movq   0(<xp=int64#2),>r0=int64#4
+# asm 2: movq   0(<xp=%rsi),>r0=%rcx
+movq   0(%rsi),%rcx
+
+# qhasm: r1 = *(uint64 *)(xp +  8)
+# asm 1: movq   8(<xp=int64#2),>r1=int64#5
+# asm 2: movq   8(<xp=%rsi),>r1=%r8
+movq   8(%rsi),%r8
+
+# qhasm: r2 = *(uint64 *)(xp + 16)
+# asm 1: movq   16(<xp=int64#2),>r2=int64#6
+# asm 2: movq   16(<xp=%rsi),>r2=%r9
+movq   16(%rsi),%r9
+
+# qhasm: r3 = *(uint64 *)(xp + 24)
+# asm 1: movq   24(<xp=int64#2),>r3=int64#2
+# asm 2: movq   24(<xp=%rsi),>r3=%rsi
+movq   24(%rsi),%rsi
+
+# qhasm: carry? r0 -= *(uint64 *)(yp +  0)
+# asm 1: subq 0(<yp=int64#3),<r0=int64#4
+# asm 2: subq 0(<yp=%rdx),<r0=%rcx
+subq 0(%rdx),%rcx
+
+# qhasm: carry? r1 -= *(uint64 *)(yp +  8) - carry
+# asm 1: sbbq 8(<yp=int64#3),<r1=int64#5
+# asm 2: sbbq 8(<yp=%rdx),<r1=%r8
+sbbq 8(%rdx),%r8
+
+# qhasm: carry? r2 -= *(uint64 *)(yp + 16) - carry
+# asm 1: sbbq 16(<yp=int64#3),<r2=int64#6
+# asm 2: sbbq 16(<yp=%rdx),<r2=%r9
+sbbq 16(%rdx),%r9
+
+# qhasm: r3 -= *(uint64 *)(yp + 24) - carry
+# asm 1: sbbq 24(<yp=int64#3),<r3=int64#2
+# asm 2: sbbq 24(<yp=%rdx),<r3=%rsi
+sbbq 24(%rdx),%rsi
+
+# qhasm: *(uint64 *)(rp +  0) = r0
+# asm 1: movq   <r0=int64#4,0(<rp=int64#1)
+# asm 2: movq   <r0=%rcx,0(<rp=%rdi)
+movq   %rcx,0(%rdi)
+
+# qhasm: *(uint64 *)(rp +  8) = r1
+# asm 1: movq   <r1=int64#5,8(<rp=int64#1)
+# asm 2: movq   <r1=%r8,8(<rp=%rdi)
+movq   %r8,8(%rdi)
+
+# qhasm: *(uint64 *)(rp + 16) = r2
+# asm 1: movq   <r2=int64#6,16(<rp=int64#1)
+# asm 2: movq   <r2=%r9,16(<rp=%rdi)
+movq   %r9,16(%rdi)
+
+# qhasm: *(uint64 *)(rp + 24) = r3
+# asm 1: movq   <r3=int64#2,24(<rp=int64#1)
+# asm 2: movq   <r3=%rsi,24(<rp=%rdi)
+movq   %rsi,24(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+ret

+ 233 - 0
dclxvi-20130329/speedtest.c

@@ -0,0 +1,233 @@
+/*
+ * File:   dclxvi-20130329/speedtest.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "curvepoint_fp.h"
+#include "twistpoint_fp2.h"
+#include "fp12e.h"
+#include "optate.h"
+#include "linefunction.h"
+
+#define NTESTS 20
+
+#define REP 50
+
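+/* Cycle counter: reads the CPU time-stamp counter with rdtsc; on x86-64 the
+   high 32 bits (rdx) are shifted up and combined with the low 32 bits (rax)
+   into a single 64-bit value. */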
+#ifdef __x86_64__
+#define mycpucycles(RES) \
+  __asm__ volatile("rdtsc;shlq $32,%%rdx;orq %%rdx,%%rax" : "=a" (RES) ::  "%rdx");
+#else
+#define mycpucycles(RES) \
+  __asm__ volatile(".byte 15;.byte 49" : "=A" (RES));
+#endif
+
+extern const curvepoint_fp_t bn_curvegen;
+extern const twistpoint_fp2_t bn_twistgen;
+extern const scalar_t bn_n;
+
+/*
+static int cmp_llu(const void *a, const void*b)
+{
+  if(*(unsigned long long *)a < *(unsigned long long *)b) return -1;
+  if(*(unsigned long long *)a > *(unsigned long long *)b) return 1;
+  return 0;
+}
+
+
+static unsigned long long median(unsigned long long *l, size_t llen)
+{
+  qsort(l,llen,sizeof(unsigned long long),cmp_llu);
+
+  if(llen%2) return l[llen/2];
+  else return (l[llen/2-1]+l[llen/2])/2;
+}
+*/
+
+static void print_bench(unsigned long long *l, size_t llen)
+{
+  size_t i;
+  for(i=0;i<llen-1;i++)
+  {
+    l[i] = l[i+1]-l[i];
+    printf("%llu", l[i]/REP);
+    if(i < llen-2) printf(" ");
+  }
+
+  printf("\n");
+}
+
+
+fp12e_t e1;
+curvepoint_fp_t p1;
+twistpoint_fp2_t p2;
+twistpoint_fp2_t p3;
+twistpoint_fp2_t rq;
+
+fp2e_t rop11, rop12, rop13, r2;
+fpe_t fpe1;
+
+scalar_t s1, s2, s3;
+
+unsigned long long t[NTESTS+1];
+
+int main(int argc, char* argv[])
+{
+  int i, j;
+  int choose;
+
+  if(argc >= 2) choose = atoi(argv[1]);
+  else choose = 0;
+
+  scalar_setrandom(s1, bn_n);
+  scalar_setrandom(s2, bn_n);
+  scalar_setrandom(s3, bn_n);
+
+  curvepoint_fp_scalarmult_vartime(p1, bn_curvegen, s1);
+  curvepoint_fp_makeaffine(p1);
+  twistpoint_fp2_scalarmult_vartime(p2, bn_twistgen, s2);
+  twistpoint_fp2_makeaffine(p2);
+  twistpoint_fp2_scalarmult_vartime(p3, bn_twistgen, s3);
+  twistpoint_fp2_makeaffine(p3);
+  fp2e_setone(rop11);
+  fp2e_setone(rop12);
+  fp2e_setone(rop13);
+  fp2e_setone(r2);
+  fpe_setone(fpe1); 
+  fp12e_setone(e1);
+
+  if(!choose || choose == 1)
+  {
+    printf("Fp2 multiplication: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp2e_mul(r2, rop11, rop12);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 2)
+  {
+    printf("Fp2 squaring: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp2e_square(r2, rop11);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 3)
+  {
+    printf("Fp2xFp multiplication: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp2e_mul_fpe(r2, rop11, fpe1);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 4)
+  {
+    printf("Fp2 short coeffred: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp2e_short_coeffred(r2);
+    }
+    print_bench(t,NTESTS+1);
+  }
+  
+  if(!choose || choose == 5)
+  {
+     printf("Linefunction add: ");
+     for(i=0;i<NTESTS+1;i++)
+     {
+     mycpucycles(t[i]);
+     for(j=0;j<REP;j++) linefunction_add_ate(rop11, rop12, rop13, rq, p2, p3, p1, r2);
+     }
+     print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 6)
+  {
+     printf("Linefunction double: ");
+     for(i=0;i<NTESTS+1;i++)
+     {
+     mycpucycles(t[i]);
+     for(j=0;j<REP;j++) linefunction_double_ate(rop11, rop12, rop13, rq, p2,  p1);
+     }
+     print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 7)
+  {
+    printf("Fp12 multiplication: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp12e_mul(e1, e1, e1);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 8)
+  {
+    printf("Fp12 squaring: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp12e_square(e1, e1);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 9)
+  {
+    printf("Fp12 linefunction multiplication: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp12e_mul_line(e1, e1, r2, r2, r2);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 10)
+  {
+    printf("Fp12 inversion: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) fp12e_invert(e1, e1);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 11)
+  {
+    printf("Miller loop: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) optate_miller(e1, p2, p1);
+    }
+    print_bench(t,NTESTS+1);
+  }
+
+  if(!choose || choose == 12)
+  {
+    printf("Optimal ate pairing: ");
+    for(i=0;i<NTESTS+1;i++)
+    {
+      mycpucycles(t[i]);
+      for(j=0;j<REP;j++) optate(e1, p2, p1);
+    }
+    print_bench(t,NTESTS+1);
+  }
+}

+ 55 - 0
dclxvi-20130329/test_curvepoint_multiscalar.c

@@ -0,0 +1,55 @@
+/*
+ * File:   dclxvi-20130329/test_curvepoint_multiscalar.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include "curvepoint_fp.h"
+#include "curvepoint_fp_multiscalar.h"
+#include "parameters.h"
+
+extern const scalar_t bn_n;
+extern const curvepoint_fp_t bn_curvegen;
+
+#define MAXBATCH 500
+
+int main()
+{
+  struct curvepoint_fp_struct p[MAXBATCH];
+  curvepoint_fp_t r1, r2, t;
+  scalar_t s[MAXBATCH];
+  int i,batch;
+  for(batch=1;batch<MAXBATCH;batch++)
+  {
+    printf("batch: %d\n",batch);
+
+    for(i=0;i<batch;i++)
+    {
+      scalar_setrandom(s[i],bn_n);
+      curvepoint_fp_set(&p[i],bn_curvegen);
+      fpe_isreduced(p[i].m_x);
+      fpe_isreduced(p[i].m_y);
+      curvepoint_fp_scalarmult_vartime(&p[i],&p[i],s[i]);
+    }
+
+    curvepoint_fp_setneutral(r2);
+    for(i=0;i<batch;i++)
+    {
+      scalar_setrandom(s[i],bn_n);
+      curvepoint_fp_scalarmult_vartime(t,&p[i],s[i]);
+      curvepoint_fp_add_vartime(r2,r2,t);
+    }
+
+    curvepoint_fp_multiscalarmult_vartime(r1,p,s,batch);
+
+    curvepoint_fp_makeaffine(r1);
+    curvepoint_fp_makeaffine(r2);
+    if(!fpe_iseq(r1->m_x, r2->m_x) || !fpe_iseq(r1->m_y, r2->m_y))
+    {
+      printf("error\n");
+      return -1;
+    }
+  }
+  return 0;
+}

+ 55 - 0
dclxvi-20130329/test_twistpoint_multiscalar.c

@@ -0,0 +1,55 @@
+/*
+ * File:   dclxvi-20130329/test_twistpoint_multiscalar.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include "twistpoint_fp2.h"
+#include "twistpoint_fp2_multiscalar.h"
+#include "parameters.h"
+
+extern const scalar_t bn_n;
+extern const twistpoint_fp2_t bn_twistgen; /* the twist generator; bn_curvegen has type curvepoint_fp_t */
+
+#define MAXBATCH 500
+
+int main()
+{
+  struct twistpoint_fp2_struct p[MAXBATCH];
+  twistpoint_fp2_t r1, r2, t;
+  scalar_t s[MAXBATCH];
+  int i,batch;
+  for(batch=1;batch<MAXBATCH;batch++)
+  {
+    printf("batch: %d\n",batch);
+
+    for(i=0;i<batch;i++)
+    {
+      scalar_setrandom(s[i],bn_n);
+      twistpoint_fp2_set(&p[i],bn_twistgen);
+      fp2e_isreduced(p[i].m_x);
+      fp2e_isreduced(p[i].m_y);
+      twistpoint_fp2_scalarmult_vartime(&p[i],&p[i],s[i]);
+    }
+
+    twistpoint_fp2_setneutral(r2);
+    for(i=0;i<batch;i++)
+    {
+      scalar_setrandom(s[i],bn_n);
+      twistpoint_fp2_scalarmult_vartime(t,&p[i],s[i]);
+      twistpoint_fp2_add_vartime(r2,r2,t);
+    }
+
+    twistpoint_fp2_multiscalarmult_vartime(r1,p,s,batch);
+
+    twistpoint_fp2_makeaffine(r1);
+    twistpoint_fp2_makeaffine(r2);
+    if(!fp2e_iseq(r1->m_x, r2->m_x) || !fp2e_iseq(r1->m_y, r2->m_y))
+    {
+      printf("error\n");
+      return -1;
+    }
+  }
+  return 0;
+}

+ 401 - 0
dclxvi-20130329/twistpoint_fp2.c

@@ -0,0 +1,401 @@
+/*
+ * File:   dclxvi-20130329/twistpoint_fp2.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mydouble.h"
+extern "C" {	
+#include "fpe.h"
+#include "fp2e.h"
+} 
+#include "twistpoint_fp2.h"
+//#include "scalar_512.h"
+
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+//            Point initialization and deletion functions
+//////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+// Global dummies usable by all curvepoints:
+
+// Set the coordinates of a twistpoint_fp2_t by copying the coordinates from another twistpoint_fp2
+void twistpoint_fp2_set(twistpoint_fp2_t rop, const twistpoint_fp2_t op)
+{
+	fp2e_set(rop->m_x, op->m_x);
+	fp2e_set(rop->m_y, op->m_y);
+	fp2e_set(rop->m_z, op->m_z);
+	fp2e_setzero(rop->m_t);
+}
+
+void twistpoint_fp2_setneutral(twistpoint_fp2_t rop)
+{
+	fp2e_setone(rop->m_x);
+	fp2e_setone(rop->m_y);
+	fp2e_setzero(rop->m_z);
+	fp2e_setzero(rop->m_t);
+}
+
+// Addition of two points, op2 is assumed to be in affine coordinates 
+// For the algorithm see e.g. the Diplomarbeit (diploma thesis) of Peter Schwabe
+/*
+void twistpoint_fp2_mixadd(twistpoint_fp2_t rop, const twistpoint_fp2_t op1, const twistpoint_fp2_t op2)
+{
+	fp2e_t tfpe1, tfpe2, tfpe3, tfpe4, tfpe5, tfpe6, tfpe7, tfpe8, tfpe9; // Temporary variables needed for intermediary results
+	fp2e_square(tfpe1, op1->m_z);
+	fp2e_mul(tfpe2, op1->m_z, tfpe1);
+	fp2e_mul(tfpe3, op2->m_x, tfpe1);
+	fp2e_mul(tfpe4, op2->m_y, tfpe2);
+	fp2e_sub(tfpe5, tfpe3, op1->m_x);
+  fp2e_short_coeffred(tfpe5);
+	fp2e_sub(tfpe6, tfpe4, op1->m_y);
+	fp2e_square(tfpe7, tfpe5);
+	fp2e_mul(tfpe8, tfpe7, tfpe5);
+	fp2e_mul(tfpe9, op1->m_x, tfpe7);
+
+	fp2e_double(tfpe1, tfpe9);
+	fp2e_add(tfpe1, tfpe1, tfpe8);
+	fp2e_square(rop->m_x, tfpe6);
+	fp2e_sub(rop->m_x, rop->m_x, tfpe1);
+  fp2e_short_coeffred(rop->m_x);
+	fp2e_sub(tfpe1, tfpe9, rop->m_x);
+	fp2e_mul(tfpe2, tfpe1, tfpe6);
+	fp2e_mul(tfpe3, op1->m_y, tfpe8);
+	fp2e_sub(rop->m_y, tfpe2, tfpe3);
+  fp2e_short_coeffred(rop->m_y);
+	fp2e_mul(rop->m_z, op1->m_z, tfpe5);
+}
+*/
+
+#ifndef COMPLETE_ADDITION
+
+void twistpoint_fp2_double(twistpoint_fp2_t rop, const twistpoint_fp2_t op)
+{
+	fp2e_t tfpe1, tfpe2, tfpe3, tfpe4; // Temporary variables needed for intermediary results
+	fp2e_square(tfpe1, op->m_y);
+	fp2e_mul(tfpe2, tfpe1, op->m_x);
+	fp2e_double(tfpe2, tfpe2);
+	fp2e_double(tfpe2, tfpe2);
+	fp2e_square(tfpe3, tfpe1);
+	fp2e_double(tfpe3, tfpe3);
+	fp2e_double(tfpe3, tfpe3);
+	fp2e_double(tfpe3, tfpe3);
+	fp2e_square(tfpe4, op->m_x);
+	fp2e_triple(tfpe4, tfpe4);
+  fp2e_short_coeffred(tfpe4);
+	fp2e_square(rop->m_x, tfpe4);
+	fp2e_double(tfpe1, tfpe2);
+	fp2e_sub(rop->m_x, rop->m_x, tfpe1);
+  fp2e_short_coeffred(rop->m_x);
+	fp2e_sub(tfpe1, tfpe2, rop->m_x);
+  fp2e_short_coeffred(tfpe1);
+	fp2e_mul(rop->m_z, op->m_y, op->m_z);
+	fp2e_double(rop->m_z, rop->m_z);
+	fp2e_mul(rop->m_y, tfpe4, tfpe1);
+	fp2e_sub(rop->m_y, rop->m_y, tfpe3);
+  fp2e_short_coeffred(rop->m_y);
+}
+
+
+
+
+void twistpoint_fp2_add_vartime(twistpoint_fp2_t rop, const twistpoint_fp2_t op1, const twistpoint_fp2_t op2)
+{
+  if(fp2e_iszero(op1->m_z))
+    twistpoint_fp2_set(rop,op2);
+  else if(fp2e_iszero(op2->m_z))
+    twistpoint_fp2_set(rop,op1);
+  else
+  {
+    //See http://www.hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/addition/add-2007-bl.op3
+    fp2e_t z1z1, z2z2, r, v, s1, s2, u1, u2, h, i, j, t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+    //Z1Z1 = Z1^2
+    fp2e_square(z1z1, op1->m_z);
+    //Z2Z2 = Z2^2
+    fp2e_square(z2z2, op2->m_z);
+    //U1 = X1*Z2Z2
+    fp2e_mul(u1, op1->m_x, z2z2);
+    //U2 = X2*Z1Z1
+    fp2e_mul(u2, op2->m_x, z1z1);
+    //t0 = Z2*Z2Z2
+    fp2e_mul(t0, op2->m_z, z2z2);
+    //S1 = Y1*t0
+    fp2e_mul(s1,op1->m_y,t0);
+    //t1 = Z1*Z1Z1
+    fp2e_mul(t1,op1->m_z, z1z1);
+    //S2 = Y2*t1
+    fp2e_mul(s2,op2->m_y,t1);
+    if(fp2e_iseq(u1,u2))
+    {
+      if(fp2e_iseq(s1,s2))
+        twistpoint_fp2_double(rop,op1);
+      else
+        twistpoint_fp2_setneutral(rop);
+      /* Return early: otherwise the generic addition formula below (with H = 0)
+         would overwrite the doubling/neutral result just computed. */
+      return;
+    }
+    //H = U2-U1
+    fp2e_sub(h,u2,u1);
+    //t2 = 2*H
+    fp2e_add(t2, h, h);
+    //I = t2^2
+    fp2e_short_coeffred(t2);
+    fp2e_square(i,t2);
+    //J = H*I
+    fp2e_mul(j,h,i);
+    //t3 = S2-S1
+    fp2e_sub(t3,s2,s1);
+    //r = 2*t3
+    fp2e_add(r,t3,t3);
+    //V = U1*I
+    fp2e_mul(v,u1,i);
+    //t4 = r^2
+    fp2e_short_coeffred(r);
+    fp2e_square(t4,r);
+    //t5 = 2*V
+    fp2e_add(t5,v,v);
+    //t6 = t4-J
+    fp2e_sub(t6,t4,j);
+    //X3 = t6-t5
+    fp2e_sub(rop->m_x,t6,t5);
+    fp2e_short_coeffred(rop->m_x);
+    //t7 = V-X3
+    fp2e_sub(t7,v,rop->m_x);
+    //t8 = S1*J
+    fp2e_mul(t8,s1,j);
+    //t9 = 2*t8
+    fp2e_add(t9,t8,t8);
+    //t10 = r*t7
+    fp2e_mul(t10,r,t7);
+    //Y3 = t10-t9
+    fp2e_sub(rop->m_y,t10,t9);
+    fp2e_short_coeffred(rop->m_y);
+    //t11 = Z1+Z2
+    fp2e_add(t11,op1->m_z,op2->m_z);
+    //t12 = t11^2
+    fp2e_short_coeffred(t11);
+    fp2e_square(t12,t11);
+    //t13 = t12-Z1Z1
+    fp2e_sub(t13,t12,z1z1);
+    //t14 = t13-Z2Z2
+    fp2e_sub(t14,t13,z2z2);
+    //Z3 = t14*H
+    fp2e_short_coeffred(t14);
+    fp2e_mul(rop->m_z,t14,h);
+    fp2e_short_coeffred(rop->m_z);
+  }
+}
+
+
+
+// Transform Jacobian to Affine Coordinates (z=1)
+void twistpoint_fp2_makeaffine(twistpoint_fp2_t point)
+{
+	//signature;
+	if (!fp2e_iszero(point->m_z))
+	{	
+		fp2e_t tfpe1;
+		fp2e_invert(tfpe1, point->m_z);
+		fp2e_mul(point->m_x, point->m_x, tfpe1);
+		fp2e_mul(point->m_x, point->m_x, tfpe1);
+
+		fp2e_mul(point->m_y, point->m_y, tfpe1);
+		fp2e_mul(point->m_y, point->m_y, tfpe1);
+		fp2e_mul(point->m_y, point->m_y, tfpe1);
+
+		fp2e_setone(point->m_z);
+	}
+}
+
+#endif
+static void twistpoint_fp2_add_nocheck(twistpoint_fp2_t rop, const twistpoint_fp2_t op1, const twistpoint_fp2_t op2)
+{
+  //See http://www.hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian-0/addition/add-2007-bl.op3
+  fp2e_t z1z1, z2z2, r, v, s1, s2, u1, u2, h, i, j, t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14;
+  //Z1Z1 = Z1^2
+  fp2e_square(z1z1, op1->m_z);
+  //Z2Z2 = Z2^2
+  fp2e_square(z2z2, op2->m_z);
+  //U1 = X1*Z2Z2
+  fp2e_mul(u1, op1->m_x, z2z2);
+  //U2 = X2*Z1Z1
+  fp2e_mul(u2, op2->m_x, z1z1);
+  //t0 = Z2*Z2Z2
+  fp2e_mul(t0, op2->m_z, z2z2);
+  //S1 = Y1*t0
+  fp2e_mul(s1,op1->m_y,t0);
+  //t1 = Z1*Z1Z1
+  fp2e_mul(t1,op1->m_z, z1z1);
+  //S2 = Y2*t1
+  fp2e_mul(s2,op2->m_y,t1);
+  //H = U2-U1
+  fp2e_sub(h,u2,u1);
+  //t2 = 2*H
+  fp2e_add(t2, h, h);
+  //I = t2^2
+  fp2e_short_coeffred(t2);
+  fp2e_square(i,t2);
+  //J = H*I
+  fp2e_mul(j,h,i);
+  //t3 = S2-S1
+  fp2e_sub(t3,s2,s1);
+  //r = 2*t3
+  fp2e_add(r,t3,t3);
+  //V = U1*I
+  fp2e_mul(v,u1,i);
+  //t4 = r^2
+  fp2e_short_coeffred(r);
+  fp2e_square(t4,r);
+  //t5 = 2*V
+  fp2e_add(t5,v,v);
+  //t6 = t4-J
+  fp2e_sub(t6,t4,j);
+  //X3 = t6-t5
+  fp2e_sub(rop->m_x,t6,t5);
+  fp2e_short_coeffred(rop->m_x);
+  //t7 = V-X3
+  fp2e_sub(t7,v,rop->m_x);
+  //t8 = S1*J
+  fp2e_mul(t8,s1,j);
+  //t9 = 2*t8
+  fp2e_add(t9,t8,t8);
+  //t10 = r*t7
+  fp2e_mul(t10,r,t7);
+  //Y3 = t10-t9
+  fp2e_sub(rop->m_y,t10,t9);
+  fp2e_short_coeffred(rop->m_y);
+  //t11 = Z1+Z2
+  fp2e_add(t11,op1->m_z,op2->m_z);
+  //t12 = t11^2
+  fp2e_short_coeffred(t11);
+  fp2e_square(t12,t11);
+  //t13 = t12-Z1Z1
+  fp2e_sub(t13,t12,z1z1);
+  //t14 = t13-Z2Z2
+  fp2e_sub(t14,t13,z2z2);
+  //Z3 = t14*H
+  fp2e_short_coeffred(h);
+  fp2e_mul(rop->m_z,t14,h);
+  fp2e_short_coeffred(rop->m_z);
+}
+
+/*
+void twistpoint_fp2_scalarmult_vartime_old(twistpoint_fp2_t rop, const twistpoint_fp2_t op, const scalar_t scalar, const unsigned int scalar_bitsize)
+{
+	size_t i;
+	twistpoint_fp2_t r;
+	twistpoint_fp2_set(r, op);
+	for(i = scalar_bitsize-1; i > 0; i--)
+	{
+		twistpoint_fp2_double(r, r);
+		if(scalar_getbit(scalar, i - 1)) 
+			twistpoint_fp2_mixadd(r, r, op);
+	}
+	twistpoint_fp2_set(rop, r);
+}
+*/
+
+static void choose_t(twistpoint_fp2_t t, struct twistpoint_fp2_struct *pre, signed char b)
+{
+  if(b>0)
+    *t = pre[b-1];
+  else 
+  {
+    *t = pre[-b-1];
+    twistpoint_fp2_neg(t,t);
+  }
+}
+
+void twistpoint_fp2_scalarmult_vartime(twistpoint_fp2_t rop, const twistpoint_fp2_t op, const scalar_t scalar)
+{
+  signed char s[65]; 
+  int i; 
+  twistpoint_fp2_t t;
+  struct twistpoint_fp2_struct pre[8];
+  scalar_window4(s,scalar);
+  /*
+  for(i=0;i<64;i++)
+    printf("%d ",s[i]);
+  printf("\n");
+  */
+  
+  pre[0] = *op;                                          //  P 
+  twistpoint_fp2_double(&pre[1], &pre[0]);               // 2P
+  twistpoint_fp2_add_vartime(&pre[2], &pre[0], &pre[1]); // 3P
+  twistpoint_fp2_double(&pre[3], &pre[1]);               // 4P
+  twistpoint_fp2_add_vartime(&pre[4], &pre[0], &pre[3]); // 5P
+  twistpoint_fp2_double(&pre[5], &pre[2]);               // 6P
+  twistpoint_fp2_add_vartime(&pre[6], &pre[0], &pre[5]); // 7P
+  twistpoint_fp2_double(&pre[7], &pre[3]);               // 8P
+
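+  /* Process the signed base-16 digits from the most significant nonzero one
+     downwards: four doublings per digit, followed by the addition of the
+     precomputed multiple selected by choose_t (negative digits reuse the
+     table entry with a point negation). */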
+  i = 64; 
+  while(!s[i]&&i>0) i--;
+
+  if(!s[i]) 
+    twistpoint_fp2_setneutral(rop);
+  else
+  {
+    choose_t(rop,pre,s[i]);
+    i--;
+    for(;i>=0;i--)
+    {
+      twistpoint_fp2_double(rop, rop);
+      twistpoint_fp2_double(rop, rop);
+      twistpoint_fp2_double(rop, rop);
+      twistpoint_fp2_double(rop, rop);
+      if(s[i])
+      {
+        choose_t(t,pre,s[i]);
+        twistpoint_fp2_add_vartime(rop,rop,t);
+      }
+    }
+  }
+}
+
+// Negate a point, store in rop:
+void twistpoint_fp2_neg(twistpoint_fp2_t rop, const twistpoint_fp2_t op)
+{
+	if (fp2e_iszero(op->m_z))
+	{
+		    twistpoint_fp2_set(rop,op);
+	}
+	else
+	{
+		fp2e_t tfpe1;
+		fp2e_neg(tfpe1, op->m_y);
+		fp2e_set(rop->m_x, op->m_x);
+		fp2e_set(rop->m_y, tfpe1);
+		fp2e_set(rop->m_z, op->m_z);
+	}
+}
+
+void twistpoint_fp2_set_fp2e(twistpoint_fp2_t rop, const fp2e_t x, const fp2e_t y, const fp2e_t z)
+{
+	fp2e_set(rop->m_x, x);
+	fp2e_set(rop->m_y, y);
+	fp2e_set(rop->m_z, z);
+  fp2e_setzero(rop->m_t);
+}
+
+void twistpoint_fp2_affineset_fp2e(twistpoint_fp2_t rop, const fp2e_t x, const fp2e_t y)
+{
+	fp2e_set(rop->m_x, x);
+	fp2e_set(rop->m_y, y);
+	fp2e_setone(rop->m_z);
+  fp2e_setzero(rop->m_t);
+}
+
+
+
+// Print a point:
+void twistpoint_fp2_print(FILE *outfile, const twistpoint_fp2_t point)
+{
+	fprintf(outfile, "______________Twist______________\n\nX = ");
+	fp2e_print(outfile, point->m_x);
+	fprintf(outfile, "\n\nY = ");
+	fp2e_print(outfile, point->m_y);
+	fprintf(outfile, "\n\nZ = ");
+	fp2e_print(outfile, point->m_z);
+	fprintf(outfile, "\n_________________________________\n");
+}
+

+ 50 - 0
dclxvi-20130329/twistpoint_fp2.h

@@ -0,0 +1,50 @@
+/*
+ * File:   dclxvi-20130329/twistpoint_fp2.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef TWISTPOINT_FP2_H
+#define TWISTPOINT_FP2_H
+
+#include "fp2e.h"
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+
+typedef struct twistpoint_fp2_struct twistpoint_fp2_struct_t;
+
+struct twistpoint_fp2_struct
+{	
+	fp2e_t m_x; // X-Coordinate (Jacobian Coordinate system)
+	fp2e_t m_y; // Y-Coordinate (Jacobian Coordinate system)
+	fp2e_t m_z; // Z-Coordinate (Jacobian Coordinate system)
+	fp2e_t m_t; // T = Z^2, only used during pairing computation, set to zero if not set
+};
+
+typedef twistpoint_fp2_struct_t twistpoint_fp2_t[1];
+
+void twistpoint_fp2_set(twistpoint_fp2_t rop, const twistpoint_fp2_t op);
+
+void twistpoint_fp2_setneutral(twistpoint_fp2_t rop);
+
+void twistpoint_fp2_neg(twistpoint_fp2_t rop, const twistpoint_fp2_t op);
+
+void twistpoint_fp2_set_fp2e(twistpoint_fp2_t rop, const fp2e_t x, const fp2e_t y, const fp2e_t z);
+
+void twistpoint_fp2_affineset_fp2e(twistpoint_fp2_t rop, const fp2e_t x, const fp2e_t y);
+
+void twistpoint_fp2_add_vartime(twistpoint_fp2_t rop, const twistpoint_fp2_t op1, const twistpoint_fp2_t op2);
+
+void twistpoint_fp2_double(twistpoint_fp2_t rop, const twistpoint_fp2_t op);
+
+void twistpoint_fp2_scalarmult_vartime(twistpoint_fp2_t rop, const twistpoint_fp2_t op, const scalar_t scalar);
+
+void twistpoint_fp2_print(FILE *outfile, const twistpoint_fp2_t op);
+
+// Transform to Affine Coordinates (z=1)
+void twistpoint_fp2_makeaffine(twistpoint_fp2_t op);
+
+#endif // TWISTPOINT_FP2_H

+ 64 - 0
dclxvi-20130329/twistpoint_fp2_multiscalar.c

@@ -0,0 +1,64 @@
+/*
+ * File:   dclxvi-20130329/twistpoint_fp2_multiscalar.c
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#include "twistpoint_fp2.h"
+#include "scalar.h"
+#include "index_heap.h"
+
+#define MAX_HEAP_SIZE 63 // Can also try 127 or 255
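+/* Multi-scalar multiplication, apparently in the Bos-Coster style: the scalars
+   are organised in a heap; in each step the second-largest scalar is
+   subtracted from the largest (scalar_sub_nored) while the point attached to
+   the largest is added into the point attached to the second largest
+   (twistpoint_fp2_add_vartime), preserving the overall sum, until one scalar
+   remains to be handled by an ordinary single-scalar multiplication. */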
+void twistpoint_fp2_multiscalarmult_vartime(twistpoint_fp2_t rop, struct twistpoint_fp2_struct *p, scalar_t *s, const unsigned long long npoints)
+{
+  unsigned long long pos[MAX_HEAP_SIZE];
+  unsigned long long max1, max2,i;
+  twistpoint_fp2_t t;
+  unsigned long long tctr, ctr = npoints;
+
+  twistpoint_fp2_setneutral(rop);
+  while(ctr>=MAX_HEAP_SIZE)
+  {
+    heap_init(pos, MAX_HEAP_SIZE, s);
+    for(i=0;;i++)
+    {
+      heap_get2max(pos, &max1, &max2, s);
+      if(scalar_iszero_vartime(s[max2])) break;
+      scalar_sub_nored(s[max1],s[max1],s[max2]);
+      twistpoint_fp2_add_vartime(&p[max2],&p[max2],&p[max1]);
+      heap_rootreplaced(pos, MAX_HEAP_SIZE, s);
+    }
+    twistpoint_fp2_scalarmult_vartime(t, &p[max1], s[max1]);
+    twistpoint_fp2_add_vartime(rop,rop,t);
+    p += MAX_HEAP_SIZE;
+    s += MAX_HEAP_SIZE;
+    ctr -= MAX_HEAP_SIZE;
+  }
+  if(ctr > 5)
+  {
+    tctr = (ctr-1)|1; // need an odd heap size 
+    heap_init(pos, tctr, s);
+    for(i=0;;i++)
+    {
+      heap_get2max(pos, &max1, &max2, s);
+      if(scalar_iszero_vartime(s[max2])) break;
+      scalar_sub_nored(s[max1],s[max1],s[max2]);
+      twistpoint_fp2_add_vartime(&p[max2],&p[max2],&p[max1]);
+      heap_rootreplaced(pos, tctr, s);
+    }
+    twistpoint_fp2_scalarmult_vartime(t, &p[max1], s[max1]);
+    twistpoint_fp2_add_vartime(rop,rop,t);
+    p += tctr;
+    s += tctr;
+    ctr -= tctr;
+  }
+  while(ctr>0)
+  {
+    twistpoint_fp2_scalarmult_vartime(t,p,*s);
+    twistpoint_fp2_add_vartime(rop,rop,t);
+    p++;
+    s++;
+    ctr--;
+  }
+}
+

+ 14 - 0
dclxvi-20130329/twistpoint_fp2_multiscalar.h

@@ -0,0 +1,14 @@
+/*
+ * File:   dclxvi-20130329/twistpoint_fp2_multiscalar.h
+ * Author: Ruben Niederhagen, Peter Schwabe
+ * Public Domain
+ */
+
+#ifndef TWISTPOINT_FP2_MULTISCALAR_H
+#define TWISTPOINT_FP2_MULTISCALAR_H
+#include "twistpoint_fp2.h"
+#include "scalar.h"
+
+void twistpoint_fp2_multiscalarmult_vartime(twistpoint_fp2_t rop, struct twistpoint_fp2_struct *p, scalar_t *s, const unsigned long long npoints);
+
+#endif

BIN
doc/bgnfcf.pdf


+ 2 - 0
doc/more_doc

@@ -0,0 +1,2 @@
+For further documentation, type "make doc" in the src directory.
+Requirements: doxygen, firefox

File diff suppressed because it is too large
+ 27 - 0
gengetopt/option.c


+ 36 - 0
gengetopt/option.ggo

@@ -0,0 +1,36 @@
+# gengetopt < ../gengetopt/option.ggo  --output-dir=../gengetopt --file-name=option --unamed-opts
+
+package "BGNFCF" 
+
+version "(version 2)"
+
+purpose 
+
+"
+
+This program aims to demonstrate the feasibility of a consistent computation on encrypted data using homomorphic cryptography. It is a proof of concept. The processed data are subtitles encrypted with the BGNFCF homomorphic scheme, a variant of the Boneh-Goh-Nissim scheme (2006) to which Freeman's conversion (2010) and the Catalano-Fiore transformation (2015) have been applied. The computation performed on the subtitles is a change of character case; it is carried out by evaluating an arithmetic circuit of multiplicative depth 2.
+
+Description of the circuits:
+
+minmaj : lowercase-to-uppercase conversion, without error, with precomputation and postcomputation
+demo : minmaj on a single character, printing the ciphertexts and the evaluated result
+minmaj2 : lowercase-to-uppercase conversion, with error
+minmaj3 : lowercase-to-uppercase conversion, with error and postcomputation
+size : gives the size in bytes of the ciphertexts at the different levels
+time : gives the time in milliseconds of the precomputation, computation and postcomputation
+time2 : gives the time in milliseconds of encryption, evaluation and decryption
+addition and multiplication : test the various possible operations and give the time for each
+chiffrement : encrypts and decrypts in curve and twist mode, without homomorphic evaluation
+ip : updates the IPs and the counter according to the counter value
+scalar_product : computes the scalar product of bit vectors
+
+Version 2 performs the computation in parallel with OpenMP. The computation time is reduced on multi-core processors.
+
+
+"
+     
+#option "subtitle_source" s "The subtitles can be chosen by the developer or by the user" values="dev", "usr" enum default="dev" optional
+
+option "subtitle" s "Specify the subtitles on the command line" string typestr="SUBTITLE" optional
+
+option "circuit" c "Specify the evaluation circuit. minmaj=with precomputation and postcomputation, minmaj2=with error, minmaj3=with error and postcomputation." values="minmaj", "chiffrement", "additionL1", "additionL2", "additionL3", "additionL4", "multiplicationL1", "multiplicationL1L2", "multiplicationL2", "size", "time", "minmaj2", "minmaj3", "demo", "ip", "scalar_product", "time2" enum default="minmaj" optional

+ 187 - 0
gengetopt/option.h

@@ -0,0 +1,187 @@
+/** @file option.h
+ *  @brief The header file for the command line option parser
+ *  generated by GNU Gengetopt version 2.22.6
+ *  http://www.gnu.org/software/gengetopt.
+ *  DO NOT modify this file, since it can be overwritten
+ *  @author GNU Gengetopt by Lorenzo Bettini */
+
+#ifndef OPTION_H
+#define OPTION_H
+
+/* If we use autoconf.  */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h> /* for FILE */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifndef CMDLINE_PARSER_PACKAGE
+/** @brief the program name (used for printing errors) */
+#define CMDLINE_PARSER_PACKAGE "BGNFCF"
+#endif
+
+#ifndef CMDLINE_PARSER_PACKAGE_NAME
+/** @brief the complete program name (used for help and version) */
+#define CMDLINE_PARSER_PACKAGE_NAME "BGNFCF"
+#endif
+
+#ifndef CMDLINE_PARSER_VERSION
+/** @brief the program version */
+#define CMDLINE_PARSER_VERSION "(version 2)"
+#endif
+
+enum enum_circuit { circuit__NULL = -1, circuit_arg_minmaj = 0, circuit_arg_chiffrement, circuit_arg_additionL1, circuit_arg_additionL2, circuit_arg_additionL3, circuit_arg_additionL4, circuit_arg_multiplicationL1, circuit_arg_multiplicationL1L2, circuit_arg_multiplicationL2, circuit_arg_size, circuit_arg_time, circuit_arg_minmaj2, circuit_arg_minmaj3, circuit_arg_demo, circuit_arg_ip, circuit_arg_scalar_product, circuit_arg_time2 };
+
+/** @brief Where the command line options are stored */
+struct gengetopt_args_info
+{
+  const char *help_help; /**< @brief Print help and exit help description.  */
+  const char *version_help; /**< @brief Print version and exit help description.  */
+  char * subtitle_arg;	/**< @brief Specify the subtitles on the command line.  */
+  char * subtitle_orig;	/**< @brief Specify the subtitles on the command line original value given at command line.  */
+  const char *subtitle_help; /**< @brief Specify the subtitles on the command line help description.  */
+  enum enum_circuit circuit_arg;	/**< @brief Specify the evaluation circuit. minmaj=with precomputation and postcomputation, minmaj2=with error, minmaj3=with error and postcomputation. (default='minmaj').  */
+  char * circuit_orig;	/**< @brief Specify the evaluation circuit. minmaj=with precomputation and postcomputation, minmaj2=with error, minmaj3=with error and postcomputation. original value given at command line.  */
+  const char *circuit_help; /**< @brief Specify the evaluation circuit. minmaj=with precomputation and postcomputation, minmaj2=with error, minmaj3=with error and postcomputation. help description.  */
+  
+  unsigned int help_given ;	/**< @brief Whether help was given.  */
+  unsigned int version_given ;	/**< @brief Whether version was given.  */
+  unsigned int subtitle_given ;	/**< @brief Whether subtitle was given.  */
+  unsigned int circuit_given ;	/**< @brief Whether circuit was given.  */
+
+  char **inputs ; /**< @brief unamed options (options without names) */
+  unsigned inputs_num ; /**< @brief unamed options number */
+} ;
+
+/** @brief The additional parameters to pass to parser functions */
+struct cmdline_parser_params
+{
+  int override; /**< @brief whether to override possibly already present options (default 0) */
+  int initialize; /**< @brief whether to initialize the option structure gengetopt_args_info (default 1) */
+  int check_required; /**< @brief whether to check that all required options were provided (default 1) */
+  int check_ambiguity; /**< @brief whether to check for options already specified in the option structure gengetopt_args_info (default 0) */
+  int print_errors; /**< @brief whether getopt_long should print an error message for a bad option (default 1) */
+} ;
+
+/** @brief the purpose string of the program */
+extern const char *gengetopt_args_info_purpose;
+/** @brief the usage string of the program */
+extern const char *gengetopt_args_info_usage;
+/** @brief the description string of the program */
+extern const char *gengetopt_args_info_description;
+/** @brief all the lines making the help output */
+extern const char *gengetopt_args_info_help[];
+
+/**
+ * The command line parser
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser (int argc, char **argv,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * The command line parser (version with additional parameters - deprecated)
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @param override whether to override possibly already present options
+ * @param initialize whether to initialize the option structure my_args_info
+ * @param check_required whether to check that all required options were provided
+ * @return 0 if everything went fine, NON 0 if an error took place
+ * @deprecated use cmdline_parser_ext() instead
+ */
+int cmdline_parser2 (int argc, char **argv,
+  struct gengetopt_args_info *args_info,
+  int override, int initialize, int check_required);
+
+/**
+ * The command line parser (version with additional parameters)
+ * @param argc the number of command line options
+ * @param argv the command line options
+ * @param args_info the structure where option information will be stored
+ * @param params additional parameters for the parser
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_ext (int argc, char **argv,
+  struct gengetopt_args_info *args_info,
+  struct cmdline_parser_params *params);
+
+/**
+ * Save the contents of the option struct into an already open FILE stream.
+ * @param outfile the stream where to dump options
+ * @param args_info the option struct to dump
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_dump(FILE *outfile,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * Save the contents of the option struct into a (text) file.
+ * This file can be read by the config file parser (if generated by gengetopt)
+ * @param filename the file where to save
+ * @param args_info the option struct to save
+ * @return 0 if everything went fine, NON 0 if an error took place
+ */
+int cmdline_parser_file_save(const char *filename,
+  struct gengetopt_args_info *args_info);
+
+/**
+ * Print the help
+ */
+void cmdline_parser_print_help(void);
+/**
+ * Print the version
+ */
+void cmdline_parser_print_version(void);
+
+/**
+ * Initializes all the fields a cmdline_parser_params structure 
+ * to their default values
+ * @param params the structure to initialize
+ */
+void cmdline_parser_params_init(struct cmdline_parser_params *params);
+
+/**
+ * Allocates dynamically a cmdline_parser_params structure and initializes
+ * all its fields to their default values
+ * @return the created and initialized cmdline_parser_params structure
+ */
+struct cmdline_parser_params *cmdline_parser_params_create(void);
+
+/**
+ * Initializes the passed gengetopt_args_info structure's fields
+ * (also set default values for options that have a default)
+ * @param args_info the structure to initialize
+ */
+void cmdline_parser_init (struct gengetopt_args_info *args_info);
+/**
+ * Deallocates the string fields of the gengetopt_args_info structure
+ * (but does not deallocate the structure itself)
+ * @param args_info the structure to deallocate
+ */
+void cmdline_parser_free (struct gengetopt_args_info *args_info);
+
+/**
+ * Checks that all the required options were specified
+ * @param args_info the structure to check
+ * @param prog_name the name of the program that will be used to print
+ *   possible errors
+ * @return
+ */
+int cmdline_parser_required (struct gengetopt_args_info *args_info,
+  const char *prog_name);
+
+extern const char *cmdline_parser_circuit_values[];  /**< @brief Possible values for circuit. */
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+#endif /* OPTION_H */

+ 93 - 0
include/Bipoint.hpp

@@ -0,0 +1,93 @@
+#ifndef __BIPOINT_HPP
+
+#define __BIPOINT_HPP
+
+
+
+//#include "bgn.hpp"
+#include "mydouble.h" 
+extern "C" {
+#include "fpe.h"
+}
+#include "curvepoint_fp.h"
+extern "C" {
+#include "fp2e.h"	
+}
+#include "twistpoint_fp2.h"
+#include "zout.hpp"
+
+template <typename T>
+class Bipoint
+{}; // a definition of the class is needed for an arbitrary type; we deliberately leave it empty and handle only the specializations below
+
+template <>
+class Bipoint  <curvepoint_fp_t> // specialization for curvepoint_fp_t
+{
+	
+	public:
+	//Bipoint() = default; 
+	Bipoint();
+	Bipoint(curvepoint_fp_t p1,curvepoint_fp_t p2);
+	//void bipoint_curvepoint_fp_init_set(Bipoint<curvepoint_fp_t> rop, const Bipoint<curvepoint_fp_t> op);
+	void set_point(curvepoint_fp_t, int numpoint); 
+	void print(int numpoint) const;	
+	curvepoint_fp_t& operator[](int n); // the return value must be a reference.
+	Bipoint  <curvepoint_fp_t> operator+(Bipoint  <curvepoint_fp_t> b); 
+	bool operator==(Bipoint<curvepoint_fp_t> b);
+	void makeaffine();
+	void scalarmult_vartime(Bipoint<curvepoint_fp_t> op, scalar_t s);
+	void print() const;
+	
+	private:
+	curvepoint_fp_t point[2];	
+};
+
+template <>
+class Bipoint  <twistpoint_fp2_t> // specialization for twistpoint_fp2_t
+{
+	
+	public:
+	Bipoint(); 
+	Bipoint(twistpoint_fp2_t p1,twistpoint_fp2_t p2);	
+	void set_point(twistpoint_fp2_t, int numpoint); 
+	void print(int numpoint) const;
+	twistpoint_fp2_t& operator[](int n);
+	Bipoint<twistpoint_fp2_t> operator+(Bipoint<twistpoint_fp2_t> b);
+	bool operator==(Bipoint<twistpoint_fp2_t> b);
+	void makeaffine();	
+	void scalarmult_vartime(Bipoint<twistpoint_fp2_t> op, scalar_t s);
+	void print() const;
+	
+	private:
+	twistpoint_fp2_t point[2];	
+};
+
+
+template <>
+class Bipoint  <fpe_t> // specialization for fpe_t
+{
+	
+	public:
+	
+	void set_coordonnee(fpe_t, int numcoord); 
+	void get_coordonnee(int numcoord) const;
+	
+	private:
+	fpe_t coordonnee[2];	
+};
+
+
+template <>
+class Bipoint  <fp2e_t> // specialization for fp2e_t
+{
+	
+	public:
+	
+	void set_coordonnee(fp2e_t, int numcoord); 
+	void get_coordonnee(int numcoord) const;
+	
+	private:
+	fp2e_t coordonnee[2];	
+};
+
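+// Minimal usage sketch (editorial illustration; p and q stand for already
+// initialised curvepoint_fp_t values, and operator+ is presumed to act
+// component-wise on the two stored points):
+//
+//     Bipoint<curvepoint_fp_t> u(p, q), v(p, q);
+//     Bipoint<curvepoint_fp_t> w = u + v;
+//     w.makeaffine();
+//     w.print();
+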
+#endif

+ 38 - 0
include/BitChiffre.hpp

@@ -0,0 +1,38 @@
+#ifndef __BITCHIFFRE_HPP
+
+#define __BITCHIFFRE_HPP
+
+#include "typedef.h"
+#include "Bipoint.hpp"
+#include "zout.hpp"
+
+
+class BitChiffre
+{
+	public:
+	
+	BitChiffre() = default; // restore the default constructor, which is no longer generated implicitly once another constructor is declared
+	BitChiffre(F2 a,Bipoint<curvepoint_fp_t> b ,Bipoint<twistpoint_fp2_t> c);
+	void set_bit_masque(F2); //prototype 
+	F2 get_bit_masque() const;
+	Type get_type() const;
+	Bipoint<curvepoint_fp_t> get_bipoint_curve() const;
+	Bipoint<twistpoint_fp2_t> get_bipoint_twist() const;
+	void set_bipoint_curve(Bipoint<curvepoint_fp_t> b);
+	void set_bipoint_twist(Bipoint<twistpoint_fp2_t> c);
+	void print() const;
+	void print_bit_masque() const;
+	void makeaffine();
+
+	
+	private:
+	
+	F2 bit_masque=0; // initialised here; attributes are always kept private
+	Type type;		
+	Bipoint<curvepoint_fp_t> bipoint_curve;
+	Bipoint<twistpoint_fp2_t> bipoint_twist;
+};
+
+
+//typedef BitChiffre BitEvalL1; //BitEvalL1 is an alias naming the type of the homomorphic sum of level-1 ciphertexts
+#endif

+ 34 - 0
include/BitEvalL1.hpp

@@ -0,0 +1,34 @@
+#ifndef __BITEVALL1_HPP
+
+#define __BITEVALL1_HPP
+
+#include "typedef.h"
+#include "Bipoint.hpp"
+#include "zout.hpp"
+
+
+template<typename T>
+class BitEvalL1
+{
+	public:
+	
+	BitEvalL1() = default; // restore the default constructor, which is no longer generated implicitly once another constructor is declared
+	BitEvalL1(F2 a,Bipoint<T>);
+	void set_bit_masque(F2); //prototype 
+	F2 get_bit_masque() const;
+	Bipoint<T> get_bipoint() const;
+	void set_bipoint(Bipoint<T> b);
+	void print() const;
+	void print_bit_masque() const;
+	void makeaffine();
+
+	
+	private:
+	
+	F2 bit_masque=0; // initialised here; attributes are always kept private
+	Bipoint<T> bipoint;
+};
+
+#include "BitEvalL1.tpp" 
+
+#endif

+ 54 - 0
include/BitEvalL1.tpp

@@ -0,0 +1,54 @@
+template<typename T>
+void BitEvalL1<T>::print() const
+{
+//	cout << "____________" << endl;
+	zout(bit_masque);
+	JUMP;
+	bipoint.print();
+//	cout << "____________" << endl;
+}
+
+template<typename T>
+void BitEvalL1<T>::print_bit_masque() const
+{
+	zout(bit_masque);
+}
+
+template<typename T>
+Bipoint<T> BitEvalL1<T>::get_bipoint() const
+{
+		return bipoint;
+}
+
+
+template<typename T>
+BitEvalL1<T>::BitEvalL1(F2 a,Bipoint<T> b)
+{
+	bit_masque = a;
+	bipoint = b;
+}
+
+template<typename T>
+void BitEvalL1<T>::set_bit_masque(F2 bit)
+// setter to modify the attribute if needed; method defined outside the class
+{
+	bit_masque = bit;
+}
+
+template<typename T>
+F2 BitEvalL1<T>::get_bit_masque() const
+{
+	return bit_masque;
+}
+
+template<typename T>
+void BitEvalL1<T>::set_bipoint(Bipoint<T> b)
+{
+	bipoint = b;
+}
+
+template<typename T>
+void BitEvalL1<T>::makeaffine()
+{
+	bipoint.makeaffine();
+}

+ 28 - 0
include/BitEvalL2.hpp

@@ -0,0 +1,28 @@
+#ifndef __BITEVALL2_HPP
+
+#define __BITEVALL2_HPP
+
+#include "typedef.h"
+#include "Quadripoint.hpp"
+#include "zout.hpp"
+
+class BitEvalL2
+{
+	public:
+	
+	BitEvalL2() = default; // restore the default constructor, which is no longer generated implicitly once another constructor is declared
+	void set_bit_masque(F2); //prototype 
+	F2 get_bit_masque() const;
+	Quadripoint get_quadripoint() const;
+	void set_quadripoint(Quadripoint b);
+	void print() const;
+	void print_bit_masque() const;
+
+	
+	private:
+	
+	F2 bit_masque=0; // initialised here; attributes are always kept private
+	Quadripoint quadripoint;
+};
+
+#endif

+ 43 - 0
include/BitEvalL3.hpp

@@ -0,0 +1,43 @@
+#ifndef __BITEVALL3_HPP
+
+#define __BITEVALL3_HPP
+
+#include "typedef.h"
+#include "Bipoint.hpp"
+#include "Quadripoint.hpp"
+#include "zout.hpp"
+#include <vector>
+
+// BitEvalL3 is the type used when the circuit performs level-3 additions (and is not used otherwise)
+
+class BitEvalL3
+{
+	public:
+	
+	BitEvalL3() = default; // restore the default constructor, which is no longer generated implicitly once another constructor is declared
+
+	Quadripoint get_alpha() const;	
+	//Type get_type() const;
+	vector<Bipoint<curvepoint_fp_t>> get_vec_bipoints() const;
+	//Bipoint<twistpoint_fp2_t> get_bipoint_twist() const;
+	vector<Quadripoint> get_vec_quadripoints() const;
+
+	void set_alpha(Quadripoint a);
+	void set_vec_bipoints(vector<Bipoint<curvepoint_fp_t>> b);
+	void set_bipoint_curve(Bipoint<curvepoint_fp_t> b);	
+	//void set_bipoint_twist(Bipoint<twistpoint_fp2_t> c);
+	void set_vec_quadripoints(vector<Quadripoint> d); // used in additionL3
+	void set_quadripoint(Quadripoint d); // used in multiplicationL1L2
+	void print() const;
+
+	private:
+	
+	//Type type;	
+	//Bipoint<twistpoint_fp2_t> bipoint_twist;
+	Quadripoint alpha;
+	vector<Bipoint<curvepoint_fp_t>> vec_bipoints;
+	vector<Quadripoint> vec_quadripoints;
+	//int nb_additions;
+};
+
+#endif

+ 32 - 0
include/BitEvalL4.hpp

@@ -0,0 +1,32 @@
+#ifndef __BITEVALL4_HPP
+
+#define __BITEVALL4_HPP
+
+#include "typedef.h"
+#include "Bipoint.hpp"
+#include "Quadripoint.hpp"
+#include "zout.hpp"
+#include <vector>
+
+// BitEvalL4 is the type used when the circuit performs level-4 additions (and is not used otherwise)
+
+class BitEvalL4
+{
+	public:
+	
+	BitEvalL4() = default; // restore the default constructor, which is no longer generated implicitly once another constructor is declared
+	Quadripoint get_alpha() const;	
+	vector<Quadripoint> get_vec_quadripoints(int n) const;
+
+	void set_alpha(Quadripoint a);
+	void set_vec_quadripoints(vector<Quadripoint> d, int n); // used in additionL4
+	void set_quadripoint(Quadripoint d, int n); // used in multiplicationL2
+	
+	private:
+	
+
+	Quadripoint alpha;
+	vector<Quadripoint> vec_quadripoints[2];
+};
+
+#endif

+ 43 - 0
include/Fp.hpp

@@ -0,0 +1,43 @@
+#ifndef __FP_HPP
+
+#define __FP_HPP
+
+
+//#include "bgn.hpp" 
+
+#include "mydouble.h" 
+extern "C" {
+#include "fpe.h"
+#ifdef NEW_PARAMETERS
+#include "scalar_512.h"
+#else
+#include "scalar.h"
+#endif
+}
+//#include "fpe2scalar.hpp"
+
+
+
+class Fp
+{
+	friend class PrivateKey; // needed to give PrivateKey access to the private members of Fp.
+	friend class PublicKey;
+	
+	public:
+	
+	void set(fpe_t fpe); 
+	void set_random();
+	void set_ad_minus_bc(Fp b,Fp c,Fp d);
+	const scalar_t& scalar() const; // problem: declared as a function returning an array --> solution: return the value by reference
+	void print_fpe() const;	
+	Fp operator*(Fp b); // there is already one implicit parameter: the object for which the method is invoked! 
+	Fp operator-();
+	
+	private:
+	
+	scalar_t scalar_rep={};
+	fpe_t fpe_rep;
+	
+};
+
+#endif

+ 34 - 0
include/PrivateKey.hpp

@@ -0,0 +1,34 @@
+#ifndef __PRIVATEKEY_HPP
+
+#define __PRIVATEKEY_HPP
+#include "Bipoint.hpp"
+#include "Fp.hpp" 
+#include "zout.hpp"
+#include "Quadripoint.hpp"
+#include "fp12e.h"
+
+extern const curvepoint_fp_t bn_curvegen;
+
+class  PrivateKey
+{
+	
+	public:
+	PrivateKey() = default; 
+	PrivateKey(Fp a, Fp b, Fp c, Fp d, Fp e, Fp f, Fp g, Fp h);
+	void set(Fp a, Fp b, Fp c, Fp d, Fp e, Fp f, Fp g, Fp h);
+	Fp get(string name);
+	void print() const;
+	Bipoint<curvepoint_fp_t>  pi_1(Bipoint<curvepoint_fp_t>);
+	Bipoint<twistpoint_fp2_t> pi_2(Bipoint<twistpoint_fp2_t>);
+	Quadripoint pi_T(Quadripoint);
+	
+	private :
+	Fp i1, j1, k1, l1, i2, j2, k2, l2;
+	
+};
+
+//to remove after debug REMOVE
+//extern PrivateKey private_key;
+
+#endif
+

+ 30 - 0
include/PublicKey.hpp

@@ -0,0 +1,30 @@
+#ifndef __PUBLICKEY_HPP
+
+#define __PUBLICKEY_HPP
+
+#include "Bipoint.hpp"
+#include "zout.hpp"
+#include <type_traits>
+
+class  PublicKey
+{
+	public:
+	PublicKey() = default;
+	PublicKey(Bipoint<curvepoint_fp_t> a,Bipoint<twistpoint_fp2_t> b, Bipoint<curvepoint_fp_t> c,Bipoint<twistpoint_fp2_t> d);
+	void set(Bipoint<curvepoint_fp_t> a,Bipoint<twistpoint_fp2_t> b, Bipoint<curvepoint_fp_t> c,Bipoint<twistpoint_fp2_t> d);
+	void print() const;
+	Bipoint<curvepoint_fp_t> get_bipoint_curvegen() const;
+	Bipoint<twistpoint_fp2_t> get_bipoint_twistgen() const;	
+	Bipoint<curvepoint_fp_t> get_bipoint_curve_groupelt() const;
+	Bipoint<twistpoint_fp2_t> get_bipoint_twist_groupelt() const;	
+		
+	private :
+	Bipoint<curvepoint_fp_t> bipoint_curvegen; //subgroup_gen (i1g,j1,g)
+	Bipoint<twistpoint_fp2_t> bipoint_twistgen; //subgroup_gen (i2h, j2h)
+	Bipoint<curvepoint_fp_t> bipoint_curve_groupelt; // u
+	Bipoint<twistpoint_fp2_t> bipoint_twist_groupelt; // v
+};
+
+extern PublicKey public_key;
+
+#endif

+ 42 - 0
include/Quadripoint.hpp

@@ -0,0 +1,42 @@
+#ifndef __QUADRIPOINT_HPP
+
+#define __QUADRIPOINT_HPP
+
+
+
+//#include "bgn.hpp"
+#include "mydouble.h" 
+extern "C" {
+#include "fpe.h"
+}
+extern "C" {
+#include "fp2e.h"	
+}
+extern "C" {
+#include "fp6e.h"	
+}
+extern "C" {
+#include "fp12e.h"	
+}
+#include "zout.hpp"
+
+class Quadripoint
+{
+	public:
+	//Quadripoint() = default; 
+	Quadripoint();
+	Quadripoint(fp12e_t p1,fp12e_t p2, fp12e_t p3,fp12e_t p4);
+	void set(fp12e_t, int numpoint); 
+	void print(int numpoint) const;	
+	fp12e_t& operator[](int n); // the return value must be a reference.
+	bool operator==(Quadripoint b);
+	Quadripoint operator*(Quadripoint b); 
+	Quadripoint square();
+	Quadripoint pow_vartime(const scalar_t exp);
+
+	void print() const;
+	
+	private:
+	fp12e_t point[4];
+};
+
+#endif

+ 16 - 0
include/additionL1.hpp

@@ -0,0 +1,16 @@
+#ifndef __ADDITIONL1_HPP
+
+#define __ADDITIONL1_HPP
+
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "keygen.hpp"
+
+//template <typename T>
+//BitEvalL1<T> additionL1 (BitEvalL1<T> a, BitEvalL1<T> b, PublicKey public_key, Type type);
+BitEvalL1<curvepoint_fp_t> additionL1 (BitEvalL1<curvepoint_fp_t> a, BitEvalL1<curvepoint_fp_t> b, PublicKey public_key);
+BitEvalL1<twistpoint_fp2_t> additionL1 (BitEvalL1<twistpoint_fp2_t> a, BitEvalL1<twistpoint_fp2_t> b, PublicKey public_key);
+BitChiffre additionL1 (BitChiffre a, BitChiffre b, PublicKey public_key);
+
+
+#endif /* __ADDITIONL1_HPP */

+ 12 - 0
include/additionL2.hpp

@@ -0,0 +1,12 @@
+#ifndef __ADDITIONL2_HPP
+
+#define __ADDITIONL2_HPP
+
+#include "BitEvalL2.hpp"
+#include "keygen.hpp"
+#include "pairing.hpp" 
+
+BitEvalL2 additionL2 (BitEvalL2 a, BitEvalL2 b, PublicKey public_key);
+
+
+#endif /* __ADDITIONL2_HPP */

+ 12 - 0
include/additionL3.hpp

@@ -0,0 +1,12 @@
+#ifndef __ADDITIONL3_HPP
+
+#define __ADDITIONL3_HPP
+
+#include "BitEvalL3.hpp"
+#include "keygen.hpp"
+#include "pairing.hpp" 
+
+BitEvalL3 additionL3 (BitEvalL3 a, BitEvalL3 b, PublicKey public_key);
+
+
+#endif /* __ADDITIONL3_HPP */

+ 12 - 0
include/additionL4.hpp

@@ -0,0 +1,12 @@
+#ifndef __ADDITIONL4_HPP
+
+#define __ADDITIONL4_HPP
+
+#include "BitEvalL4.hpp"
+#include "keygen.hpp"
+#include "pairing.hpp" 
+
+BitEvalL4 additionL4 (BitEvalL4 a, BitEvalL4 b, PublicKey public_key);
+
+
+#endif /* __ADDITIONL4_HPP */

+ 95 - 0
include/bgn.hpp

@@ -0,0 +1,95 @@
+#ifndef __BGN_HPP
+
+#define __BGN_HPP
+
+
+
+#include <bitset>
+#include <cinttypes>
+#include <cmath>
+#include <typeinfo>
+#include <cxxabi.h>
+#include <stdlib.h>
+#include <time.h>
+#include <chrono>
+#include <limits.h>		/* for CHAR_BIT */
+#include "gengetopt.h"
+#include "typedef.h"
+#include "zout.hpp"
+#include "size.hpp"
+#include "mydouble.h" 
+extern "C" {
+#include "fpe.h"
+}
+#include "quadruplet.hpp"
+#include "curvepoint_fp.h"
+extern "C" {
+#include "fp2e.h"	
+}
+#include "twistpoint_fp2.h"
+#include "Bipoint.hpp"
+#include "BitChiffre.hpp"
+#include "fpe2scalar.hpp"
+#include "chiffrement.hpp"
+#include "circuit_chiffrement.hpp" 
+#include "Fp.hpp"
+#include "keygen.hpp"
+#include "circuit_minmaj.hpp" 
+#include "circuit_additionL1.hpp" 
+#include "circuit_additionL2.hpp" 
+#include "circuit_multiplicationL1.hpp" 
+#include "circuit_multiplicationL1L2.hpp" 
+#include "circuit_additionL3.hpp"
+#include "circuit_multiplicationL2.hpp"
+#include "circuit_additionL4.hpp"
+#include "circuit_size.hpp" 
+#include "circuit_minmaj3.hpp" 
+#include "circuit_minmaj2.hpp" 
+#include "circuit_time.hpp" 
+#include "circuit_demo.hpp"
+#include "circuit_ip.hpp"
+#include "circuit_scalar_product.hpp"
+#include "circuit_time2.hpp" 
+
+
+// without mydouble.h the target « ../obj/bgn_check.o » fails with: error: conflicting declaration of C function ‘CheckDouble round(const CheckDouble&)’  --> include mydouble.h before the extern block
+// without extern "C" the target « bgn » fails with: undefined reference to « fpe_iszero(fpe_struct const*) »
+
+
+// macro pour gengetopt
+#define DEV 0
+#define USR 1
+
+#define MINMAJ 0
+#define CHIFFREMENT 1
+#define ADDITIONL1 2
+#define ADDITIONL2 3
+#define ADDITIONL3 4
+#define ADDITIONL4 5
+#define MULTIPLICATIONL1 6
+#define MULTIPLICATIONL1L2 7
+#define MULTIPLICATIONL2 8
+#define SIZE 9
+#define TIME 10
+#define MINMAJ2 11
+#define MINMAJ3 12
+#define DEMO 13
+#define IP 14 
+#define SCALAR_PRODUCT 15 
+#define TIME2 16
+
+	// BGN-Freeman-CF
+	// in theory   pk := (p, G, H, G_T, e, g, h, (g^a1,g^b1), (h^a2,h^b2))
+	// in practice pk := (p, g, h, a1, b1, a2, b2)
+	// at the 128-bit security level: p and r are 256-bit integers
+	// p, r and t can be fixed with the x0 used by Schwabe
+	// E(Fp)={(x,y) :  y^2 = x^3 + 5, x,y in Fp}
+	// g is a generator of E(Fp) of order p+1-t
+	// E[r]={(x,y): y^2 = x^3 + 5, x,y in Fp^12, r*(x,y)=0}
+	// Ker(Pi-p)={(x,y): (x^p,y^p)=p*(x,y), x,y in Fp^12}
+	// h is a generator of E[r] intersected with Ker(Pi - p)
+	// a1d1-b1c1 = a2d2-b2c2 = 1, with the values chosen at random in Fp
+	// generating three random, not-all-zero elements determines a unique fourth element
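+	// Worked illustration of the last point (an editorial sketch, not taken from the code):
+	// with a1, b1, c1 drawn at random (not all zero) in Fp, the constraint a1*d1 - b1*c1 = 1
+	// has the unique solution d1 = (1 + b1*c1) * a1^{-1} mod p; presumably this is what
+	// Fp::set_ad_minus_bc computes during key generation.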
+
+
+#endif /* __BGN_HPP */

+ 27 - 0
include/chiffrement.hpp

@@ -0,0 +1,27 @@
+#ifndef __CHIFFREMENT_HPP
+
+#define __CHIFFREMENT_HPP
+
+
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "keygen.hpp"
+#include "representation.hpp"
+
+//template <typename T>
+//void chiffrement(BitEvalL1<T>& bit_chiffre,F2 bit_clair, PublicKey public_key, Type type);
+void chiffrement(BitEvalL1<curvepoint_fp_t>& bit_chiffre,F2 bit_clair, PublicKey public_key);
+void chiffrement(BitEvalL1<twistpoint_fp2_t>& bit_chiffre,F2 bit_clair, PublicKey public_key);
+
+void chiffrement(BitChiffre& bit_chiffre,F2 bit_clair, PublicKey public_key, Type type);
+void chiffrement(Bipoint<curvepoint_fp_t>& ciphertext,F2 bit_clair, PublicKey public_key);
+void chiffrement(Bipoint<twistpoint_fp2_t>& ciphertext,F2 bit_clair, PublicKey public_key);
+
+extern const scalar_t bn_n;
+extern const curvepoint_fp_t bn_curvegen;	
+extern const twistpoint_fp2_t bn_twistgen;
+	
+	
+#endif /* __CHIFFREMENT_HPP */
+
+

+ 22 - 0
include/circuit_additionL1.hpp

@@ -0,0 +1,22 @@
+#ifndef __CIRCUIT_ADDITIONL1_HPP
+
+#define __CIRCUIT_ADDITIONL1_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+void circuit_additionL1(PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_ADDITIONL1_HPP */

+ 24 - 0
include/circuit_additionL2.hpp

@@ -0,0 +1,24 @@
+#ifndef __CIRCUIT_ADDITIONL2_HPP
+
+#define __CIRCUIT_ADDITIONL2_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "BitEvalL2.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "dechiffrementL2.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+void circuit_additionL2(PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_ADDITIONL2_HPP */

+ 25 - 0
include/circuit_additionL3.hpp

@@ -0,0 +1,25 @@
+#ifndef __CIRCUIT_ADDITIONL3_HPP
+
+#define __CIRCUIT_ADDITIONL3_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "BitEvalL2.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "dechiffrementL2.hpp" 
+#include "dechiffrementL3.hpp"
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+void circuit_additionL3(PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_ADDITIONL3_HPP */

+ 25 - 0
include/circuit_additionL4.hpp

@@ -0,0 +1,25 @@
+#ifndef __CIRCUIT_ADDITIONL4_HPP
+
+#define __CIRCUIT_ADDITIONL4_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "BitEvalL2.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "dechiffrementL2.hpp" 
+#include "dechiffrementL3.hpp"
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL4.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+void circuit_additionL4(PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_ADDITIONL4_HPP */

+ 21 - 0
include/circuit_chiffrement.hpp

@@ -0,0 +1,21 @@
+#ifndef __CIRCUIT_CHIFFREMENT_HPP
+
+#define __CIRCUIT_CHIFFREMENT_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+
+
+void circuit_chiffrement(char lettre, PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_CHIFFREMENT_HPP */

+ 24 - 0
include/circuit_demo.hpp

@@ -0,0 +1,24 @@
+#ifndef __CIRCUIT_DEMO_HPP
+
+#define __CIRCUIT_DEMO_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+
+
+void circuit_demo(char& rop, char symbol, PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_DEMO_HPP */

+ 11 - 0
include/circuit_ip.hpp

@@ -0,0 +1,11 @@
+#ifndef __CIRCUIT_IP_HPP
+
+#define __CIRCUIT_IP_HPP
+
+#include "keygen.hpp"
+#include <string>
+#include <vector>
+
+void circuit_ip(PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_IP_HPP */

+ 24 - 0
include/circuit_minmaj.hpp

@@ -0,0 +1,24 @@
+#ifndef __CIRCUIT_MINMAJ_HPP
+
+#define __CIRCUIT_MINMAJ_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+
+
+void circuit_minmaj(char& rop, char symbol, PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_MINMAJ_HPP */

+ 24 - 0
include/circuit_minmaj2.hpp

@@ -0,0 +1,24 @@
+#ifndef __CIRCUIT_MINMAJ2_HPP
+
+#define __CIRCUIT_MINMAJ2_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+
+
+void circuit_minmaj2(char& rop, char symbol, PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_MINMAJ2_HPP */

+ 24 - 0
include/circuit_minmaj3.hpp

@@ -0,0 +1,24 @@
+#ifndef __CIRCUIT_MINMAJ3_HPP
+
+#define __CIRCUIT_MINMAJ3_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+
+
+void circuit_minmaj3(char& rop, char symbol, PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_MINMAJ3_HPP */

+ 24 - 0
include/circuit_multiplicationL1.hpp

@@ -0,0 +1,24 @@
+#ifndef __CIRCUIT_MULTIPLICATIONL1_HPP
+
+#define __CIRCUIT_MULTIPLICATIONL1_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "BitEvalL2.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "dechiffrementL2.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+void circuit_multiplicationL1(PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_MULTIPLICATIONL1_HPP */

+ 25 - 0
include/circuit_multiplicationL1L2.hpp

@@ -0,0 +1,25 @@
+#ifndef __CIRCUIT_MULTIPLICATIONL1L2_HPP
+
+#define __CIRCUIT_MULTIPLICATIONL1L2_HPP
+
+#include <limits.h>		/* for CHAR_BIT */
+#include "typedef.h"
+#include "BitChiffre.hpp"
+#include "BitEvalL1.hpp"
+#include "BitEvalL2.hpp"
+#include "chiffrement.hpp" 
+#include "keygen.hpp"
+#include "dechiffrement.hpp" 
+#include "dechiffrementL2.hpp" 
+#include "dechiffrementL3.hpp" 
+#include "additionL1.hpp"
+#include "multiplicationL1.hpp"
+#include "multiplicationL1L2.hpp"
+#include "additionL2.hpp"
+#include "additionL3.hpp"
+#include "precomputation.hpp"
+#include "postcomputation.hpp"
+
+void circuit_multiplicationL1L2(PublicKey public_key, PrivateKey private_key);
+
+#endif /* __CIRCUIT_MULTIPLICATIONL1L2_HPP */

Some files were not shown because too many files changed in this diff