Browse Source

freeing memory

avadapal 1 year ago
parent
commit
f7bdf834c3

+ 441 - 437
2p-preprocessing/preprocessing.cpp

@@ -56,7 +56,9 @@ size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) *
 bool is_packed = (sizeof(leaf_t) < sizeof(node_t));
 size_t leaves_per_node = is_packed ? sizeof(node_t) * CHAR_BIT / bits_per_leaf : 1;
 
-size_t input_bits(const size_t nitems) { return std::ceil(std::log2(nitems)); }
+// Returns the number of bits needed to index `nitems` distinct items, i.e.
+// ceil(log2(nitems)), computed with integer arithmetic only.
+//
+// Returns 0 for nitems == 0 and nitems == 1. The previous floating-point
+// formula converted -infinity to size_t for 0 (undefined behaviour) and could
+// be off by one for values that a double cannot represent exactly.
+size_t input_bits(const size_t nitems) {
+    if (nitems <= 1) {
+        return 0;
+    }
+    // ceil(log2(n)) equals the bit width of (n - 1) for every n >= 2.
+    size_t bits = 0;
+    for (size_t remaining = nitems - 1; remaining != 0; remaining >>= 1) {
+        ++bits;
+    }
+    return bits;
+}
 
 leaf_t val;
 
@@ -65,513 +67,515 @@ using namespace dpf;
 #include "mpc.h"
 void generate_random_targets(uint8_t **target_share_read, size_t n_threads, bool party, size_t expo)
 {
-	for (size_t i = 0; i < n_threads; i++)
-	{
-		target_share_read[i] = new uint8_t[64];
-	}
-
-	for (size_t j = 0; j < 64; ++j)
-	{
-		for (size_t i = 0; i < n_threads; ++i)
-		{
-			uint8_t random_value;
-			arc4random_buf(&random_value, sizeof(uint8_t));
-			target_share_read[i][j] = random_value; // rand();
-		}
-	}
+    // Gives every thread its own heap buffer of 64 random bytes. The buffers
+    // are allocated with new[] here, so whoever holds target_share_read must
+    // release each entry with delete[].
+    // `party` and `expo` are accepted for interface compatibility but are not
+    // used by this function.
+    const size_t target_share_bytes = 64;
+
+    for (size_t i = 0; i < n_threads; ++i)
+    {
+        target_share_read[i] = new uint8_t[target_share_bytes];
+        // One call fills the whole buffer instead of one call per byte.
+        arc4random_buf(target_share_read[i], target_share_bytes);
+    }
 }
 
 void compute_CW(bool party, tcp::socket &sout, __m128i L, __m128i R, uint8_t bit, __m128i &CW)
 {
 
-	// struct cw_construction
-	//{
-	__m128i rand_b, gamma_b;
-	uint8_t bit_b;
-	//};
-
-	__m128i *X, *Y;
-
-	if (party)
-	{
-		std::string qfile = std::string("./gamma1");
-		int qfd = open(qfile.c_str(), O_RDWR);
-		X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
-							PROT_READ, MAP_PRIVATE, qfd, 0);
-
-		qfile = std::string("./x1");
-		qfd = open(qfile.c_str(), O_RDWR);
-		Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
-							PROT_READ, MAP_PRIVATE, qfd, 0);
-	}
-
-	if (!party)
-	{
-		std::string qfile = std::string("./gamma0");
-		int qfd = open(qfile.c_str(), O_RDWR);
-		X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
-							PROT_READ, MAP_PRIVATE, qfd, 0);
-
-		qfile = std::string("./x0");
-		qfd = open(qfile.c_str(), O_RDWR);
-		Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
-							PROT_READ, MAP_PRIVATE, qfd, 0);
-	}
-
-	// cw_construction computecw;
-	//	read(sin, boost::asio::buffer(&computecw, sizeof(computecw)));
-
-	// computecw.rand_b;
-	//__m128i gamma_b = computecw.gamma_b;
-
-	if (party)
-	{
-		rand_b = Y[0];	//_mm_set_epi32(0x6fef9434, 0x6768121e, 0x20942286, 0x1b59f7a7);
-		gamma_b = X[0]; // _mm_set_epi32(0x6a499109 , 0x803067dd , 0xd1e2281b , 0xe71b6262);
-		bit_b = 1;		// computecw.bit_b;
-	}
-	else
-	{
-		rand_b = Y[0];	// _mm_set_epi32(0xb29747df, 0xf7300f6d, 0x9476d971, 0xd5f75d98);
-		gamma_b = X[0]; // _mm_set_epi32(0xb73142e2 , 0x10687aae , 0x06500d3ec , 0x29b5c85d);
-		bit_b = 1;		// computecw.bit_b;
-	}
-
-	uint8_t blinded_bit, blinded_bit_read;
-	blinded_bit = bit ^ bit_b;
-
-	__m128i blinded_L = L ^ R ^ rand_b;
-	__m128i blinded_L_read;
-
-	struct BlindsCW
-	{
-		__m128i blinded_message;
-		uint8_t blinded_bit;
-	};
-
-	BlindsCW blinds_sent, blinds_recv;
-
-	blinds_sent.blinded_bit = blinded_bit;
-	blinds_sent.blinded_message = blinded_L;
-
-	boost::asio::write(sout, boost::asio::buffer(&blinds_sent, sizeof(blinds_sent)));
-	boost::asio::read(sout, boost::asio::buffer(&blinds_recv, sizeof(blinds_recv)));
- communication_cost += sizeof(blinds_recv);
-	
- blinded_bit_read = blinds_recv.blinded_bit;
-	blinded_L_read = blinds_recv.blinded_message;
-
-	__m128i out_ = R ^ gamma_b; //_mm_setzero_si128;
-
-	if (bit)
-	{
-		out_ ^= (L ^ R ^ blinded_L_read);
-	}
-	if (blinded_bit_read)
-	{
-		out_ ^= rand_b;
-	}
-
-	__m128i out_reconstruction;
-	boost::asio::write(sout, boost::asio::buffer(&out_, sizeof(out_)));
-	boost::asio::read(sout, boost::asio::buffer(&out_reconstruction, sizeof(out_reconstruction)));
- communication_cost += sizeof(out_reconstruction);
-	
- out_reconstruction = out_ ^ out_reconstruction;
-
-	CW = out_reconstruction;
-
- #ifdef DEBUG
-  uint8_t bit_reconstruction;
-  boost::asio::write(sout, boost::asio::buffer(&bit, sizeof(bit)));
-  boost::asio::read(sout, boost::asio::buffer(&bit_reconstruction, sizeof(bit_reconstruction)));
-  bit_reconstruction = bit ^ bit_reconstruction;
-
-  __m128i L_reconstruction;
-  boost::asio::write(sout, boost::asio::buffer(&L, sizeof(L)));
-  boost::asio::read(sout, boost::asio::buffer(&L_reconstruction, sizeof(L_reconstruction)));
-  L_reconstruction = L ^ L_reconstruction;
-
-  __m128i R_reconstruction;
-  boost::asio::write(sout, boost::asio::buffer(&R, sizeof(R)));
-  boost::asio::read(sout, boost::asio::buffer(&R_reconstruction, sizeof(R_reconstruction)));
-  R_reconstruction = R ^ R_reconstruction;
-
-  __m128i CW_debug;
-
-  if (bit_reconstruction != 0)
-  {
-   CW_debug = L_reconstruction;
-  }
-  else
-  {
-   CW_debug = R_reconstruction;
-  }
-
-  assert(CW_debug[0] == CW[0]);
-  assert(CW_debug[1] == CW[1]);
- #endif
+    // Jointly computes the correction word CW with the peer on `sout`.
+    //
+    //   party : selects the precomputed blind files that are read
+    //           (./gamma1 and ./x1 when true, ./gamma0 and ./x0 when false)
+    //   sout  : socket to the peer; each write below is paired with a read
+    //   L, R  : this party's XOR shares of the two candidate values
+    //   bit   : this party's XOR share of the selection bit
+    //   CW    : output; per the DEBUG check at the end it equals the
+    //           reconstructed L when the reconstructed bit is non-zero and the
+    //           reconstructed R otherwise
+    //
+    // NOTE(review): the results of open() and mmap() are not checked; a missing
+    // or short blind file still ends in a fault when the mapping is read.
+
+    const char *gamma_path = party ? "./gamma1" : "./gamma0";
+    const char *rand_path = party ? "./x1" : "./x0";
+    const size_t map_len = 8 * sizeof(__m128i);
+
+    // A mapping stays valid after its descriptor is closed, so each descriptor
+    // is closed right away (previously the first one was never closed).
+    int gamma_fd = open(gamma_path, O_RDWR);
+    __m128i *X = (__m128i *)mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, gamma_fd, 0);
+    close(gamma_fd);
+
+    int rand_fd = open(rand_path, O_RDWR);
+    __m128i *Y = (__m128i *)mmap(NULL, map_len, PROT_READ, MAP_PRIVATE, rand_fd, 0);
+    close(rand_fd);
+
+    // Copy the values out BEFORE unmapping: reading X[0] / Y[0] after munmap()
+    // touches unmapped memory.
+    __m128i rand_b = Y[0];
+    __m128i gamma_b = X[0];
+    uint8_t bit_b = 1;
+
+    munmap(X, map_len);
+    munmap(Y, map_len);
+
+    // Round 1: exchange the blinded selection bit and the blinded L ^ R.
+    uint8_t blinded_bit, blinded_bit_read;
+    blinded_bit = bit ^ bit_b;
+
+    __m128i blinded_L = L ^ R ^ rand_b;
+    __m128i blinded_L_read;
+
+    struct BlindsCW
+    {
+        __m128i blinded_message;
+        uint8_t blinded_bit;
+    };
+
+    BlindsCW blinds_sent, blinds_recv;
+
+    blinds_sent.blinded_bit = blinded_bit;
+    blinds_sent.blinded_message = blinded_L;
+
+    boost::asio::write(sout, boost::asio::buffer(&blinds_sent, sizeof(blinds_sent)));
+    boost::asio::read(sout, boost::asio::buffer(&blinds_recv, sizeof(blinds_recv)));
+    communication_cost += sizeof(blinds_recv);
+
+    blinded_bit_read = blinds_recv.blinded_bit;
+    blinded_L_read = blinds_recv.blinded_message;
+
+    // Local share of the result, built from the peer's blinded values.
+    __m128i out_ = R ^ gamma_b;
+
+    if (bit)
+    {
+        out_ ^= (L ^ R ^ blinded_L_read);
+    }
+    if (blinded_bit_read)
+    {
+        out_ ^= rand_b;
+    }
+
+    // Round 2: exchange the output shares and XOR them together.
+    __m128i out_reconstruction;
+    boost::asio::write(sout, boost::asio::buffer(&out_, sizeof(out_)));
+    boost::asio::read(sout, boost::asio::buffer(&out_reconstruction, sizeof(out_reconstruction)));
+    communication_cost += sizeof(out_reconstruction);
+
+    out_reconstruction = out_ ^ out_reconstruction;
+
+    CW = out_reconstruction;
+
+#ifdef DEBUG
+    // Debug only: open bit, L and R in the clear and check CW against them.
+    uint8_t bit_reconstruction;
+    boost::asio::write(sout, boost::asio::buffer(&bit, sizeof(bit)));
+    boost::asio::read(sout, boost::asio::buffer(&bit_reconstruction, sizeof(bit_reconstruction)));
+    bit_reconstruction = bit ^ bit_reconstruction;
+
+    __m128i L_reconstruction;
+    boost::asio::write(sout, boost::asio::buffer(&L, sizeof(L)));
+    boost::asio::read(sout, boost::asio::buffer(&L_reconstruction, sizeof(L_reconstruction)));
+    L_reconstruction = L ^ L_reconstruction;
+
+    __m128i R_reconstruction;
+    boost::asio::write(sout, boost::asio::buffer(&R, sizeof(R)));
+    boost::asio::read(sout, boost::asio::buffer(&R_reconstruction, sizeof(R_reconstruction)));
+    R_reconstruction = R ^ R_reconstruction;
+
+    __m128i CW_debug;
+
+    if (bit_reconstruction != 0)
+    {
+        CW_debug = L_reconstruction;
+    }
+    else
+    {
+        CW_debug = R_reconstruction;
+    }
+
+    assert(CW_debug[0] == CW[0]);
+    assert(CW_debug[1] == CW[1]);
+#endif
 }
 
 __m128i bit_mask_avx2_msb(unsigned int n)
 {
-	__m128i ones = _mm_set1_epi32(-1);
-	__m128i cnst32_128 = _mm_set_epi32(32, 64, 96, 128);
+    __m128i ones = _mm_set1_epi32(-1);
+    __m128i cnst32_128 = _mm_set_epi32(32, 64, 96, 128);
 
-	__m128i shift = _mm_set1_epi32(n);
-	shift = _mm_subs_epu16(cnst32_128, shift);
-	return _mm_sllv_epi32(ones, shift);
+    __m128i shift = _mm_set1_epi32(n);
+    shift = _mm_subs_epu16(cnst32_128, shift);
+    return _mm_sllv_epi32(ones, shift);
 }
 
 __m128i bit_mask_avx2_lsb(unsigned int n)
 {
-	__m128i ones = _mm_set1_epi32(-1);
-	__m128i cnst32_128 = _mm_set_epi32(128, 96, 64, 32);
+    __m128i ones = _mm_set1_epi32(-1);
+    __m128i cnst32_128 = _mm_set_epi32(128, 96, 64, 32);
 
-	__m128i shift = _mm_set1_epi32(n);
-	shift = _mm_subs_epu16(cnst32_128, shift);
-	return _mm_srlv_epi32(ones, shift);
+    __m128i shift = _mm_set1_epi32(n);
+    shift = _mm_subs_epu16(cnst32_128, shift);
+    return _mm_srlv_epi32(ones, shift);
 }
 
 template <typename node_t, typename prgkey_t>
 static inline void traverse(const prgkey_t &prgkey, const node_t &seed, node_t s[2])
 {
-	dpf::PRG(prgkey, clear_lsb(seed, 0b11), s, 2);
+    dpf::PRG(prgkey, clear_lsb(seed, 0b11), s, 2);
 } // dpf::expand
 
 inline void evalfull_mpc(const size_t &nodes_per_leaf, const size_t &depth, const size_t &nbits, const size_t &nodes_in_interval,
-						 const AES_KEY &prgkey, uint8_t target_share[64], std::vector<socket_t> &socketsPb,
-						 const size_t from, const size_t to, __m128i *output, int8_t *_t, __m128i &final_correction_word, bool party, size_t socket_no = 0)
+                         const AES_KEY &prgkey, uint8_t target_share[64], std::vector<socket_t> &socketsPb,
+                         const size_t from, const size_t to, __m128i *output, int8_t *_t, __m128i &final_correction_word, bool party, size_t socket_no = 0)
 {
 
-	__m128i root;
-
-	arc4random_buf(&root, sizeof(root));
+    __m128i root;
 
-	root = set_lsb(root, party);
+    arc4random_buf(&root, sizeof(root));
 
-	const size_t from_node = std::floor(static_cast<double>(from) / nodes_per_leaf);
+    root = set_lsb(root, party);
 
-	__m128i *s[2] = {
-		reinterpret_cast<__m128i *>(output) + nodes_in_interval * (nodes_per_leaf - 1),
-		s[0] + nodes_in_interval / 2};
+    const size_t from_node = std::floor(static_cast<double>(from) / nodes_per_leaf);
 
-	int8_t *t[2] = {_t, _t + nodes_in_interval / 2};
+    __m128i *s[2] = {
+        reinterpret_cast<__m128i *>(output) + nodes_in_interval * (nodes_per_leaf - 1),
+        s[0] + nodes_in_interval / 2
+    };
 
-	int curlayer = depth % 2;
+    int8_t *t[2] = {_t, _t + nodes_in_interval / 2};
 
-	s[curlayer][0] = root;
-	t[curlayer][0] = get_lsb(root, 0b01);
+    int curlayer = depth % 2;
 
-	__m128i *CW = (__m128i *)std::aligned_alloc(sizeof(__m256i), depth * sizeof(__m128i));
+    s[curlayer][0] = root;
+    t[curlayer][0] = get_lsb(root, 0b01);
 
-	for (size_t layer = 0; layer < depth; ++layer)
-	{
-   #ifdef VERBOSE
-		  printf("layer = %zu\n", layer);
-   #endif
-		 curlayer = 1 - curlayer;
+    __m128i *CW = (__m128i *)std::aligned_alloc(sizeof(__m256i), depth * sizeof(__m128i));
 
-		 size_t i = 0, j = 0;
-		 auto nextbit = (from_node >> (nbits - layer - 1)) & 1;
-		 size_t nodes_in_prev_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer)));
-		 size_t nodes_in_cur_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer - 1)));
+    for (size_t layer = 0; layer < depth; ++layer)
+    {
+#ifdef VERBOSE
+        printf("layer = %zu\n", layer);
+#endif
+        curlayer = 1 - curlayer;
+
+        size_t i = 0, j = 0;
+        auto nextbit = (from_node >> (nbits - layer - 1)) & 1;
+        size_t nodes_in_prev_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer)));
+        size_t nodes_in_cur_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer - 1)));
+
+        __m128i L = _mm_setzero_si128();
+        __m128i R = _mm_setzero_si128();
+
+        for (i = nextbit, j = nextbit; j < nodes_in_prev_layer - 1; ++j, i += 2)
+        {
+            traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);
+            L ^= s[curlayer][i];
+            R ^= s[curlayer][i + 1];
+        }
+
+        if (nodes_in_prev_layer > j)
+        {
+            if (i < nodes_in_cur_layer - 1)
+            {
+                traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);
+                L ^= s[curlayer][i];
+                R ^= s[curlayer][i + 1];
+            }
+        }
+
+        compute_CW(party, socketsPb[socket_no], L, R, target_share[layer], CW[layer]);
+
+        uint8_t advice_L = get_lsb(L) ^ target_share[layer];
+        uint8_t advice_R = get_lsb(R) ^ target_share[layer];
+
+        uint8_t cwt_L, cwt_R;
+        uint8_t advice[2];
+        uint8_t cwts[2];
+        advice[0] = advice_L;
+        advice[1] = advice_R;
+
+        boost::asio::write(socketsPb[socket_no + 1], boost::asio::buffer(&advice, sizeof(advice)));
+        boost::asio::read(socketsPb[socket_no + 1], boost::asio::buffer(&cwts, sizeof(cwts)));
+
+        cwt_L = cwts[0];
+        cwt_R = cwts[1];
+
+        cwt_L = cwt_L ^ advice_L ^ 1;
+        cwt_R = cwt_R ^ advice_R;
+
+        for (size_t j = 0; j < nodes_in_prev_layer; ++j)
+        {
+            t[curlayer][2 * j] = get_lsb(s[curlayer][2 * j]) ^ (cwt_L & t[1 - curlayer][j]);
+            s[curlayer][2 * j] = clear_lsb(xor_if(s[curlayer][2 * j], CW[layer], !t[1 - curlayer][j]), 0b11);
+            t[curlayer][(2 * j) + 1] = get_lsb(s[curlayer][(2 * j) + 1]) ^ (cwt_R & t[1 - curlayer][j]);
+            s[curlayer][(2 * j) + 1] = clear_lsb(xor_if(s[curlayer][(2 * j) + 1], CW[layer], !t[1 - curlayer][j]), 0b11);
+        }
+    }
 
-		 __m128i L = _mm_setzero_si128();
-		 __m128i R = _mm_setzero_si128();
+    free(CW);
+    __m128i Gamma = _mm_setzero_si128();
 
-   for (i = nextbit, j = nextbit; j < nodes_in_prev_layer - 1; ++j, i += 2)
-   {
-    traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);
-    L ^= s[curlayer][i];
-    R ^= s[curlayer][i + 1];
-   }
+    for (size_t i = 0; i < to + 1; ++i)
+    {
+        Gamma[0] += output[i][0];
+        Gamma[1] += output[i][1];
+    }
 
-   if (nodes_in_prev_layer > j)
-   {
-    if (i < nodes_in_cur_layer - 1)
+    if (party)
     {
-     traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);
-     L ^= s[curlayer][i];
-     R ^= s[curlayer][i + 1];
+        Gamma[0] = -Gamma[0];
+        Gamma[1] = -Gamma[1];
     }
-   }
 
-		 compute_CW(party, socketsPb[socket_no], L, R, target_share[layer], CW[layer]);
+    boost::asio::write(socketsPb[socket_no + 3], boost::asio::buffer(&Gamma, sizeof(Gamma)));
+    boost::asio::read(socketsPb[socket_no + 3], boost::asio::buffer(&final_correction_word, sizeof(final_correction_word)));
+    communication_cost += sizeof(Gamma);
+    final_correction_word = Gamma; // final_correction_word + Gamma;
 
-		 uint8_t advice_L = get_lsb(L) ^ target_share[layer];
-		 uint8_t advice_R = get_lsb(R) ^ target_share[layer];
+} // dpf::__evalinterval
 
-		 uint8_t cwt_L, cwt_R;
-		 uint8_t advice[2];
-		 uint8_t cwts[2];
-		 advice[0] = advice_L;
-		 advice[1] = advice_R;
+void convert_shares(__m128i **output, int8_t **flags, size_t n_threads, size_t db_nitems, __m128i *final_correction_word, tcp::socket &sb, bool party)
+{
 
-		 boost::asio::write(socketsPb[socket_no + 1], boost::asio::buffer(&advice, sizeof(advice)));
-		 boost::asio::read(socketsPb[socket_no + 1], boost::asio::buffer(&cwts, sizeof(cwts)));
+    for (size_t j = 0; j < db_nitems; ++j)
+    {
+        for (size_t k = 0; k < n_threads; ++k)
+        {
+            if (party)
+            {
+                output[k][j] = -output[k][j];
+                flags[k][j] = -flags[k][j];
+            }
+        }
+
+#ifdef DEBUG
+        int8_t out = flags[0][j];
+        int8_t out_rec;
+
+        boost::asio::write(sb, boost::asio::buffer(&out, sizeof(out)));
+        boost::asio::read(sb, boost::asio::buffer(&out_rec, sizeof(out_rec)));
+        out_rec = out_rec + out;
+
+
+        if (out_rec != 0)
+            std::cout << j << "(flags) --> " << (int)out_rec << std::endl
+                      << std::endl;
+
+        __m128i out2 = output[0][j];
+        __m128i out_rec2;
+
+        boost::asio::write(sb, boost::asio::buffer(&out2, sizeof(out2)));
+        boost::asio::read(sb, boost::asio::buffer(&out_rec2, sizeof(out_rec2)));
+        out_rec2 = out_rec2 + out2;
+        if (out_rec2[0] != 0)
+            std::cout << j << "--> " << out_rec2[0] << std::endl;
+#endif
+    }
 
-		 cwt_L = cwts[0];
-		 cwt_R = cwts[1];
+    for (size_t i = 0; i < n_threads; ++i)
+    {
 
-		 cwt_L = cwt_L ^ advice_L ^ 1;
-		 cwt_R = cwt_R ^ advice_R;
+        int64_t pm = 0;
+        int64_t rb;
+
+        arc4random_buf(&rb, sizeof(rb));
+        for (size_t j = 0; j < db_nitems; ++j)
+        {
+            if (party)
+            {
+                if (flags[i][j] != 0)
+                    pm -= 1;
+            }
+            if (!party)
+            {
+                if (flags[i][j] != 0)
+                    pm += 1; // flags[0][j];
+            }
+        }
+    }
+}
 
-		 for (size_t j = 0; j < nodes_in_prev_layer; ++j)
-		 {
-			 t[curlayer][2 * j] = get_lsb(s[curlayer][2 * j]) ^ (cwt_L & t[1 - curlayer][j]);
-			 s[curlayer][2 * j] = clear_lsb(xor_if(s[curlayer][2 * j], CW[layer], !t[1 - curlayer][j]), 0b11);
-			 t[curlayer][(2 * j) + 1] = get_lsb(s[curlayer][(2 * j) + 1]) ^ (cwt_R & t[1 - curlayer][j]);
-			 s[curlayer][(2 * j) + 1] = clear_lsb(xor_if(s[curlayer][(2 * j) + 1], CW[layer], !t[1 - curlayer][j]), 0b11);
-		 }
-	 }
+// Listens on `port` (IPv4), performs one synchronous accept, and moves the
+// resulting socket into socketsPb[j].
+//
+//   io_context : Boost.Asio context the acceptor is created on
+//   socketsPb  : destination vector; index j must already exist
+//   port       : local TCP port to listen on
+//   j          : slot in socketsPb that receives the accepted socket
+void accept_conncections_from_Pb(boost::asio::io_context &io_context, std::vector<socket_t> &socketsPb, int port, size_t j)
+{
+    const tcp::endpoint listen_endpoint(tcp::v4(), port);
+    tcp::acceptor listener(io_context, listen_endpoint);
+
+    tcp::socket peer(listener.accept());
+    socketsPb[j] = std::move(peer);
+}
 
-	__m128i Gamma = _mm_setzero_si128();
+int main(int argc, char *argv[])
+{
 
-	for (size_t i = 0; i < to + 1; ++i)
-	{
-		Gamma[0] += output[i][0];
-		Gamma[1] += output[i][1];
-	}
+    boost::asio::io_context io_context;
+    tcp::resolver resolver(io_context);
+    const std::string host1 = argv[1];
 
-	if (party)
-	{
-		Gamma[0] = -Gamma[0];
-		Gamma[1] = -Gamma[1];
-	}
 
-	boost::asio::write(socketsPb[socket_no + 3], boost::asio::buffer(&Gamma, sizeof(Gamma)));
-	boost::asio::read(socketsPb[socket_no + 3], boost::asio::buffer(&final_correction_word, sizeof(final_correction_word)));
- communication_cost += sizeof(Gamma);
-	final_correction_word = Gamma; // final_correction_word + Gamma;
+    const size_t n_threads = atoi(argv[2]);
+    const size_t number_of_sockets = 5 * n_threads;
+    const size_t expo = atoi(argv[3]);
 
-} // dpf::__evalinterval
+    const size_t maxRAM = atoi(argv[4]);
 
-void convert_shares(__m128i **output, int8_t **flags, size_t n_threads, size_t db_nitems, __m128i *final_correction_word, tcp::socket &sb, bool party)
-{
+    const size_t db_nitems = 1ULL << expo;
 
-	for (size_t j = 0; j < db_nitems; ++j)
-	{
-		for (size_t k = 0; k < n_threads; ++k)
-		{
-			if (party)
-			{
-				output[k][j] = -output[k][j];
-				flags[k][j] = -flags[k][j];
-			}
-		}
-
-		 #ifdef DEBUG
-		int8_t out = flags[0][j];
-		int8_t out_rec;
-
-		boost::asio::write(sb, boost::asio::buffer(&out, sizeof(out)));
-		boost::asio::read(sb, boost::asio::buffer(&out_rec, sizeof(out_rec)));
-		out_rec = out_rec + out;
-
-		
-		if (out_rec != 0)
-			std::cout << j << "(flags) --> " << (int)out_rec << std::endl
-					  << std::endl;
-
-		__m128i out2 = output[0][j];
-		__m128i out_rec2;
-
-		boost::asio::write(sb, boost::asio::buffer(&out2, sizeof(out2)));
-		boost::asio::read(sb, boost::asio::buffer(&out_rec2, sizeof(out_rec2)));
-		out_rec2 = out_rec2 + out2;
-		if (out_rec2[0] != 0)
-			std::cout << j << "--> " << out_rec2[0] << std::endl;
-		 #endif
-	}
-
-	for (size_t i = 0; i < n_threads; ++i)
-	{
-
-		int64_t pm = 0;
-		int64_t rb;
-
-		arc4random_buf(&rb, sizeof(rb));
-		for (size_t j = 0; j < db_nitems; ++j)
-		{
-			if (party)
-			{
-				if (flags[i][j] != 0)
-					pm -= 1;
-			}
-			if (!party)
-			{
-				if (flags[i][j] != 0)
-					pm += 1; // flags[0][j];
-			}
-		}
-	}
-}
+    size_t RAM_needed_per_thread = 164 * db_nitems;
+    std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;
+    std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;
+    size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);
+    if (thread_per_batch > n_threads) {
+        thread_per_batch = n_threads;
+    }
+    std::cout << "thread_per_batch = " << thread_per_batch << std::endl;
+    if (thread_per_batch < 1) {
+        std::cout << "You need more RAM" << std::endl;
+        exit(0);
+    }
+    size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);
+    std::cout << "n_batches = " << n_batches << std::endl;
 
-void accept_conncections_from_Pb(boost::asio::io_context &io_context, std::vector<socket_t> &socketsPb, int port, size_t j)
-{
-	tcp::acceptor acceptor_a(io_context, tcp::endpoint(tcp::v4(), port));
-	tcp::socket sb_a(acceptor_a.accept());
-	socketsPb[j] = std::move(sb_a);
-}
+    std::vector<socket_t> socketsPb;
+    for (size_t j = 0; j < number_of_sockets + 1; ++j)
+    {
+        tcp::socket emptysocket(io_context);
+        socketsPb.emplace_back(std::move(emptysocket));
+    }
+    socketsPb.reserve(number_of_sockets + 1);
 
-int main(int argc, char *argv[])
-{
 
-	boost::asio::io_context io_context;
-	tcp::resolver resolver(io_context);
-	const std::string host1 = argv[1];
- 
-
-	const size_t n_threads = atoi(argv[2]);
-	const size_t number_of_sockets = 5 * n_threads;
-	const size_t expo = atoi(argv[3]);
-
-	const size_t maxRAM = atoi(argv[4]);
-
-	const size_t db_nitems = 1ULL << expo;
-
-      size_t RAM_needed_per_thread = 164 * db_nitems;
-      std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;
-      std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;
-      size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);
-      if (thread_per_batch > n_threads) {
-	thread_per_batch = n_threads;
-      }
-      std::cout << "thread_per_batch = " << thread_per_batch << std::endl;
-      if (thread_per_batch < 1) {
-       std::cout << "You need more RAM" << std::endl;
-       exit(0);
-      }
-      size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);
-      std::cout << "n_batches = " << n_batches << std::endl;
-
-	std::vector<socket_t> socketsPb;
-	for (size_t j = 0; j < number_of_sockets + 1; ++j)
-	{
-		tcp::socket emptysocket(io_context);
-		socketsPb.emplace_back(std::move(emptysocket));
-	}
-	socketsPb.reserve(number_of_sockets + 1);
-
-
-	std::vector<int> ports;
-	for (size_t j = 0; j < number_of_sockets; ++j)
-	{
-		int port = 6000;
-		ports.push_back(port + j);
-	}
-
-	std::vector<int> ports2_0;
-	for (size_t j = 0; j < number_of_sockets; ++j)
-	{
-		int port = 20000;
-		ports2_0.push_back(port + j);
-	}
-
-	std::vector<int> ports2_1;
-	for (size_t j = 0; j < number_of_sockets; ++j)
-	{
-		int port = 40000;
-		ports2_1.push_back(port + j);
-	}
-
-bool party;
+    std::vector<int> ports;
+    for (size_t j = 0; j < number_of_sockets; ++j)
+    {
+        int port = 6000;
+        ports.push_back(port + j);
+    }
+
+    std::vector<int> ports2_0;
+    for (size_t j = 0; j < number_of_sockets; ++j)
+    {
+        int port = 20000;
+        ports2_0.push_back(port + j);
+    }
+
+    std::vector<int> ports2_1;
+    for (size_t j = 0; j < number_of_sockets; ++j)
+    {
+        int port = 40000;
+        ports2_1.push_back(port + j);
+    }
+
+    bool party;
 
 #if (PARTY == 0)
-	party = false; 
-	for (size_t j = 0; j < number_of_sockets; ++j)
-	{
-		tcp::socket sb_a(io_context);
-		boost::asio::connect(sb_a, resolver.resolve({host1, std::to_string(ports[j])}));
-		socketsPb[j] = std::move(sb_a);
-	}
+    party = false;
+    for (size_t j = 0; j < number_of_sockets; ++j)
+    {
+        tcp::socket sb_a(io_context);
+        boost::asio::connect(sb_a, resolver.resolve({host1, std::to_string(ports[j])}));
+        socketsPb[j] = std::move(sb_a);
+    }
 #else
-	party = true;	
-	boost::asio::thread_pool pool2(number_of_sockets);
-	for (size_t j = 0; j < number_of_sockets; ++j)
-	{
-		boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(socketsPb), ports[j], j));
-	}
-
-	pool2.join();
+    party = true;
+    boost::asio::thread_pool pool2(number_of_sockets);
+    for (size_t j = 0; j < number_of_sockets; ++j)
+    {
+        boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(socketsPb), ports[j], j));
+    }
+
+    pool2.join();
 #endif
 
- 
-	__m128i *final_correction_word = (__m128i *)std::aligned_alloc(sizeof(__m256i), thread_per_batch * sizeof(__m128i));
 
-	AES_KEY aeskey;
+    __m128i *final_correction_word = (__m128i *)std::aligned_alloc(sizeof(__m256i), thread_per_batch * sizeof(__m128i));
 
-	__m128i **output = (__m128i **)malloc(sizeof(__m128i *) * thread_per_batch);
-	int8_t **flags = (int8_t **)malloc(sizeof(uint8_t *) * thread_per_batch);
+    AES_KEY aeskey;
 
-	for (size_t j = 0; j < thread_per_batch; ++j)
-	{
-		output[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
-		flags[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
-	}
+    __m128i **output = (__m128i **)malloc(sizeof(__m128i *) * thread_per_batch);
+    int8_t **flags = (int8_t **)malloc(sizeof(uint8_t *) * thread_per_batch);
 
-	const size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) * CHAR_BIT;
-	const bool is_packed = (sizeof(leaf_t) < sizeof(node_t));
-	const size_t nodes_per_leaf = is_packed ? 1 : std::ceil(static_cast<double>(bits_per_leaf) / (sizeof(node_t) * CHAR_BIT));
-	const size_t depth = std::ceil(std::log2(db_nitems));
-	const size_t nbits = std::ceil(std::log2(db_nitems));
-	const size_t nodes_in_interval = db_nitems - 1;
-	auto start = std::chrono::steady_clock::now();
+    for (size_t j = 0; j < thread_per_batch; ++j)
+    {
+        output[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
+        flags[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
+    }
+
+    const size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) * CHAR_BIT;
+    const bool is_packed = (sizeof(leaf_t) < sizeof(node_t));
+    const size_t nodes_per_leaf = is_packed ? 1 : std::ceil(static_cast<double>(bits_per_leaf) / (sizeof(node_t) * CHAR_BIT));
+    const size_t depth = std::ceil(std::log2(db_nitems));
+    const size_t nbits = std::ceil(std::log2(db_nitems));
+    const size_t nodes_in_interval = db_nitems - 1;
+    auto start = std::chrono::steady_clock::now();
 
 
 #ifdef VERBOSE
-		printf("n_threads = %zu\n\n", n_threads);
+    printf("n_threads = %zu\n\n", n_threads);
 #endif
- 
 
 
 
 
- for(size_t iters = 0; iters < n_batches; ++iters)
-{
-   if (n_batches > 1) {
-    printf("Starting evalfull_mpc batch %lu / %lu\n", iters+1, n_batches);
-   }
-   uint8_t **target_share_read = new uint8_t *[thread_per_batch];
-   generate_random_targets(target_share_read, thread_per_batch, party, expo);
-   boost::asio::thread_pool pool(thread_per_batch);
-   for (size_t j = 0; j < thread_per_batch; ++j)
-   {
-    boost::asio::post(pool, std::bind(evalfull_mpc, std::ref(nodes_per_leaf), std::ref(depth), std::ref(nbits), std::ref(nodes_in_interval),
-              std::ref(aeskey), target_share_read[j], std::ref(socketsPb), 0, db_nitems - 1, output[j],
-              flags[j], std::ref(final_correction_word[j]), party, 5 * j));
-   }
-
-   pool.join();
-
-
-   convert_shares(output, flags, thread_per_batch, db_nitems, final_correction_word, socketsPb[0], party);
-}
- auto end = std::chrono::steady_clock::now();
-	std::chrono::duration<double> elapsed_seconds = end - start;
-	std::cout << "WallClockTime: " << elapsed_seconds.count() << " s" << std::endl;
- std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;
- 
-
-	if(!party)
-	{
-		char const *p0_filename0;
-		p0_filename0 = "../duoram-online/preprocflags/party0_read_flags_b";
-		int w0 = open(p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD);
-		int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0]));
-  if (written < 0) perror("Write error");
-		close(w0);
-	}
-	else
-	{
-		char const *p0_filename0;
-		p0_filename0 = "../duoram-online/preprocflags/party1_read_flags_b";
-		int w0 = open(p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD);
-		int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0]));
-		if (written < 0) perror("Write error"); 
-		close(w0);
-	}
-
-	return 0;
+
+    for(size_t iters = 0; iters < n_batches; ++iters)
+    {
+        if (n_batches > 1) {
+            printf("Starting evalfull_mpc batch %lu / %lu\n", iters+1, n_batches);
+        }
+        uint8_t **target_share_read = new uint8_t *[thread_per_batch];
+        generate_random_targets(target_share_read, thread_per_batch, party, expo);
+        boost::asio::thread_pool pool(thread_per_batch);
+        for (size_t j = 0; j < thread_per_batch; ++j)
+        {
+            boost::asio::post(pool, std::bind(evalfull_mpc, std::ref(nodes_per_leaf), std::ref(depth), std::ref(nbits), std::ref(nodes_in_interval),
+                                              std::ref(aeskey), target_share_read[j], std::ref(socketsPb), 0, db_nitems - 1, output[j],
+                                              flags[j], std::ref(final_correction_word[j]), party, 5 * j));
+        }
+
+        pool.join();
+        for(size_t j = 0; j < thread_per_batch; ++j)
+        {
+            delete[] target_share_read[j];
+        }
+        delete[] target_share_read;
+        convert_shares(output, flags, thread_per_batch, db_nitems, final_correction_word, socketsPb[0], party);
+    }
+
+    for(size_t j = 0; j < thread_per_batch; ++j)
+    {
+
+        free(output[j]);
+        free(flags[j]);
+    }
+    free(output);
+    free(flags);
+    free(final_correction_word);
+
+    auto end = std::chrono::steady_clock::now();
+    std::chrono::duration<double> elapsed_seconds = end - start;
+    std::cout << "WallClockTime: " << elapsed_seconds.count() << " s" << std::endl;
+    std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;
+    
+    return 0;
 }

+ 30 - 3
duoram-online/duoram.cpp

@@ -315,6 +315,7 @@ int main(const int argc, const char * argv[])
     #endif
     delete[] WritePb_;
     delete[] WritePb_recv;
+    delete[] where_to_write;
 
     for(size_t w = 0; w < number_of_writes; ++w)
     {			
@@ -354,7 +355,7 @@ int main(const int argc, const char * argv[])
       
      
     for(size_t r = 0; r < number_of_ind_reads; ++r) WritePb_ind_reads[r] = where_to_read_independent[r] -ri;
-
+    delete[] where_to_read_independent;
     boost::asio::write(sockets_2[3], boost::asio::buffer(WritePb_ind_reads, number_of_ind_reads * sizeof(size_t)));
     boost::asio::write(sockets_[3], boost::asio::buffer(WritePb_ind_reads, number_of_ind_reads * sizeof(size_t)));
     boost::asio::read(sockets_[3], boost::asio::buffer(WritePb_ind_reads_recv, number_of_ind_reads * sizeof(size_t)));
@@ -381,7 +382,10 @@ int main(const int argc, const char * argv[])
       std::cout << "---> [duoram independent reads] " <<  print_reconstruction(sockets_[0], read_out_independent_reads[r]) << std::endl;
       #endif
     }
-
+    delete[] rotate;
+    delete[] Gamma_reads;
+    delete[] WritePb_ind_reads_recv;
+    delete[] WritePb_ind_reads;
     auto end_ind_reads = std::chrono::steady_clock::now();
     std::chrono::duration<double> elapsed_seconds_ind_reads = end_ind_reads - start_ind_reads;
     //printf("elapsed_seconds_ind_reads = %f\n",elapsed_seconds_ind_reads.count());
@@ -406,7 +410,7 @@ int main(const int argc, const char * argv[])
        std::cout << print_reconstruction(sockets_[0], read_out_dependent_reads[r]) << std::endl;
       #endif
     }
-    
+    delete[] where_to_read_dependent;
     auto end_dep_reads = std::chrono::steady_clock::now();
     std::chrono::duration<double> elapsed_seconds_dep_reads = end_dep_reads - start_dep_reads;
     dependent_read_time = elapsed_seconds_dep_reads.count();
@@ -416,6 +420,29 @@ int main(const int argc, const char * argv[])
      std::cout << std::endl << std::endl << "============== DEPENDENT READS END  ==============" << std::endl << std::endl;
     #endif
    #endif
+    
+    free(reading_b);
+    free(reading_c);
+    free(reading_d);
+    free(writing_b);
+    free(writing_c);
+    free(writing_d);
+    free(reading_temp);
+    free(DB);
+    free(updated_DB);
+    free(blinded_DB);
+    free(blinded_DB_recv);
+    free(updated_blinded_DB_recv);
+    free(b);
+    free(c);
+    free(d);
+    free(blinds);
+    free(updated_blinds);
+    
+    #ifdef ThreeParty
+     delete[] read_out;
+     delete[] Gamma;
+    #endif
   }  
 
 auto end_total = std::chrono::steady_clock::now();

+ 34 - 6
duoram-online/readvectors.h

@@ -13,6 +13,7 @@ int read_final_correction_word(bool party, DB_t& FCW_read, int i = 0)
    concatanate_index("../duoram-online/preprocflags/FCW0", tmp, i);
 			int const in0 { open(tmp, O_RDONLY ) };
 	 	size_t r = read(in0, &FCW_read,   sizeof(FCW_read));	
+	 	close(in0);
    if(r < 0) perror("Read error");
 		}
 		
@@ -21,7 +22,8 @@ int read_final_correction_word(bool party, DB_t& FCW_read, int i = 0)
    char tmp[100];
    concatanate_index("../duoram-online/preprocflags/FCW1", tmp, i);
 		 int const in0 { open(tmp, O_RDONLY ) };
-	 	size_t r = read(in0, &FCW_read,   sizeof(FCW_read));	
+	 	size_t r = read(in0, &FCW_read,   sizeof(FCW_read));
+	 		close(in0);	
  		if(r < 0) perror("Read error");
 		}
 
@@ -35,7 +37,8 @@ int read_rand_indx(bool party, DB_t& R, int i = 0)
    char tmp[100];
    concatanate_index("../duoram-online/preprocflags/R0", tmp, i);
 			int const in0 { open(tmp, O_RDONLY ) };
-	 	size_t r = read(in0, &R,   sizeof(R));	
+	 	size_t r = read(in0, &R,   sizeof(R));
+	 	close(in0);	
    if(r < 0) perror("Read error");
 		}
 		
@@ -45,6 +48,7 @@ int read_rand_indx(bool party, DB_t& R, int i = 0)
    concatanate_index("../duoram-online/preprocflags/R1", tmp, i);
 		 int const in0 { open(tmp, O_RDONLY ) };
 	 	size_t r = read(in0, &R,   sizeof(R));	
+	 	close(in0);
  		if(r < 0) perror("Read error");
 		}
 
@@ -59,16 +63,19 @@ int read_flags_for_reading(bool party, size_t db_nitems, int i = 0)
    concatanate_index("../duoram-online/preprocflags/party0_read_flags_b", tmp, i);
 	 	int const in0 { open(tmp, O_RDONLY ) };
 	 	size_t r = read(in0, reading_b,  sizeof(reading_b));	
+	 	close(in0);
    if(r < 0) perror("Read error");
   
    concatanate_index("../duoram-online/preprocflags/party0_read_flags_c", tmp, i);
    int const in1 { open( tmp, O_RDONLY ) };
 	 	r = read(in1, reading_c,  sizeof(reading_c));
+	 	close(in1);
    if(r < 0) perror("Read error");
 
    concatanate_index("../duoram-online/preprocflags/party0_read_flags_d", tmp, i);
 	  int const in2 { open( tmp, O_RDONLY ) };
-	  r = read(in2, reading_d,  sizeof(reading_d));	
+	  r = read(in2, reading_d,  sizeof(reading_d));
+	  close(in2);	
    if(r < 0) perror("Read error");
 	}
 
@@ -79,16 +86,19 @@ int read_flags_for_reading(bool party, size_t db_nitems, int i = 0)
    concatanate_index("../duoram-online/preprocflags/party1_read_flags_b", tmp, i);
 		 int const in0 { open(tmp, O_RDONLY ) };
 	 	size_t r = read(in0, reading_b,  sizeof(reading_b));	
+	 	close(in0);
 	 	if(r < 0) perror("Read error");
 
    concatanate_index("../duoram-online/preprocflags/party1_read_flags_c", tmp, i);
  	 int const in1 { open(tmp, O_RDONLY ) };
 		 r = read(in1, reading_c,  sizeof(reading_c));
+		 close(in1);
 		 if(r < 0) perror("Read error");
     
    concatanate_index("../duoram-online/preprocflags/party1_read_flags_d", tmp, i);
 		 int const in2 { open(tmp, O_RDONLY ) };
 		 r = read(in2, reading_d,  sizeof(reading_d));	
+		 close(in2);
    if(r < 0) perror("Read error");
 }
 
@@ -103,17 +113,20 @@ int read_flags_for_writing(bool party, size_t db_nitems, int i = 0)
     char tmp[100];
     concatanate_index("../duoram-online/preprocflags/party0_write_flags_b", tmp, i);
 	   int const in0_w { open(tmp, O_RDONLY ) };
-    size_t r = read(in0_w, writing_b,  sizeof(writing_b));	
+    size_t r = read(in0_w, writing_b,  sizeof(writing_b));
+    close(in0_w);	
     if(r < 0) perror("Read error");
 
     concatanate_index("../duoram-online/preprocflags/party0_write_flags_c", tmp,i);	   
 	   int const in1_w { open( tmp, O_RDONLY ) };
     r = read(in1_w, writing_c,  sizeof(writing_c));
+    close(in1_w);
     if(r < 0) perror("Read error");
 	
     concatanate_index("../duoram-online/preprocflags/party0_write_flags_d", tmp,i);
 	   int const in2_w { open( tmp, O_RDONLY ) };
    	r = read(in2_w, writing_d,  sizeof(writing_d));	
+   	close(in2_w);
     if(r < 0) perror("Read error");
 	}
 
@@ -123,16 +136,19 @@ int read_flags_for_writing(bool party, size_t db_nitems, int i = 0)
    concatanate_index("../duoram-online/preprocflags/party1_write_flags_b", tmp,i);
  		int const in0_w { open( tmp, O_RDONLY ) };
 	 	size_t r = read(in0_w, writing_b,  sizeof(writing_b));	
+	 	close(in0_w);
 	 	if(r < 0) perror("Read error");
 
    concatanate_index("../duoram-online/preprocflags/party1_write_flags_c", tmp, i);
    int const in1_w { open(tmp, O_RDONLY ) };
 		 r = read(in1_w, writing_c,  sizeof(writing_c));
+		 close(in1_w);
    if(r < 0) perror("Read error");
 
 		 concatanate_index("../duoram-online/preprocflags/party1_write_flags_d", tmp, i);
 		 int const in2_w { open( tmp, O_RDONLY ) };
 		 r = read(in2_w, writing_d,  sizeof(writing_d));	
+		 close(in2_w);
 	  if(r < 0) perror("Read error");
 	}
 
@@ -145,21 +161,25 @@ int read_flags_for_writing(bool party, size_t db_nitems, int i = 0)
    concatanate_index("../duoram-online/preprocflags/P2_party0_write_flags_c", tmp, i);
    int const in1_w { open(tmp, O_RDONLY ) };
    size_t r = read(in1_w, writing_c,  sizeof(writing_c));
+    close(in1_w);
    if(r < 0) perror("Read error");
    
    concatanate_index("../duoram-online/preprocflags/P2_party1_write_flags_d", tmp, i);
    int const in2_w { open(tmp, O_RDONLY ) };
    r = read(in2_w, writing_d,  sizeof(writing_d)); 
+   close(in2_w);
    if(r < 0) perror("Read error");
 
    concatanate_index("../duoram-online/preprocflags/P2_party0_write_c", tmp, i);
    int const in1_w_ { open(tmp, O_RDONLY ) };
    r = read(in1_w_, c,  sizeof(c));
+   close(in1_w_);
    if(r < 0) perror("Read error");
    
    concatanate_index("../duoram-online/preprocflags/P2_party1_write_d", tmp, i);
    int const in2_w_ { open(tmp, O_RDONLY ) };
    r = read(in2_w_, d,  sizeof(d));  
+   close(in2_w_);
    if(r < 0) perror("Read error");
 
    return 0;
@@ -171,11 +191,13 @@ int read_flags_for_generating_cancellation_terms(size_t db_nitems, int i = 0)
   concatanate_index("../duoram-online/preprocflags/P2_party1_read_flags_d", tmp, i);
   int const in2 { open(tmp, O_RDONLY ) };
   size_t r = read(in2, reading_d,  sizeof(reading_d)); 
+  close(in2);
   if(r < 0) perror("Read error");
 
   concatanate_index("../duoram-online/preprocflags/P2_party0_read_flags_c", tmp, i);
   int const in2_ { open(tmp, O_RDONLY ) };
   r = read(in2_, reading_c,  sizeof(reading_c)); 
+  close(in2_);	
   if(r < 0) perror("Read error");
    
   return 0;
@@ -190,16 +212,19 @@ int read_flags_for_updating(bool party, size_t db_nitems, int i = 0)
   concatanate_index("../duoram-online/preprocflags/party0_write_b", tmp, i);
 		int const in0_w_ { open(tmp, O_RDONLY ) };
 		size_t r = read(in0_w_, b,  sizeof(b));	
+		close(in0_w_);	
 	 if(r < 0) perror("Read error");
  	
   concatanate_index("../duoram-online/preprocflags/party0_write_c", tmp, i);
   int const in1_w_ { open(tmp, O_RDONLY ) };
 		r = read(in1_w_, c,  sizeof(c));
+		close(in1_w_);	
   if(r < 0) perror("Read error");
 
   concatanate_index("../duoram-online/preprocflags/party0_write_d", tmp, i); 
   int const in2_w_ { open(tmp, O_RDONLY ) };
 	 r = read(in2_w_, d,  sizeof(d));	
+	 close(in2_w_);	
   if(r < 0) perror("Read error");
 	}
 
@@ -207,17 +232,20 @@ int read_flags_for_updating(bool party, size_t db_nitems, int i = 0)
 	{
   concatanate_index("../duoram-online/preprocflags/party1_write_b", tmp, i);
  	int const in0_w_ { open( tmp, O_RDONLY ) };
-	 size_t r = read(in0_w_, b,  sizeof(b));	
+	 size_t r = read(in0_w_, b,  sizeof(b));
+	 close(in0_w_);	
   if(r < 0) perror("Read error");
  	
   concatanate_index("../duoram-online/preprocflags/party1_write_c", tmp, i);
   int const in1_w_ { open(tmp, O_RDONLY ) };
 		r = read(in1_w_, c,  sizeof(c));
+	 close(in1_w_);	
   if(r < 0) perror("Read error");
 
   concatanate_index("../duoram-online/preprocflags/party1_write_d", tmp, i); 	
   int const in2_w_ { open(tmp, O_RDONLY ) };
-		 r = read(in2_w_, d,  sizeof(d));	
+		 r = read(in2_w_, d,  sizeof(d));
+		 close(in2_w_);		
   if(r < 0) perror("Read error");
 	}
 

+ 80 - 78
preprocessing/preprocessing.cpp

@@ -68,20 +68,20 @@ int main(int argc, char * argv[])
     /* The function make_connections appears in network.h */
    make_connections(party, host1, host2,  io_context, socketsPb, socketsP2, ports,  ports2_1, ports2_0, number_of_sockets);
  
-      size_t RAM_needed_per_thread = 164 * db_nitems;
-      std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;
-      std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;
-      size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);
-      if (thread_per_batch > n_threads) {
-	thread_per_batch = n_threads;
-      }
-      std::cout << "thread_per_batch = " << thread_per_batch << std::endl;
-      if (thread_per_batch < 1) {
-       std::cout << "You need more RAM" << std::endl;
-       exit(0);
-      }
-      size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);
-      std::cout << "n_batches = " << n_batches << std::endl;
+   size_t RAM_needed_per_thread = 164 * db_nitems;
+   std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;
+   std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;
+   size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);
+   if (thread_per_batch > n_threads) {
+     thread_per_batch = n_threads;
+   }
+   std::cout << "thread_per_batch = " << thread_per_batch << std::endl;
+   if (thread_per_batch < 1) {
+    std::cout << "You need more RAM" << std::endl;
+    exit(0);
+   }
+   size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);
+   std::cout << "n_batches = " << n_batches << std::endl;
 
    uint8_t ** target_share_read = new uint8_t*[thread_per_batch];
 
@@ -104,61 +104,49 @@ int main(int argc, char * argv[])
    }
      
  
-   boost::asio::thread_pool pool_share_conversion(thread_per_batch);
+  boost::asio::thread_pool pool_share_conversion(thread_per_batch);
     
-
-    
-    // The following function call creates and evaluates DPFs at target_share_read[j] for j \in \{0, \ldots, n_threads}
-    // the flag vectors are stored in flags
-    // the leaves are stored in output
-    // the final correctionword is stored in final_correction_word
-
   dpfP2 * dpf_instance = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
 
 
   cw_construction computecw_array;
 
  
-     boost::asio::read(socketsP2[0], boost::asio::buffer(&computecw_array, sizeof(computecw_array)));
-     #ifdef VERBOSE
-      std::cout << "computecw_array.rand_b: " << computecw_array.rand_b[0] << " " << computecw_array.rand_b[1] << std::endl;
-     #endif
-
-      /* The function create_dpfs appears in dpf-gen.h*/
-      bool reading = true;
-
-      
-
-     size_t *thread_communication_costs = new size_t[thread_per_batch];
-     for(size_t iter = 0; iter < n_batches; ++iter)
-     { 
-        if (n_batches > 1) {
-          printf("Starting create_dpfs batch %lu / %lu\n", iter+1, n_batches);
-        }
-        boost::asio::thread_pool pool(thread_per_batch);
-        for(size_t j = 0; j < thread_per_batch; ++j)
-        {
-	  thread_communication_costs[j] = 0; 
-	  boost::asio::post(pool,
-	    std::bind(create_dpfs, reading,  db_nitems, std::ref(aeskey),
-		target_share_read[j], std::ref(socketsPb), std::ref(socketsP2),
-		0, db_nitems-1, output[j],  flags[j],
-		std::ref(final_correction_word[j]), computecw_array,
-		std::ref(dpf_instance), party, 5 * j, j,
-		std::ref(thread_communication_costs[j])));
-        }    
-        pool.join();
-        for(size_t j = 0; j < thread_per_batch; ++j) {
-	  communication_cost += thread_communication_costs[j];
-	}
-     }
-     delete[] thread_communication_costs;
-      
-     boost::asio::write(socketsP2[0], boost::asio::buffer(dpf_instance, n_threads * sizeof(dpfP2))); // do this in parallel.
-     communication_cost += (n_threads * sizeof(dpfP2));
+  boost::asio::read(socketsP2[0], boost::asio::buffer(&computecw_array, sizeof(computecw_array)));
  
-   #ifdef DEBUG
-
+  #ifdef VERBOSE
+  std::cout << "computecw_array.rand_b: " << computecw_array.rand_b[0] << " " << computecw_array.rand_b[1] << std::endl;
+  #endif
+
+    /* The function create_dpfs appears in dpf-gen.h*/
+  bool reading = true;
+  size_t *thread_communication_costs = new size_t[thread_per_batch];
+  for(size_t iter = 0; iter < n_batches; ++iter)
+  { 
+      if (n_batches > 1) {
+        printf("Starting create_dpfs batch %lu / %lu\n", iter+1, n_batches);
+      }
+      boost::asio::thread_pool pool(thread_per_batch);
+      for(size_t j = 0; j < thread_per_batch; ++j)
+      {
+    	  thread_communication_costs[j] = 0; 
+    	  boost::asio::post(pool,
+    	  std::bind(create_dpfs, reading,  db_nitems, std::ref(aeskey), target_share_read[j], std::ref(socketsPb), std::ref(socketsP2), 0, db_nitems-1, output[j],  flags[j],
+    		          std::ref(final_correction_word[j]), computecw_array, std::ref(dpf_instance), party, 5 * j, j, std::ref(thread_communication_costs[j])));
+      }    
+      pool.join();
+      for(size_t j = 0; j < thread_per_batch; ++j) {
+       communication_cost += thread_communication_costs[j];
+      }
+  }
+  
+  delete[] thread_communication_costs;
+      
+  boost::asio::write(socketsP2[0], boost::asio::buffer(dpf_instance, n_threads * sizeof(dpfP2))); // do this in parallel.
+  communication_cost += (n_threads * sizeof(dpfP2));
+  free(dpf_instance);
+   
+  #ifdef DEBUG
     for(size_t j = 0; j < n_threads; ++j)
     {
       std::cout << "n_threads = " << j << std::endl;
@@ -183,19 +171,20 @@ int main(int argc, char * argv[])
         final_correction_word_reconstruction = final_correction_word_reconstruction + final_correction_word[j][0];
         std::cout << "final_correction_word_reconstruction = " << final_correction_word_reconstruction << std::endl << std::endl;
      }
-    #endif
+  #endif
  
     /* 
      leaves is a additive shares of the outputs (leaves of the DPF)
      leafbits is the additive shares of flag bits of the DPFs
     */
-   int64_t ** leaves = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
-   int64_t ** leafbits  = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch); 
-   for(size_t j = 0; j < thread_per_batch; ++j)
-   {
-    leaves[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
-    leafbits[j]  = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
-   }
+  int64_t ** leaves = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
+  int64_t ** leafbits  = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch); 
+ 
+  for(size_t j = 0; j < thread_per_batch; ++j)
+  {
+   leaves[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
+   leafbits[j]  = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
+  }
 
 
 
@@ -203,23 +192,36 @@ int main(int argc, char * argv[])
    for(size_t j = 0; j < thread_per_batch; ++j)
    {
      boost::asio::post(pool_share_conversion,  std::bind(convert_shares, j, output, flags, n_threads, db_nitems, final_correction_word, 	leaves, leafbits, 
-                                                          std::ref(socketsPb), std::ref(socketsP2), party));	 	
+                                               std::ref(socketsPb), std::ref(socketsP2), party));	 	
    }
     
-    pool_share_conversion.join();
+   pool_share_conversion.join();
 
-    boost::asio::thread_pool pool_xor_to_additive(thread_per_batch); 
+   boost::asio::thread_pool pool_xor_to_additive(thread_per_batch); 
 
-    int64_t *additve_shares = new int64_t[thread_per_batch]; 
-    for(size_t j = 0; j < thread_per_batch; ++j)
-    {
+   int64_t *additve_shares = new int64_t[thread_per_batch]; 
+   
+   for(size_t j = 0; j < thread_per_batch; ++j)
+   {
      boost::asio::post(pool_xor_to_additive, std::bind(xor_to_additive, party, target_share_read[j], std::ref(socketsPb[j]), std::ref(socketsP2[j]), expo, std::ref(additve_shares[j])));
-    }
+   }
 
-    pool_xor_to_additive.join();
+   pool_xor_to_additive.join();
     
- 
+   for(size_t j = 0; j < thread_per_batch; ++j)
+   {
+    free(leaves[j]);
+    free(leafbits[j]);
+    free(output[j]);
+    free(flags[j]);
+    delete[] target_share_read[j];
+   }
     
+    free(leaves);
+    free(leafbits);
+    free(output);
+    free(flags);
+    delete[] target_share_read;
     /* For the artifact, don't actually write these in order to not use very
      * large amounts of storage
 

+ 1 - 1
preprocessing/share-conversion.h

@@ -315,7 +315,7 @@ void convert_shares(size_t i, __m128i ** output, int8_t ** flags, size_t n_threa
 		leaves[i][j]  = output[i][j][0];
 		flags_[j] = (flags[i][j] * pm) + (flags[i][j] * share_b_recv.PM) + (flags[i][j] * rb); 
 		flags_[j] += output[i][j][1]; 
-  flags_[j] -= (flags[i][j] * P2_shareconversion.FCWshare_reconstruction);		
+    flags_[j] -= (flags[i][j] * P2_shareconversion.FCWshare_reconstruction);		
 
 	
 		#ifdef DEBUG