1 year ago · f7bdf834c3
--- a/2p-preprocessing/preprocessing.cpp
+++ b/2p-preprocessing/preprocessing.cpp
@@ -56,7 +56,9 @@ size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) *
 
															 bool is_packed = (sizeof(leaf_t) < sizeof(node_t));

														
 
															 size_t leaves_per_node = is_packed ? sizeof(node_t) * CHAR_BIT / bits_per_leaf : 1;

														
 
															// Returns the number of bits needed to index `nitems` distinct items,
// i.e. ceil(log2(nitems)) for nitems >= 1.
//
// The value is computed with integer arithmetic only. Going through
// std::log2 on a double is inexact once nitems exceeds 2^53 (the item count
// is rounded before the logarithm is taken) and is undefined for nitems == 0,
// where -infinity would be converted to size_t.
//
// nitems == 0 and nitems == 1 both return 0: no bits are needed to address
// at most one item.
size_t input_bits(const size_t nitems)
{
    if (nitems <= 1)
    {
        return 0;
    }

    // ceil(log2(n)) equals the bit width of (n - 1) for every n >= 2.
    size_t bits = 0;
    for (size_t remaining = nitems - 1; remaining != 0; remaining >>= 1)
    {
        ++bits;
    }
    return bits;
}

														
 
															 leaf_t val;

														
@@ -65,513 +67,515 @@ using namespace dpf;
 
															 #include "mpc.h"

														
 
															 void generate_random_targets(uint8_t **target_share_read, size_t n_threads, bool party, size_t expo)

														
 
															 {

														
 
															-	for (size_t i = 0; i < n_threads; i++)

														
 
															-	{

														
 
															-		target_share_read[i] = new uint8_t[64];

														
 
															-	}

														
 
															-

														
 
															-	for (size_t j = 0; j < 64; ++j)

														
 
															-	{

														
 
															-		for (size_t i = 0; i < n_threads; ++i)

														
 
															-		{

														
 
															-			uint8_t random_value;

														
 
															-			arc4random_buf(&random_value, sizeof(uint8_t));

														
 
															-			target_share_read[i][j] = random_value; // rand();

														
 
															-		}

														
 
															-	}

														
 
															+    for (size_t i = 0; i < n_threads; i++)

														
 
															+    {

														
 
															+        target_share_read[i] = new uint8_t[64];

														
 
															+    }

														
 
															+

														
 
															+    for (size_t j = 0; j < 64; ++j)

														
 
															+    {

														
 
															+        for (size_t i = 0; i < n_threads; ++i)

														
 
															+        {

														
 
															+            uint8_t random_value;

														
 
															+            arc4random_buf(&random_value, sizeof(uint8_t));

														
 
															+            target_share_read[i][j] = random_value; // rand();

														
 
															+        }

														
 
															+    }

														
 
															 }

														
 
															 void compute_CW(bool party, tcp::socket &sout, __m128i L, __m128i R, uint8_t bit, __m128i &CW)

														
 
															 {

														
 
															-	// struct cw_construction

														
 
															-	//{

														
 
															-	__m128i rand_b, gamma_b;

														
 
															-	uint8_t bit_b;

														
 
															-	//};

														
 
															-

														
 
															-	__m128i *X, *Y;

														
 
															-

														
 
															-	if (party)

														
 
															-	{

														
 
															-		std::string qfile = std::string("./gamma1");

														
 
															-		int qfd = open(qfile.c_str(), O_RDWR);

														
 
															-		X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															-							PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															-

														
 
															-		qfile = std::string("./x1");

														
 
															-		qfd = open(qfile.c_str(), O_RDWR);

														
 
															-		Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															-							PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															-	}

														
 
															-

														
 
															-	if (!party)

														
 
															-	{

														
 
															-		std::string qfile = std::string("./gamma0");

														
 
															-		int qfd = open(qfile.c_str(), O_RDWR);

														
 
															-		X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															-							PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															-

														
 
															-		qfile = std::string("./x0");

														
 
															-		qfd = open(qfile.c_str(), O_RDWR);

														
 
															-		Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															-							PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															-	}

														
 
															-

														
 
															-	// cw_construction computecw;

														
 
															-	//	read(sin, boost::asio::buffer(&computecw, sizeof(computecw)));

														
 
															-

														
 
															-	// computecw.rand_b;

														
 
															-	//__m128i gamma_b = computecw.gamma_b;

														
 
															-

														
 
															-	if (party)

														
 
															-	{

														
 
															-		rand_b = Y[0];	//_mm_set_epi32(0x6fef9434, 0x6768121e, 0x20942286, 0x1b59f7a7);

														
 
															-		gamma_b = X[0]; // _mm_set_epi32(0x6a499109 , 0x803067dd , 0xd1e2281b , 0xe71b6262);

														
 
															-		bit_b = 1;		// computecw.bit_b;

														
 
															-	}

														
 
															-	else

														
 
															-	{

														
 
															-		rand_b = Y[0];	// _mm_set_epi32(0xb29747df, 0xf7300f6d, 0x9476d971, 0xd5f75d98);

														
 
															-		gamma_b = X[0]; // _mm_set_epi32(0xb73142e2 , 0x10687aae , 0x06500d3ec , 0x29b5c85d);

														
 
															-		bit_b = 1;		// computecw.bit_b;

														
 
															-	}

														
 
															-

														
 
															-	uint8_t blinded_bit, blinded_bit_read;

														
 
															-	blinded_bit = bit ^ bit_b;

														
 
															-

														
 
															-	__m128i blinded_L = L ^ R ^ rand_b;

														
 
															-	__m128i blinded_L_read;

														
 
															-

														
 
															-	struct BlindsCW

														
 
															-	{

														
 
															-		__m128i blinded_message;

														
 
															-		uint8_t blinded_bit;

														
 
															-	};

														
 
															-

														
 
															-	BlindsCW blinds_sent, blinds_recv;

														
 
															-

														
 
															-	blinds_sent.blinded_bit = blinded_bit;

														
 
															-	blinds_sent.blinded_message = blinded_L;

														
 
															-

														
 
															-	boost::asio::write(sout, boost::asio::buffer(&blinds_sent, sizeof(blinds_sent)));

														
 
															-	boost::asio::read(sout, boost::asio::buffer(&blinds_recv, sizeof(blinds_recv)));

														
 
															- communication_cost += sizeof(blinds_recv);

														
 
															-	

														
 
															- blinded_bit_read = blinds_recv.blinded_bit;

														
 
															-	blinded_L_read = blinds_recv.blinded_message;

														
 
															-

														
 
															-	__m128i out_ = R ^ gamma_b; //_mm_setzero_si128;

														
 
															-

														
 
															-	if (bit)

														
 
															-	{

														
 
															-		out_ ^= (L ^ R ^ blinded_L_read);

														
 
															-	}

														
 
															-	if (blinded_bit_read)

														
 
															-	{

														
 
															-		out_ ^= rand_b;

														
 
															-	}

														
 
															-

														
 
															-	__m128i out_reconstruction;

														
 
															-	boost::asio::write(sout, boost::asio::buffer(&out_, sizeof(out_)));

														
 
															-	boost::asio::read(sout, boost::asio::buffer(&out_reconstruction, sizeof(out_reconstruction)));

														
 
															- communication_cost += sizeof(out_reconstruction);

														
 
															-	

														
 
															- out_reconstruction = out_ ^ out_reconstruction;

														
 
															-

														
 
															-	CW = out_reconstruction;

														
 
															-

														
 
															- #ifdef DEBUG

														
 
															-  uint8_t bit_reconstruction;

														
 
															-  boost::asio::write(sout, boost::asio::buffer(&bit, sizeof(bit)));

														
 
															-  boost::asio::read(sout, boost::asio::buffer(&bit_reconstruction, sizeof(bit_reconstruction)));

														
 
															-  bit_reconstruction = bit ^ bit_reconstruction;

														
 
															-

														
 
															-  __m128i L_reconstruction;

														
 
															-  boost::asio::write(sout, boost::asio::buffer(&L, sizeof(L)));

														
 
															-  boost::asio::read(sout, boost::asio::buffer(&L_reconstruction, sizeof(L_reconstruction)));

														
 
															-  L_reconstruction = L ^ L_reconstruction;

														
 
															-

														
 
															-  __m128i R_reconstruction;

														
 
															-  boost::asio::write(sout, boost::asio::buffer(&R, sizeof(R)));

														
 
															-  boost::asio::read(sout, boost::asio::buffer(&R_reconstruction, sizeof(R_reconstruction)));

														
 
															-  R_reconstruction = R ^ R_reconstruction;

														
 
															-

														
 
															-  __m128i CW_debug;

														
 
															-

														
 
															-  if (bit_reconstruction != 0)

														
 
															-  {

														
 
															-   CW_debug = L_reconstruction;

														
 
															-  }

														
 
															-  else

														
 
															-  {

														
 
															-   CW_debug = R_reconstruction;

														
 
															-  }

														
 
															-

														
 
															-  assert(CW_debug[0] == CW[0]);

														
 
															-  assert(CW_debug[1] == CW[1]);

														
 
															- #endif

														
 
															+    // struct cw_construction

														
 
															+    //{

														
 
															+    __m128i rand_b, gamma_b;

														
 
															+    uint8_t bit_b;

														
 
															+    //};

														
 
															+

														
 
															+    __m128i *X, *Y;

														
 
															+

														
 
															+    if (party)

														
 
															+    {

														
 
															+      std::string qfile = std::string("./gamma1");

														
 
															+      int qfd = open(qfile.c_str(), O_RDWR);

														
 
															+      X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															+                          PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															+

														
 
															+      qfile = std::string("./x1");

														
 
															+      qfd = open(qfile.c_str(), O_RDWR);

														
 
															+      Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															+                          PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															+      close(qfd);

														
 
															+      munmap(X, 8 * sizeof(__m128i));

														
 
															+      munmap(Y, 8 * sizeof(__m128i));

														
 
															+    }

														
 
															+

														
 
															+    if (!party)

														
 
															+    {

														
 
															+      std::string qfile = std::string("./gamma0");

														
 
															+      int qfd = open(qfile.c_str(), O_RDWR);

														
 
															+      X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															+                          PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															+

														
 
															+      qfile = std::string("./x0");

														
 
															+      qfd = open(qfile.c_str(), O_RDWR);

														
 
															+      Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),

														
 
															+                          PROT_READ, MAP_PRIVATE, qfd, 0);

														
 
															+      close(qfd);

														
 
															+      munmap(X, 8 * sizeof(__m128i));

														
 
															+      munmap(Y, 8 * sizeof(__m128i));

														
 
															+    }

														
 
															+

														
 
															+    // cw_construction computecw;

														
 
															+    //	read(sin, boost::asio::buffer(&computecw, sizeof(computecw)));

														
 
															+

														
 
															+    // computecw.rand_b;

														
 
															+    //__m128i gamma_b = computecw.gamma_b;

														
 
															+

														
 
															+    if (party)

														
 
															+    {

														
 
															+      rand_b = Y[0];	//_mm_set_epi32(0x6fef9434, 0x6768121e, 0x20942286, 0x1b59f7a7);

														
 
															+      gamma_b = X[0]; // _mm_set_epi32(0x6a499109 , 0x803067dd , 0xd1e2281b , 0xe71b6262);

														
 
															+      bit_b = 1;		// computecw.bit_b;

														
 
															+    }

														
 
															+    else

														
 
															+    {

														
 
															+      rand_b = Y[0];	// _mm_set_epi32(0xb29747df, 0xf7300f6d, 0x9476d971, 0xd5f75d98);

														
 
															+      gamma_b = X[0]; // _mm_set_epi32(0xb73142e2 , 0x10687aae , 0x06500d3ec , 0x29b5c85d);

														
 
															+      bit_b = 1;		// computecw.bit_b;

														
 
															+    }

														
 
															+

														
 
															+    uint8_t blinded_bit, blinded_bit_read;

														
 
															+    blinded_bit = bit ^ bit_b;

														
 
															+

														
 
															+    __m128i blinded_L = L ^ R ^ rand_b;

														
 
															+    __m128i blinded_L_read;

														
 
															+

														
 
															+    struct BlindsCW

														
 
															+    {

														
 
															+        __m128i blinded_message;

														
 
															+        uint8_t blinded_bit;

														
 
															+    };

														
 
															+

														
 
															+    BlindsCW blinds_sent, blinds_recv;

														
 
															+

														
 
															+    blinds_sent.blinded_bit = blinded_bit;

														
 
															+    blinds_sent.blinded_message = blinded_L;

														
 
															+

														
 
															+    boost::asio::write(sout, boost::asio::buffer(&blinds_sent, sizeof(blinds_sent)));

														
 
															+    boost::asio::read(sout, boost::asio::buffer(&blinds_recv, sizeof(blinds_recv)));

														
 
															+    communication_cost += sizeof(blinds_recv);

														
 
															+

														
 
															+    blinded_bit_read = blinds_recv.blinded_bit;

														
 
															+    blinded_L_read = blinds_recv.blinded_message;

														
 
															+

														
 
															+    __m128i out_ = R ^ gamma_b; //_mm_setzero_si128;

														
 
															+

														
 
															+    if (bit)

														
 
															+    {

														
 
															+        out_ ^= (L ^ R ^ blinded_L_read);

														
 
															+    }

														
 
															+    if (blinded_bit_read)

														
 
															+    {

														
 
															+        out_ ^= rand_b;

														
 
															+    }

														
 
															+

														
 
															+    __m128i out_reconstruction;

														
 
															+    boost::asio::write(sout, boost::asio::buffer(&out_, sizeof(out_)));

														
 
															+    boost::asio::read(sout, boost::asio::buffer(&out_reconstruction, sizeof(out_reconstruction)));

														
 
															+    communication_cost += sizeof(out_reconstruction);

														
 
															+

														
 
															+    out_reconstruction = out_ ^ out_reconstruction;

														
 
															+

														
 
															+    CW = out_reconstruction;

														
 
															+

														
 
															+		#ifdef DEBUG

														
 
															+		    uint8_t bit_reconstruction;

														
 
															+		    boost::asio::write(sout, boost::asio::buffer(&bit, sizeof(bit)));

														
 
															+		    boost::asio::read(sout, boost::asio::buffer(&bit_reconstruction, sizeof(bit_reconstruction)));

														
 
															+		    bit_reconstruction = bit ^ bit_reconstruction;

														
 
															+

														
 
															+		    __m128i L_reconstruction;

														
 
															+		    boost::asio::write(sout, boost::asio::buffer(&L, sizeof(L)));

														
 
															+		    boost::asio::read(sout, boost::asio::buffer(&L_reconstruction, sizeof(L_reconstruction)));

														
 
															+		    L_reconstruction = L ^ L_reconstruction;

														
 
															+

														
 
															+		    __m128i R_reconstruction;

														
 
															+		    boost::asio::write(sout, boost::asio::buffer(&R, sizeof(R)));

														
 
															+		    boost::asio::read(sout, boost::asio::buffer(&R_reconstruction, sizeof(R_reconstruction)));

														
 
															+		    R_reconstruction = R ^ R_reconstruction;

														
 
															+

														
 
															+		    __m128i CW_debug;

														
 
															+

														
 
															+		    if (bit_reconstruction != 0)

														
 
															+		    {

														
 
															+		        CW_debug = L_reconstruction;

														
 
															+		    }

														
 
															+		    else

														
 
															+		    {

														
 
															+		        CW_debug = R_reconstruction;

														
 
															+		    }

														
 
															+

														
 
															+		    assert(CW_debug[0] == CW[0]);

														
 
															+		    assert(CW_debug[1] == CW[1]);

														
 
															+		#endif

														
 
															 }

														
 
															 // Builds a 128-bit mask from an all-ones vector by shifting each 32-bit lane
// left by the saturating difference between that lane's threshold
// (128, 96, 64, 32 for lanes 0..3) and n. For n in 0..128 this leaves the n
// most-significant bits of the 128-bit value set.
//
// The subtraction works on 16-bit sub-lanes, so n is assumed to be well below
// 65536. Uses the AVX2 variable-shift intrinsic _mm_sllv_epi32.
__m128i bit_mask_avx2_msb(unsigned int n)
{
    const __m128i lane_thresholds = _mm_set_epi32(32, 64, 96, 128);
    const __m128i requested_bits = _mm_set1_epi32(n);
    const __m128i all_ones = _mm_set1_epi32(-1);

    // Per-lane shift count; counts of 32 or more clear the lane entirely.
    const __m128i shift_counts = _mm_subs_epu16(lane_thresholds, requested_bits);
    return _mm_sllv_epi32(all_ones, shift_counts);
}

														
 
															 // Builds a 128-bit mask from an all-ones vector by shifting each 32-bit lane
// right by the saturating difference between that lane's threshold
// (32, 64, 96, 128 for lanes 0..3) and n. For n in 0..128 this leaves the n
// least-significant bits of the 128-bit value set.
//
// The subtraction works on 16-bit sub-lanes, so n is assumed to be well below
// 65536. Uses the AVX2 variable-shift intrinsic _mm_srlv_epi32.
__m128i bit_mask_avx2_lsb(unsigned int n)
{
    const __m128i lane_thresholds = _mm_set_epi32(128, 96, 64, 32);
    const __m128i requested_bits = _mm_set1_epi32(n);
    const __m128i all_ones = _mm_set1_epi32(-1);

    // Per-lane shift count; counts of 32 or more clear the lane entirely.
    const __m128i shift_counts = _mm_subs_epu16(lane_thresholds, requested_bits);
    return _mm_srlv_epi32(all_ones, shift_counts);
}

														
 
															 // Expands one seed into two child nodes.
//
// The seed is passed through clear_lsb with mask 0b11 and then handed to
// dpf::PRG, which is asked to write two output blocks into s[0] and s[1].
//
// prgkey: key forwarded unchanged to dpf::PRG.
// seed:   parent node; not modified.
// s:      destination for the two generated nodes.
template <typename node_t, typename prgkey_t>
static inline void traverse(const prgkey_t &prgkey, const node_t &seed, node_t s[2])
{
    const auto masked_seed = clear_lsb(seed, 0b11);
    dpf::PRG(prgkey, masked_seed, s, 2);
}

														
 
															 inline void evalfull_mpc(const size_t &nodes_per_leaf, const size_t &depth, const size_t &nbits, const size_t &nodes_in_interval,

														
 
															-						 const AES_KEY &prgkey, uint8_t target_share[64], std::vector<socket_t> &socketsPb,

														
 
															-						 const size_t from, const size_t to, __m128i *output, int8_t *_t, __m128i &final_correction_word, bool party, size_t socket_no = 0)

														
 
															+                         const AES_KEY &prgkey, uint8_t target_share[64], std::vector<socket_t> &socketsPb,

														
 
															+                         const size_t from, const size_t to, __m128i *output, int8_t *_t, __m128i &final_correction_word, bool party, size_t socket_no = 0)

														
 
															 {

														
 
															-	__m128i root;

														
 
															-

														
 
															-	arc4random_buf(&root, sizeof(root));

														
 
															+    __m128i root;

														
 
															-	root = set_lsb(root, party);

														
 
															+    arc4random_buf(&root, sizeof(root));

														
 
															-	const size_t from_node = std::floor(static_cast<double>(from) / nodes_per_leaf);

														
 
															+    root = set_lsb(root, party);

														
 
															-	__m128i *s[2] = {

														
 
															-		reinterpret_cast<__m128i *>(output) + nodes_in_interval * (nodes_per_leaf - 1),

														
 
															-		s[0] + nodes_in_interval / 2};

														
 
															+    const size_t from_node = std::floor(static_cast<double>(from) / nodes_per_leaf);

														
 
															-	int8_t *t[2] = {_t, _t + nodes_in_interval / 2};

														
 
															+    __m128i *s[2] = {

														
 
															+        reinterpret_cast<__m128i *>(output) + nodes_in_interval * (nodes_per_leaf - 1),

														
 
															+        s[0] + nodes_in_interval / 2

														
 
															+    };

														
 
															-	int curlayer = depth % 2;

														
 
															+    int8_t *t[2] = {_t, _t + nodes_in_interval / 2};

														
 
															-	s[curlayer][0] = root;

														
 
															-	t[curlayer][0] = get_lsb(root, 0b01);

														
 
															+    int curlayer = depth % 2;

														
 
															-	__m128i *CW = (__m128i *)std::aligned_alloc(sizeof(__m256i), depth * sizeof(__m128i));

														
 
															+    s[curlayer][0] = root;

														
 
															+    t[curlayer][0] = get_lsb(root, 0b01);

														
 
															-	for (size_t layer = 0; layer < depth; ++layer)

														
 
															-	{

														
 
															-   #ifdef VERBOSE

														
 
															-		  printf("layer = %zu\n", layer);

														
 
															-   #endif

														
 
															-		 curlayer = 1 - curlayer;

														
 
															+    __m128i *CW = (__m128i *)std::aligned_alloc(sizeof(__m256i), depth * sizeof(__m128i));

														
 
															-		 size_t i = 0, j = 0;

														
 
															-		 auto nextbit = (from_node >> (nbits - layer - 1)) & 1;

														
 
															-		 size_t nodes_in_prev_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer)));

														
 
															-		 size_t nodes_in_cur_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer - 1)));

														
 
															+    for (size_t layer = 0; layer < depth; ++layer)

														
 
															+    {

														
 
															+#ifdef VERBOSE

														
 
															+        printf("layer = %zu\n", layer);

														
 
															+#endif

														
 
															+        curlayer = 1 - curlayer;

														
 
															+

														
 
															+        size_t i = 0, j = 0;

														
 
															+        auto nextbit = (from_node >> (nbits - layer - 1)) & 1;

														
 
															+        size_t nodes_in_prev_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer)));

														
 
															+        size_t nodes_in_cur_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer - 1)));

														
 
															+

														
 
															+        __m128i L = _mm_setzero_si128();

														
 
															+        __m128i R = _mm_setzero_si128();

														
 
															+

														
 
															+        for (i = nextbit, j = nextbit; j < nodes_in_prev_layer - 1; ++j, i += 2)

														
 
															+        {

														
 
															+            traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);

														
 
															+            L ^= s[curlayer][i];

														
 
															+            R ^= s[curlayer][i + 1];

														
 
															+        }

														
 
															+

														
 
															+        if (nodes_in_prev_layer > j)

														
 
															+        {

														
 
															+            if (i < nodes_in_cur_layer - 1)

														
 
															+            {

														
 
															+                traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);

														
 
															+                L ^= s[curlayer][i];

														
 
															+                R ^= s[curlayer][i + 1];

														
 
															+            }

														
 
															+        }

														
 
															+

														
 
															+        compute_CW(party, socketsPb[socket_no], L, R, target_share[layer], CW[layer]);

														
 
															+

														
 
															+        uint8_t advice_L = get_lsb(L) ^ target_share[layer];

														
 
															+        uint8_t advice_R = get_lsb(R) ^ target_share[layer];

														
 
															+

														
 
															+        uint8_t cwt_L, cwt_R;

														
 
															+        uint8_t advice[2];

														
 
															+        uint8_t cwts[2];

														
 
															+        advice[0] = advice_L;

														
 
															+        advice[1] = advice_R;

														
 
															+

														
 
															+        boost::asio::write(socketsPb[socket_no + 1], boost::asio::buffer(&advice, sizeof(advice)));

														
 
															+        boost::asio::read(socketsPb[socket_no + 1], boost::asio::buffer(&cwts, sizeof(cwts)));

														
 
															+

														
 
															+        cwt_L = cwts[0];

														
 
															+        cwt_R = cwts[1];

														
 
															+

														
 
															+        cwt_L = cwt_L ^ advice_L ^ 1;

														
 
															+        cwt_R = cwt_R ^ advice_R;

														
 
															+

														
 
															+        for (size_t j = 0; j < nodes_in_prev_layer; ++j)

														
 
															+        {

														
 
															+            t[curlayer][2 * j] = get_lsb(s[curlayer][2 * j]) ^ (cwt_L & t[1 - curlayer][j]);

														
 
															+            s[curlayer][2 * j] = clear_lsb(xor_if(s[curlayer][2 * j], CW[layer], !t[1 - curlayer][j]), 0b11);

														
 
															+            t[curlayer][(2 * j) + 1] = get_lsb(s[curlayer][(2 * j) + 1]) ^ (cwt_R & t[1 - curlayer][j]);

														
 
															+            s[curlayer][(2 * j) + 1] = clear_lsb(xor_if(s[curlayer][(2 * j) + 1], CW[layer], !t[1 - curlayer][j]), 0b11);

														
 
															+        }

														
 
															+    }

														
 
															-		 __m128i L = _mm_setzero_si128();

														
 
															-		 __m128i R = _mm_setzero_si128();

														
 
															+    free(CW);

														
 
															+    __m128i Gamma = _mm_setzero_si128();

														
 
															-   for (i = nextbit, j = nextbit; j < nodes_in_prev_layer - 1; ++j, i += 2)

														
 
															-   {

														
 
															-    traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);

														
 
															-    L ^= s[curlayer][i];

														
 
															-    R ^= s[curlayer][i + 1];

														
 
															-   }

														
 
															+    for (size_t i = 0; i < to + 1; ++i)

														
 
															+    {

														
 
															+        Gamma[0] += output[i][0];

														
 
															+        Gamma[1] += output[i][1];

														
 
															+    }

														
 
															-   if (nodes_in_prev_layer > j)

														
 
															-   {

														
 
															-    if (i < nodes_in_cur_layer - 1)

														
 
															+    if (party)

														
 
															     {

														
 
															-     traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);

														
 
															-     L ^= s[curlayer][i];

														
 
															-     R ^= s[curlayer][i + 1];

														
 
															+        Gamma[0] = -Gamma[0];

														
 
															+        Gamma[1] = -Gamma[1];

														
 
															     }

														
 
															-   }

														
 
															-		 compute_CW(party, socketsPb[socket_no], L, R, target_share[layer], CW[layer]);

														
 
															+    boost::asio::write(socketsPb[socket_no + 3], boost::asio::buffer(&Gamma, sizeof(Gamma)));

														
 
															+    boost::asio::read(socketsPb[socket_no + 3], boost::asio::buffer(&final_correction_word, sizeof(final_correction_word)));

														
 
															+    communication_cost += sizeof(Gamma);

														
 
															+    final_correction_word = Gamma; // final_correction_word + Gamma;

														
 
															-		 uint8_t advice_L = get_lsb(L) ^ target_share[layer];

														
 
															-		 uint8_t advice_R = get_lsb(R) ^ target_share[layer];

														
 
															+} // dpf::__evalinterval

														
 
															-		 uint8_t cwt_L, cwt_R;

														
 
															-		 uint8_t advice[2];

														
 
															-		 uint8_t cwts[2];

														
 
															-		 advice[0] = advice_L;

														
 
															-		 advice[1] = advice_R;

														
 
															+void convert_shares(__m128i **output, int8_t **flags, size_t n_threads, size_t db_nitems, __m128i *final_correction_word, tcp::socket &sb, bool party)

														
 
															+{

														
 
															-		 boost::asio::write(socketsPb[socket_no + 1], boost::asio::buffer(&advice, sizeof(advice)));

														
 
															-		 boost::asio::read(socketsPb[socket_no + 1], boost::asio::buffer(&cwts, sizeof(cwts)));

														
 
															+    for (size_t j = 0; j < db_nitems; ++j)

														
 
															+    {

														
 
															+        for (size_t k = 0; k < n_threads; ++k)

														
 
															+        {

														
 
															+            if (party)

														
 
															+            {

														
 
															+                output[k][j] = -output[k][j];

														
 
															+                flags[k][j] = -flags[k][j];

														
 
															+            }

														
 
															+        }

														
 
															+

														
 
															+#ifdef DEBUG

														
 
															+        int8_t out = flags[0][j];

														
 
															+        int8_t out_rec;

														
 
															+

														
 
															+        boost::asio::write(sb, boost::asio::buffer(&out, sizeof(out)));

														
 
															+        boost::asio::read(sb, boost::asio::buffer(&out_rec, sizeof(out_rec)));

														
 
															+        out_rec = out_rec + out;

														
 
															+

														
 
															+

														
 
															+        if (out_rec != 0)

														
 
															+            std::cout << j << "(flags) --> " << (int)out_rec << std::endl

														
 
															+                      << std::endl;

														
 
															+

														
 
															+        __m128i out2 = output[0][j];

														
 
															+        __m128i out_rec2;

														
 
															+

														
 
															+        boost::asio::write(sb, boost::asio::buffer(&out2, sizeof(out2)));

														
 
															+        boost::asio::read(sb, boost::asio::buffer(&out_rec2, sizeof(out_rec2)));

														
 
															+        out_rec2 = out_rec2 + out2;

														
 
															+        if (out_rec2[0] != 0)

														
 
															+            std::cout << j << "--> " << out_rec2[0] << std::endl;

														
 
															+#endif

														
 
															+    }

														
 
															-		 cwt_L = cwts[0];

														
 
															-		 cwt_R = cwts[1];

														
 
															+    for (size_t i = 0; i < n_threads; ++i)

														
 
															+    {

														
 
															-		 cwt_L = cwt_L ^ advice_L ^ 1;

														
 
															-		 cwt_R = cwt_R ^ advice_R;

														
 
															+        int64_t pm = 0;

														
 
															+        int64_t rb;

														
 
															+

														
 
															+        arc4random_buf(&rb, sizeof(rb));

														
 
															+        for (size_t j = 0; j < db_nitems; ++j)

														
 
															+        {

														
 
															+            if (party)

														
 
															+            {

														
 
															+                if (flags[i][j] != 0)

														
 
															+                    pm -= 1;

														
 
															+            }

														
 
															+            if (!party)

														
 
															+            {

														
 
															+                if (flags[i][j] != 0)

														
 
															+                    pm += 1; // flags[0][j];

														
 
															+            }

														
 
															+        }

														
 
															+    }

														
 
															+}

														
 
															-		 for (size_t j = 0; j < nodes_in_prev_layer; ++j)

														
 
															-		 {

														
 
															-			 t[curlayer][2 * j] = get_lsb(s[curlayer][2 * j]) ^ (cwt_L & t[1 - curlayer][j]);

														
 
															-			 s[curlayer][2 * j] = clear_lsb(xor_if(s[curlayer][2 * j], CW[layer], !t[1 - curlayer][j]), 0b11);

														
 
															-			 t[curlayer][(2 * j) + 1] = get_lsb(s[curlayer][(2 * j) + 1]) ^ (cwt_R & t[1 - curlayer][j]);

														
 
															-			 s[curlayer][(2 * j) + 1] = clear_lsb(xor_if(s[curlayer][(2 * j) + 1], CW[layer], !t[1 - curlayer][j]), 0b11);

														
 
															-		 }

														
 
															-	 }

														
 
															+void accept_conncections_from_Pb(boost::asio::io_context &io_context, std::vector<socket_t> &socketsPb, int port, size_t j)

														
 
															+{

														
 
															+    tcp::acceptor acceptor_a(io_context, tcp::endpoint(tcp::v4(), port));

														
 
															+    tcp::socket sb_a(acceptor_a.accept());

														
 
															+    socketsPb[j] = std::move(sb_a);

														
 
															+}

														
 
															-	__m128i Gamma = _mm_setzero_si128();

														
 
															+int main(int argc, char *argv[])

														
 
															+{

														
 
															-	for (size_t i = 0; i < to + 1; ++i)

														
 
															-	{

														
 
															-		Gamma[0] += output[i][0];

														
 
															-		Gamma[1] += output[i][1];

														
 
															-	}

														
 
															+    boost::asio::io_context io_context;

														
 
															+    tcp::resolver resolver(io_context);

														
 
															+    const std::string host1 = argv[1];

														
 
															-	if (party)

														
 
															-	{

														
 
															-		Gamma[0] = -Gamma[0];

														
 
															-		Gamma[1] = -Gamma[1];

														
 
															-	}

														
 
															-	boost::asio::write(socketsPb[socket_no + 3], boost::asio::buffer(&Gamma, sizeof(Gamma)));

														
 
															-	boost::asio::read(socketsPb[socket_no + 3], boost::asio::buffer(&final_correction_word, sizeof(final_correction_word)));

														
 
															- communication_cost += sizeof(Gamma);

														
 
															-	final_correction_word = Gamma; // final_correction_word + Gamma;

														
 
															+    const size_t n_threads = atoi(argv[2]);

														
 
															+    const size_t number_of_sockets = 5 * n_threads;

														
 
															+    const size_t expo = atoi(argv[3]);

														
 
															-} // dpf::__evalinterval

														
 
															+    const size_t maxRAM = atoi(argv[4]);

														
 
															-void convert_shares(__m128i **output, int8_t **flags, size_t n_threads, size_t db_nitems, __m128i *final_correction_word, tcp::socket &sb, bool party)

														
 
															-{

														
 
															+    const size_t db_nitems = 1ULL << expo;

														
 
															-	for (size_t j = 0; j < db_nitems; ++j)

														
 
															-	{

														
 
															-		for (size_t k = 0; k < n_threads; ++k)

														
 
															-		{

														
 
															-			if (party)

														
 
															-			{

														
 
															-				output[k][j] = -output[k][j];

														
 
															-				flags[k][j] = -flags[k][j];

														
 
															-			}

														
 
															-		}

														
 
															-

														
 
															-		 #ifdef DEBUG

														
 
															-		int8_t out = flags[0][j];

														
 
															-		int8_t out_rec;

														
 
															-

														
 
															-		boost::asio::write(sb, boost::asio::buffer(&out, sizeof(out)));

														
 
															-		boost::asio::read(sb, boost::asio::buffer(&out_rec, sizeof(out_rec)));

														
 
															-		out_rec = out_rec + out;

														
 
															-

														
 
															-		

														
 
															-		if (out_rec != 0)

														
 
															-			std::cout << j << "(flags) --> " << (int)out_rec << std::endl

														
 
															-					  << std::endl;

														
 
															-

														
 
															-		__m128i out2 = output[0][j];

														
 
															-		__m128i out_rec2;

														
 
															-

														
 
															-		boost::asio::write(sb, boost::asio::buffer(&out2, sizeof(out2)));

														
 
															-		boost::asio::read(sb, boost::asio::buffer(&out_rec2, sizeof(out_rec2)));

														
 
															-		out_rec2 = out_rec2 + out2;

														
 
															-		if (out_rec2[0] != 0)

														
 
															-			std::cout << j << "--> " << out_rec2[0] << std::endl;

														
 
															-		 #endif

														
 
															-	}

														
 
															-

														
 
															-	for (size_t i = 0; i < n_threads; ++i)

														
 
															-	{

														
 
															-

														
 
															-		int64_t pm = 0;

														
 
															-		int64_t rb;

														
 
															-

														
 
															-		arc4random_buf(&rb, sizeof(rb));

														
 
															-		for (size_t j = 0; j < db_nitems; ++j)

														
 
															-		{

														
 
															-			if (party)

														
 
															-			{

														
 
															-				if (flags[i][j] != 0)

														
 
															-					pm -= 1;

														
 
															-			}

														
 
															-			if (!party)

														
 
															-			{

														
 
															-				if (flags[i][j] != 0)

														
 
															-					pm += 1; // flags[0][j];

														
 
															-			}

														
 
															-		}

														
 
															-	}

														
 
															-}

														
 
															+    size_t RAM_needed_per_thread = 164 * db_nitems;

														
 
															+    std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;

														
 
															+    std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;

														
 
															+    size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);

														
 
															+    if (thread_per_batch > n_threads) {

														
 
															+        thread_per_batch = n_threads;

														
 
															+    }

														
 
															+    std::cout << "thread_per_batch = " << thread_per_batch << std::endl;

														
 
															+    if (thread_per_batch < 1) {

														
 
															+        std::cout << "You need more RAM" << std::endl;

														
 
															+        exit(0);

														
 
															+    }

														
 
															+    size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);

														
 
															+    std::cout << "n_batches = " << n_batches << std::endl;

														
 
															-void accept_conncections_from_Pb(boost::asio::io_context &io_context, std::vector<socket_t> &socketsPb, int port, size_t j)

														
 
															-{

														
 
															-	tcp::acceptor acceptor_a(io_context, tcp::endpoint(tcp::v4(), port));

														
 
															-	tcp::socket sb_a(acceptor_a.accept());

														
 
															-	socketsPb[j] = std::move(sb_a);

														
 
															-}

														
 
															+    std::vector<socket_t> socketsPb;

														
 
															+    for (size_t j = 0; j < number_of_sockets + 1; ++j)

														
 
															+    {

														
 
															+        tcp::socket emptysocket(io_context);

														
 
															+        socketsPb.emplace_back(std::move(emptysocket));

														
 
															+    }

														
 
															+    socketsPb.reserve(number_of_sockets + 1);

														
 
															-int main(int argc, char *argv[])

														
 
															-{

														
 
															-	boost::asio::io_context io_context;

														
 
															-	tcp::resolver resolver(io_context);

														
 
															-	const std::string host1 = argv[1];

														
 
															- 

														
 
															-

														
 
															-	const size_t n_threads = atoi(argv[2]);

														
 
															-	const size_t number_of_sockets = 5 * n_threads;

														
 
															-	const size_t expo = atoi(argv[3]);

														
 
															-

														
 
															-	const size_t maxRAM = atoi(argv[4]);

														
 
															-

														
 
															-	const size_t db_nitems = 1ULL << expo;

														
 
															-

														
 
															-      size_t RAM_needed_per_thread = 164 * db_nitems;

														
 
															-      std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;

														
 
															-      std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;

														
 
															-      size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);

														
 
															-      if (thread_per_batch > n_threads) {

														
 
															-	thread_per_batch = n_threads;

														
 
															-      }

														
 
															-      std::cout << "thread_per_batch = " << thread_per_batch << std::endl;

														
 
															-      if (thread_per_batch < 1) {

														
 
															-       std::cout << "You need more RAM" << std::endl;

														
 
															-       exit(0);

														
 
															-      }

														
 
															-      size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);

														
 
															-      std::cout << "n_batches = " << n_batches << std::endl;

														
 
															-

														
 
															-	std::vector<socket_t> socketsPb;

														
 
															-	for (size_t j = 0; j < number_of_sockets + 1; ++j)

														
 
															-	{

														
 
															-		tcp::socket emptysocket(io_context);

														
 
															-		socketsPb.emplace_back(std::move(emptysocket));

														
 
															-	}

														
 
															-	socketsPb.reserve(number_of_sockets + 1);

														
 
															-

														
 
															-

														
 
															-	std::vector<int> ports;

														
 
															-	for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															-	{

														
 
															-		int port = 6000;

														
 
															-		ports.push_back(port + j);

														
 
															-	}

														
 
															-

														
 
															-	std::vector<int> ports2_0;

														
 
															-	for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															-	{

														
 
															-		int port = 20000;

														
 
															-		ports2_0.push_back(port + j);

														
 
															-	}

														
 
															-

														
 
															-	std::vector<int> ports2_1;

														
 
															-	for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															-	{

														
 
															-		int port = 40000;

														
 
															-		ports2_1.push_back(port + j);

														
 
															-	}

														
 
															-

														
 
															-bool party;

														
 
															+    std::vector<int> ports;

														
 
															+    for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															+    {

														
 
															+        int port = 6000;

														
 
															+        ports.push_back(port + j);

														
 
															+    }

														
 
															+

														
 
															+    std::vector<int> ports2_0;

														
 
															+    for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															+    {

														
 
															+        int port = 20000;

														
 
															+        ports2_0.push_back(port + j);

														
 
															+    }

														
 
															+

														
 
															+    std::vector<int> ports2_1;

														
 
															+    for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															+    {

														
 
															+        int port = 40000;

														
 
															+        ports2_1.push_back(port + j);

														
 
															+    }

														
 
															+

														
 
															+    bool party;

														
 
															 #if (PARTY == 0)

														
 
															-	party = false; 

														
 
															-	for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															-	{

														
 
															-		tcp::socket sb_a(io_context);

														
 
															-		boost::asio::connect(sb_a, resolver.resolve({host1, std::to_string(ports[j])}));

														
 
															-		socketsPb[j] = std::move(sb_a);

														
 
															-	}

														
 
															+    party = false;

														
 
															+    for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															+    {

														
 
															+        tcp::socket sb_a(io_context);

														
 
															+        boost::asio::connect(sb_a, resolver.resolve({host1, std::to_string(ports[j])}));

														
 
															+        socketsPb[j] = std::move(sb_a);

														
 
															+    }

														
 
															 #else

														
 
															-	party = true;	

														
 
															-	boost::asio::thread_pool pool2(number_of_sockets);

														
 
															-	for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															-	{

														
 
															-		boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(socketsPb), ports[j], j));

														
 
															-	}

														
 
															-

														
 
															-	pool2.join();

														
 
															+    party = true;

														
 
															+    boost::asio::thread_pool pool2(number_of_sockets);

														
 
															+    for (size_t j = 0; j < number_of_sockets; ++j)

														
 
															+    {

														
 
															+        boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(socketsPb), ports[j], j));

														
 
															+    }

														
 
															+

														
 
															+    pool2.join();

														
 
															 #endif

														
 
															- 

														
 
															-	__m128i *final_correction_word = (__m128i *)std::aligned_alloc(sizeof(__m256i), thread_per_batch * sizeof(__m128i));

														
 
															-	AES_KEY aeskey;

														
 
															+    __m128i *final_correction_word = (__m128i *)std::aligned_alloc(sizeof(__m256i), thread_per_batch * sizeof(__m128i));

														
 
															-	__m128i **output = (__m128i **)malloc(sizeof(__m128i *) * thread_per_batch);

														
 
															-	int8_t **flags = (int8_t **)malloc(sizeof(uint8_t *) * thread_per_batch);

														
 
															+    AES_KEY aeskey;

														
 
															-	for (size_t j = 0; j < thread_per_batch; ++j)

														
 
															-	{

														
 
															-		output[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));

														
 
															-		flags[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));

														
 
															-	}

														
 
															+    __m128i **output = (__m128i **)malloc(sizeof(__m128i *) * thread_per_batch);

														
 
															+    int8_t **flags = (int8_t **)malloc(sizeof(uint8_t *) * thread_per_batch);

														
 
															-	const size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) * CHAR_BIT;

														
 
															-	const bool is_packed = (sizeof(leaf_t) < sizeof(node_t));

														
 
															-	const size_t nodes_per_leaf = is_packed ? 1 : std::ceil(static_cast<double>(bits_per_leaf) / (sizeof(node_t) * CHAR_BIT));

														
 
															-	const size_t depth = std::ceil(std::log2(db_nitems));

														
 
															-	const size_t nbits = std::ceil(std::log2(db_nitems));

														
 
															-	const size_t nodes_in_interval = db_nitems - 1;

														
 
															-	auto start = std::chrono::steady_clock::now();

														
 
															+    for (size_t j = 0; j < thread_per_batch; ++j)

														
 
															+    {

														
 
															+        output[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));

														
 
															+        flags[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));

														
 
															+    }

														
 
															+

														
 
															+    const size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) * CHAR_BIT;

														
 
															+    const bool is_packed = (sizeof(leaf_t) < sizeof(node_t));

														
 
															+    const size_t nodes_per_leaf = is_packed ? 1 : std::ceil(static_cast<double>(bits_per_leaf) / (sizeof(node_t) * CHAR_BIT));

														
 
															+    const size_t depth = std::ceil(std::log2(db_nitems));

														
 
															+    const size_t nbits = std::ceil(std::log2(db_nitems));

														
 
															+    const size_t nodes_in_interval = db_nitems - 1;

														
 
															+    auto start = std::chrono::steady_clock::now();

														
 
															 #ifdef VERBOSE

														
 
															-		printf("n_threads = %zu\n\n", n_threads);

														
 
															+    printf("n_threads = %zu\n\n", n_threads);

														
 
															 #endif

														
 
															- 

														
 
															- for(size_t iters = 0; iters < n_batches; ++iters)

														
 
															-{

														
 
															-   if (n_batches > 1) {

														
 
															-    printf("Starting evalfull_mpc batch %lu / %lu\n", iters+1, n_batches);

														
 
															-   }

														
 
															-   uint8_t **target_share_read = new uint8_t *[thread_per_batch];

														
 
															-   generate_random_targets(target_share_read, thread_per_batch, party, expo);

														
 
															-   boost::asio::thread_pool pool(thread_per_batch);

														
 
															-   for (size_t j = 0; j < thread_per_batch; ++j)

														
 
															-   {

														
 
															-    boost::asio::post(pool, std::bind(evalfull_mpc, std::ref(nodes_per_leaf), std::ref(depth), std::ref(nbits), std::ref(nodes_in_interval),

														
 
															-              std::ref(aeskey), target_share_read[j], std::ref(socketsPb), 0, db_nitems - 1, output[j],

														
 
															-              flags[j], std::ref(final_correction_word[j]), party, 5 * j));

														
 
															-   }

														
 
															-

														
 
															-   pool.join();

														
 
															-

														
 
															-

														
 
															-   convert_shares(output, flags, thread_per_batch, db_nitems, final_correction_word, socketsPb[0], party);

														
 
															-}

														
 
															- auto end = std::chrono::steady_clock::now();

														
 
															-	std::chrono::duration<double> elapsed_seconds = end - start;

														
 
															-	std::cout << "WallClockTime: " << elapsed_seconds.count() << " s" << std::endl;

														
 
															- std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;

														
 
															- 

														
 
															-

														
 
															-	if(!party)

														
 
															-	{

														
 
															-		char const *p0_filename0;

														
 
															-		p0_filename0 = "../duoram-online/preprocflags/party0_read_flags_b";

														
 
															-		int w0 = open(p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD);

														
 
															-		int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0]));

														
 
															-  if (written < 0) perror("Write error");

														
 
															-		close(w0);

														
 
															-	}

														
 
															-	else

														
 
															-	{

														
 
															-		char const *p0_filename0;

														
 
															-		p0_filename0 = "../duoram-online/preprocflags/party1_read_flags_b";

														
 
															-		int w0 = open(p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD);

														
 
															-		int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0]));

														
 
															-		if (written < 0) perror("Write error"); 

														
 
															-		close(w0);

														
 
															-	}

														
 
															-

														
 
															-	return 0;

														
 
															+

														
 
															+    for(size_t iters = 0; iters < n_batches; ++iters)

														
 
															+    {

														
 
															+        if (n_batches > 1) {

														
 
															+            printf("Starting evalfull_mpc batch %lu / %lu\n", iters+1, n_batches);

														
 
															+        }

														
 
															+        uint8_t **target_share_read = new uint8_t *[thread_per_batch];

														
 
															+        generate_random_targets(target_share_read, thread_per_batch, party, expo);

														
 
															+        boost::asio::thread_pool pool(thread_per_batch);

														
 
															+        for (size_t j = 0; j < thread_per_batch; ++j)

														
 
															+        {

														
 
															+            boost::asio::post(pool, std::bind(evalfull_mpc, std::ref(nodes_per_leaf), std::ref(depth), std::ref(nbits), std::ref(nodes_in_interval),

														
 
															+                                              std::ref(aeskey), target_share_read[j], std::ref(socketsPb), 0, db_nitems - 1, output[j],

														
 
															+                                              flags[j], std::ref(final_correction_word[j]), party, 5 * j));

														
 
															+        }

														
 
															+

														
 
															+        pool.join();

														
 
															+        for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															+        {

														
 
															+            delete[] target_share_read[j];

														
 
															+        }

														
 
															+        delete[] target_share_read;

														
 
															+        convert_shares(output, flags, thread_per_batch, db_nitems, final_correction_word, socketsPb[0], party);

														
 
															+    }

														
 
															+

														
 
															+    for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															+    {

														
 
															+

														
 
															+        free(output[j]);

														
 
															+        free(flags[j]);

														
 
															+    }

														
 
															+    free(output);

														
 
															+    free(flags);

														
 
															+    free(final_correction_word);

														
 
															+

														
 
															+    auto end = std::chrono::steady_clock::now();

														
 
															+    std::chrono::duration<double> elapsed_seconds = end - start;

														
 
															+    std::cout << "WallClockTime: " << elapsed_seconds.count() << " s" << std::endl;

														
 
															+    std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;

														
 
															+    

														
 
															+    return 0;

														
 
															 }

														
--- a/duoram-online/duoram.cpp
+++ b/duoram-online/duoram.cpp
@@ -315,6 +315,7 @@ int main(const int argc, const char * argv[])
 
															     #endif

														
 
															     delete[] WritePb_;

														
 
															     delete[] WritePb_recv;

														
 
															+    delete[] where_to_write;

														
 
															     for(size_t w = 0; w < number_of_writes; ++w)

														
 
															     {			

														
@@ -354,7 +355,7 @@ int main(const int argc, const char * argv[])
 
															     for(size_t r = 0; r < number_of_ind_reads; ++r) WritePb_ind_reads[r] = where_to_read_independent[r] -ri;

														
 
															-

														
 
															+    delete[] where_to_read_independent;

														
 
															     boost::asio::write(sockets_2[3], boost::asio::buffer(WritePb_ind_reads, number_of_ind_reads * sizeof(size_t)));

														
 
															     boost::asio::write(sockets_[3], boost::asio::buffer(WritePb_ind_reads, number_of_ind_reads * sizeof(size_t)));

														
 
															     boost::asio::read(sockets_[3], boost::asio::buffer(WritePb_ind_reads_recv, number_of_ind_reads * sizeof(size_t)));

														
@@ -381,7 +382,10 @@ int main(const int argc, const char * argv[])
 
															       std::cout << "---> [duoram independent reads] " <<  print_reconstruction(sockets_[0], read_out_independent_reads[r]) << std::endl;

														
 
															       #endif

														
 
															     }

														
 
															-

														
 
															+    delete[] rotate;

														
 
															+    delete[] Gamma_reads;

														
 
															+    delete[] WritePb_ind_reads_recv;

														
 
															+    delete[] WritePb_ind_reads;

														
 
															     auto end_ind_reads = std::chrono::steady_clock::now();

														
 
															     std::chrono::duration<double> elapsed_seconds_ind_reads = end_ind_reads - start_ind_reads;

														
 
															     //printf("elapsed_seconds_ind_reads = %f\n",elapsed_seconds_ind_reads.count());

														
@@ -406,7 +410,7 @@ int main(const int argc, const char * argv[])
 
															        std::cout << print_reconstruction(sockets_[0], read_out_dependent_reads[r]) << std::endl;

														
 
															       #endif

														
 
															     }

														
 
															-    

														
 
															+    delete[] where_to_read_dependent;

														
 
															     auto end_dep_reads = std::chrono::steady_clock::now();

														
 
															     std::chrono::duration<double> elapsed_seconds_dep_reads = end_dep_reads - start_dep_reads;

														
 
															     dependent_read_time = elapsed_seconds_dep_reads.count();

														
@@ -416,6 +420,29 @@ int main(const int argc, const char * argv[])
 
															      std::cout << std::endl << std::endl << "============== DEPENDENT READS END  ==============" << std::endl << std::endl;

														
 
															     #endif

														
 
															    #endif

														
 
															+    

														
 
															+    free(reading_b);

														
 
															+    free(reading_c);

														
 
															+    free(reading_d);

														
 
															+    free(writing_b);

														
 
															+    free(writing_c);

														
 
															+    free(writing_d);

														
 
															+    free(reading_temp);

														
 
															+    free(DB);

														
 
															+    free(updated_DB);

														
 
															+    free(blinded_DB);

														
 
															+    free(blinded_DB_recv);

														
 
															+    free(updated_blinded_DB_recv);

														
 
															+    free(b);

														
 
															+    free(c);

														
 
															+    free(d);

														
 
															+    free(blinds);

														
 
															+    free(updated_blinds);

														
 
															+    

														
 
															+    #ifdef ThreeParty

														
 
															+     delete[] read_out;

														
 
															+     delete[] Gamma;

														
 
															+    #endif

														
 
															   }  

														
 
															 auto end_total = std::chrono::steady_clock::now();

														
--- a/duoram-online/readvectors.h
+++ b/duoram-online/readvectors.h
@@ -13,6 +13,7 @@ int read_final_correction_word(bool party, DB_t& FCW_read, int i = 0)
 
															    concatanate_index("../duoram-online/preprocflags/FCW0", tmp, i);

														
 
															 			int const in0 { open(tmp, O_RDONLY ) };

														
 
															 	 	size_t r = read(in0, &FCW_read,   sizeof(FCW_read));	

														
 
															+	 	close(in0);

														
 
															    if(r < 0) perror("Read error");

														
 
															 		}

														
@@ -21,7 +22,8 @@ int read_final_correction_word(bool party, DB_t& FCW_read, int i = 0)
 
															    char tmp[100];

														
 
															    concatanate_index("../duoram-online/preprocflags/FCW1", tmp, i);

														
 
															 		 int const in0 { open(tmp, O_RDONLY ) };

														
 
															-	 	size_t r = read(in0, &FCW_read,   sizeof(FCW_read));	

														
 
															+	 	size_t r = read(in0, &FCW_read,   sizeof(FCW_read));

														
 
															+	 		close(in0);	

														
 
															  		if(r < 0) perror("Read error");

														
 
															 		}

														
@@ -35,7 +37,8 @@ int read_rand_indx(bool party, DB_t& R, int i = 0)
 
															    char tmp[100];

														
 
															    concatanate_index("../duoram-online/preprocflags/R0", tmp, i);

														
 
															 			int const in0 { open(tmp, O_RDONLY ) };

														
 
															-	 	size_t r = read(in0, &R,   sizeof(R));	

														
 
															+	 	size_t r = read(in0, &R,   sizeof(R));

														
 
															+	 	close(in0);	

														
 
															    if(r < 0) perror("Read error");

														
 
															 		}

														
@@ -45,6 +48,7 @@ int read_rand_indx(bool party, DB_t& R, int i = 0)
 
															    concatanate_index("../duoram-online/preprocflags/R1", tmp, i);

														
 
															 		 int const in0 { open(tmp, O_RDONLY ) };

														
 
															 	 	size_t r = read(in0, &R,   sizeof(R));	

														
 
															+	 	close(in0);

														
 
															  		if(r < 0) perror("Read error");

														
 
															 		}

														
@@ -59,16 +63,19 @@ int read_flags_for_reading(bool party, size_t db_nitems, int i = 0)
 
															    concatanate_index("../duoram-online/preprocflags/party0_read_flags_b", tmp, i);

														
 
															 	 	int const in0 { open(tmp, O_RDONLY ) };

														
 
															 	 	size_t r = read(in0, reading_b,  sizeof(reading_b));	

														
 
															+	 	close(in0);

														
 
															    if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/party0_read_flags_c", tmp, i);

														
 
															    int const in1 { open( tmp, O_RDONLY ) };

														
 
															 	 	r = read(in1, reading_c,  sizeof(reading_c));

														
 
															+	 	close(in1);

														
 
															    if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/party0_read_flags_d", tmp, i);

														
 
															 	  int const in2 { open( tmp, O_RDONLY ) };

														
 
															-	  r = read(in2, reading_d,  sizeof(reading_d));	

														
 
															+	  r = read(in2, reading_d,  sizeof(reading_d));

														
 
															+	  close(in2);	

														
 
															    if(r < 0) perror("Read error");

														
 
															 	}

														
@@ -79,16 +86,19 @@ int read_flags_for_reading(bool party, size_t db_nitems, int i = 0)
 
															    concatanate_index("../duoram-online/preprocflags/party1_read_flags_b", tmp, i);

														
 
															 		 int const in0 { open(tmp, O_RDONLY ) };

														
 
															 	 	size_t r = read(in0, reading_b,  sizeof(reading_b));	

														
 
															+	 	close(in0);

														
 
															 	 	if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/party1_read_flags_c", tmp, i);

														
 
															  	 int const in1 { open(tmp, O_RDONLY ) };

														
 
															 		 r = read(in1, reading_c,  sizeof(reading_c));

														
 
															+		 close(in1);

														
 
															 		 if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/party1_read_flags_d", tmp, i);

														
 
															 		 int const in2 { open(tmp, O_RDONLY ) };

														
 
															 		 r = read(in2, reading_d,  sizeof(reading_d));	

														
 
															+		 close(in2);

														
 
															    if(r < 0) perror("Read error");

														
 
															 }

														
@@ -103,17 +113,20 @@ int read_flags_for_writing(bool party, size_t db_nitems, int i = 0)
 
															     char tmp[100];

														
 
															     concatanate_index("../duoram-online/preprocflags/party0_write_flags_b", tmp, i);

														
 
															 	   int const in0_w { open(tmp, O_RDONLY ) };

														
 
															-    size_t r = read(in0_w, writing_b,  sizeof(writing_b));	

														
 
															+    size_t r = read(in0_w, writing_b,  sizeof(writing_b));

														
 
															+    close(in0_w);	

														
 
															     if(r < 0) perror("Read error");

														
 
															     concatanate_index("../duoram-online/preprocflags/party0_write_flags_c", tmp,i);	   

														
 
															 	   int const in1_w { open( tmp, O_RDONLY ) };

														
 
															     r = read(in1_w, writing_c,  sizeof(writing_c));

														
 
															+    close(in1_w);

														
 
															     if(r < 0) perror("Read error");

														
 
															     concatanate_index("../duoram-online/preprocflags/party0_write_flags_d", tmp,i);

														
 
															 	   int const in2_w { open( tmp, O_RDONLY ) };

														
 
															    	r = read(in2_w, writing_d,  sizeof(writing_d));	

														
 
															+   	close(in2_w);

														
 
															     if(r < 0) perror("Read error");

														
 
															 	}

														
@@ -123,16 +136,19 @@ int read_flags_for_writing(bool party, size_t db_nitems, int i = 0)
 
															    concatanate_index("../duoram-online/preprocflags/party1_write_flags_b", tmp,i);

														
 
															  		int const in0_w { open( tmp, O_RDONLY ) };

														
 
															 	 	size_t r = read(in0_w, writing_b,  sizeof(writing_b));	

														
 
															+	 	close(in0_w);

														
 
															 	 	if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/party1_write_flags_c", tmp, i);

														
 
															    int const in1_w { open(tmp, O_RDONLY ) };

														
 
															 		 r = read(in1_w, writing_c,  sizeof(writing_c));

														
 
															+		 close(in1_w);

														
 
															    if(r < 0) perror("Read error");

														
 
															 		 concatanate_index("../duoram-online/preprocflags/party1_write_flags_d", tmp, i);

														
 
															 		 int const in2_w { open( tmp, O_RDONLY ) };

														
 
															 		 r = read(in2_w, writing_d,  sizeof(writing_d));	

														
 
															+		 close(in2_w);

														
 
															 	  if(r < 0) perror("Read error");

														
 
															 	}

														
@@ -145,21 +161,25 @@ int read_flags_for_writing(bool party, size_t db_nitems, int i = 0)
 
															    concatanate_index("../duoram-online/preprocflags/P2_party0_write_flags_c", tmp, i);

														
 
															    int const in1_w { open(tmp, O_RDONLY ) };

														
 
															    size_t r = read(in1_w, writing_c,  sizeof(writing_c));

														
 
															+    close(in1_w);

														
 
															    if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/P2_party1_write_flags_d", tmp, i);

														
 
															    int const in2_w { open(tmp, O_RDONLY ) };

														
 
															    r = read(in2_w, writing_d,  sizeof(writing_d)); 

														
 
															+   close(in2_w);

														
 
															    if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/P2_party0_write_c", tmp, i);

														
 
															    int const in1_w_ { open(tmp, O_RDONLY ) };

														
 
															    r = read(in1_w_, c,  sizeof(c));

														
 
															+   close(in1_w_);

														
 
															    if(r < 0) perror("Read error");

														
 
															    concatanate_index("../duoram-online/preprocflags/P2_party1_write_d", tmp, i);

														
 
															    int const in2_w_ { open(tmp, O_RDONLY ) };

														
 
															    r = read(in2_w_, d,  sizeof(d));  

														
 
															+   close(in2_w_);

														
 
															    if(r < 0) perror("Read error");

														
 
															    return 0;

														
@@ -171,11 +191,13 @@ int read_flags_for_generating_cancellation_terms(size_t db_nitems, int i = 0)
 
															   concatanate_index("../duoram-online/preprocflags/P2_party1_read_flags_d", tmp, i);

														
 
															   int const in2 { open(tmp, O_RDONLY ) };

														
 
															   size_t r = read(in2, reading_d,  sizeof(reading_d)); 

														
 
															+  close(in2);

														
 
															   if(r < 0) perror("Read error");

														
 
															   concatanate_index("../duoram-online/preprocflags/P2_party0_read_flags_c", tmp, i);

														
 
															   int const in2_ { open(tmp, O_RDONLY ) };

														
 
															   r = read(in2_, reading_c,  sizeof(reading_c)); 

														
 
															+  close(in2_);	

														
 
															   if(r < 0) perror("Read error");

														
 
															   return 0;

														
@@ -190,16 +212,19 @@ int read_flags_for_updating(bool party, size_t db_nitems, int i = 0)
 
															   concatanate_index("../duoram-online/preprocflags/party0_write_b", tmp, i);

														
 
															 		int const in0_w_ { open(tmp, O_RDONLY ) };

														
 
															 		size_t r = read(in0_w_, b,  sizeof(b));	

														
 
															+		close(in0_w_);	

														
 
															 	 if(r < 0) perror("Read error");

														
 
															   concatanate_index("../duoram-online/preprocflags/party0_write_c", tmp, i);

														
 
															   int const in1_w_ { open(tmp, O_RDONLY ) };

														
 
															 		r = read(in1_w_, c,  sizeof(c));

														
 
															+		close(in1_w_);	

														
 
															   if(r < 0) perror("Read error");

														
 
															   concatanate_index("../duoram-online/preprocflags/party0_write_d", tmp, i); 

														
 
															   int const in2_w_ { open(tmp, O_RDONLY ) };

														
 
															 	 r = read(in2_w_, d,  sizeof(d));	

														
 
															+	 close(in2_w_);	

														
 
															   if(r < 0) perror("Read error");

														
 
															 	}

														
@@ -207,17 +232,20 @@ int read_flags_for_updating(bool party, size_t db_nitems, int i = 0)
 
															 	{

														
 
															   concatanate_index("../duoram-online/preprocflags/party1_write_b", tmp, i);

														
 
															  	int const in0_w_ { open( tmp, O_RDONLY ) };

														
 
															-	 size_t r = read(in0_w_, b,  sizeof(b));	

														
 
															+	 size_t r = read(in0_w_, b,  sizeof(b));

														
 
															+	 close(in0_w_);	

														
 
															   if(r < 0) perror("Read error");

														
 
															   concatanate_index("../duoram-online/preprocflags/party1_write_c", tmp, i);

														
 
															   int const in1_w_ { open(tmp, O_RDONLY ) };

														
 
															 		r = read(in1_w_, c,  sizeof(c));

														
 
															+	 close(in1_w_);	

														
 
															   if(r < 0) perror("Read error");

														
 
															   concatanate_index("../duoram-online/preprocflags/party1_write_d", tmp, i); 	

														
 
															   int const in2_w_ { open(tmp, O_RDONLY ) };

														
 
															-		 r = read(in2_w_, d,  sizeof(d));	

														
 
															+		 r = read(in2_w_, d,  sizeof(d));

														
 
															+		 close(in2_w_);		

														
 
															   if(r < 0) perror("Read error");

														
 
															 	}

														
--- a/preprocessing/preprocessing.cpp
+++ b/preprocessing/preprocessing.cpp
@@ -68,20 +68,20 @@ int main(int argc, char * argv[])
 
															     /* The function make_connections appears in network.h */

														
 
															    make_connections(party, host1, host2,  io_context, socketsPb, socketsP2, ports,  ports2_1, ports2_0, number_of_sockets);

														
 
															-      size_t RAM_needed_per_thread = 164 * db_nitems;

														
 
															-      std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;

														
 
															-      std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;

														
 
															-      size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);

														
 
															-      if (thread_per_batch > n_threads) {

														
 
															-	thread_per_batch = n_threads;

														
 
															-      }

														
 
															-      std::cout << "thread_per_batch = " << thread_per_batch << std::endl;

														
 
															-      if (thread_per_batch < 1) {

														
 
															-       std::cout << "You need more RAM" << std::endl;

														
 
															-       exit(0);

														
 
															-      }

														
 
															-      size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);

														
 
															-      std::cout << "n_batches = " << n_batches << std::endl;

														
 
															+   size_t RAM_needed_per_thread = 164 * db_nitems;

														
 
															+   std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;

														
 
															+   std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;

														
 
															+   size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);

														
 
															+   if (thread_per_batch > n_threads) {

														
 
															+     thread_per_batch = n_threads;

														
 
															+   }

														
 
															+   std::cout << "thread_per_batch = " << thread_per_batch << std::endl;

														
 
															+   if (thread_per_batch < 1) {

														
 
															+    std::cout << "You need more RAM" << std::endl;

														
 
															+    exit(0);

														
 
															+   }

														
 
															+   size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);

														
 
															+   std::cout << "n_batches = " << n_batches << std::endl;

														
 
															    uint8_t ** target_share_read = new uint8_t*[thread_per_batch];

														
@@ -104,61 +104,49 @@ int main(int argc, char * argv[])
 
															    }

														
 
															-   boost::asio::thread_pool pool_share_conversion(thread_per_batch);

														
 
															+  boost::asio::thread_pool pool_share_conversion(thread_per_batch);

														
 
															-

														
 
															-    

														
 
															-    // The following function call creates and evaluates DPFs at target_share_read[j] for j \in \{0, \ldots, n_threads}

														
 
															-    // the flag vectors are stored in flags

														
 
															-    // the leaves are stored in output

														
 
															-    // the final correctionword is stored in final_correction_word

														
 
															-

														
 
															   dpfP2 * dpf_instance = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);

														
 
															   cw_construction computecw_array;

														
 
															-     boost::asio::read(socketsP2[0], boost::asio::buffer(&computecw_array, sizeof(computecw_array)));

														
 
															-     #ifdef VERBOSE

														
 
															-      std::cout << "computecw_array.rand_b: " << computecw_array.rand_b[0] << " " << computecw_array.rand_b[1] << std::endl;

														
 
															-     #endif

														
 
															-

														
 
															-      /* The function create_dpfs appears in dpf-gen.h*/

														
 
															-      bool reading = true;

														
 
															-

														
 
															-      

														
 
															-

														
 
															-     size_t *thread_communication_costs = new size_t[thread_per_batch];

														
 
															-     for(size_t iter = 0; iter < n_batches; ++iter)

														
 
															-     { 

														
 
															-        if (n_batches > 1) {

														
 
															-          printf("Starting create_dpfs batch %lu / %lu\n", iter+1, n_batches);

														
 
															-        }

														
 
															-        boost::asio::thread_pool pool(thread_per_batch);

														
 
															-        for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															-        {

														
 
															-	  thread_communication_costs[j] = 0; 

														
 
															-	  boost::asio::post(pool,

														
 
															-	    std::bind(create_dpfs, reading,  db_nitems, std::ref(aeskey),

														
 
															-		target_share_read[j], std::ref(socketsPb), std::ref(socketsP2),

														
 
															-		0, db_nitems-1, output[j],  flags[j],

														
 
															-		std::ref(final_correction_word[j]), computecw_array,

														
 
															-		std::ref(dpf_instance), party, 5 * j, j,

														
 
															-		std::ref(thread_communication_costs[j])));

														
 
															-        }    

														
 
															-        pool.join();

														
 
															-        for(size_t j = 0; j < thread_per_batch; ++j) {

														
 
															-	  communication_cost += thread_communication_costs[j];

														
 
															-	}

														
 
															-     }

														
 
															-     delete[] thread_communication_costs;

														
 
															-      

														
 
															-     boost::asio::write(socketsP2[0], boost::asio::buffer(dpf_instance, n_threads * sizeof(dpfP2))); // do this in parallel.

														
 
															-     communication_cost += (n_threads * sizeof(dpfP2));

														
 
															+  boost::asio::read(socketsP2[0], boost::asio::buffer(&computecw_array, sizeof(computecw_array)));

														
 
															-   #ifdef DEBUG

														
 
															-

														
 
															+  #ifdef VERBOSE

														
 
															+  std::cout << "computecw_array.rand_b: " << computecw_array.rand_b[0] << " " << computecw_array.rand_b[1] << std::endl;

														
 
															+  #endif

														
 
															+

														
 
															+    /* The function create_dpfs appears in dpf-gen.h*/

														
 
															+  bool reading = true;

														
 
															+  size_t *thread_communication_costs = new size_t[thread_per_batch];

														
 
															+  for(size_t iter = 0; iter < n_batches; ++iter)

														
 
															+  { 

														
 
															+      if (n_batches > 1) {

														
 
															+        printf("Starting create_dpfs batch %lu / %lu\n", iter+1, n_batches);

														
 
															+      }

														
 
															+      boost::asio::thread_pool pool(thread_per_batch);

														
 
															+      for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															+      {

														
 
															+    	  thread_communication_costs[j] = 0; 

														
 
															+    	  boost::asio::post(pool,

														
 
															+    	  std::bind(create_dpfs, reading,  db_nitems, std::ref(aeskey), target_share_read[j], std::ref(socketsPb), std::ref(socketsP2), 0, db_nitems-1, output[j],  flags[j],

														
 
															+    		          std::ref(final_correction_word[j]), computecw_array, std::ref(dpf_instance), party, 5 * j, j, std::ref(thread_communication_costs[j])));

														
 
															+      }    

														
 
															+      pool.join();

														
 
															+      for(size_t j = 0; j < thread_per_batch; ++j) {

														
 
															+       communication_cost += thread_communication_costs[j];

														
 
															+      }

														
 
															+  }

														
 
															+  

														
 
															+  delete[] thread_communication_costs;

														
 
															+      

														
 
															+  boost::asio::write(socketsP2[0], boost::asio::buffer(dpf_instance, n_threads * sizeof(dpfP2))); // do this in parallel.

														
 
															+  communication_cost += (n_threads * sizeof(dpfP2));

														
 
															+  free(dpf_instance);

														
 
															+   

														
 
															+  #ifdef DEBUG

														
 
															     for(size_t j = 0; j < n_threads; ++j)

														
 
															     {

														
 
															       std::cout << "n_threads = " << j << std::endl;

														
@@ -183,19 +171,20 @@ int main(int argc, char * argv[])
 
															         final_correction_word_reconstruction = final_correction_word_reconstruction + final_correction_word[j][0];

														
 
															         std::cout << "final_correction_word_reconstruction = " << final_correction_word_reconstruction << std::endl << std::endl;

														
 
															      }

														
 
															-    #endif

														
 
															+  #endif

														
 
															     /* 

														
 
															      leaves is a additive shares of the outputs (leaves of the DPF)

														
 
															      leafbits is the additive shares of flag bits of the DPFs

														
 
															     */

														
 
															-   int64_t ** leaves = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);

														
 
															-   int64_t ** leafbits  = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch); 

														
 
															-   for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															-   {

														
 
															-    leaves[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));

														
 
															-    leafbits[j]  = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));

														
 
															-   }

														
 
															+  int64_t ** leaves = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);

														
 
															+  int64_t ** leafbits  = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch); 

														
 
															+ 

														
 
															+  for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															+  {

														
 
															+   leaves[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));

														
 
															+   leafbits[j]  = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));

														
 
															+  }

														
@@ -203,23 +192,36 @@ int main(int argc, char * argv[])
 
															    for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															    {

														
 
															      boost::asio::post(pool_share_conversion,  std::bind(convert_shares, j, output, flags, n_threads, db_nitems, final_correction_word, 	leaves, leafbits, 

														
 
															-                                                          std::ref(socketsPb), std::ref(socketsP2), party));	 	

														
 
															+                                               std::ref(socketsPb), std::ref(socketsP2), party));	 	

														
 
															    }

														
 
															-    pool_share_conversion.join();

														
 
															+   pool_share_conversion.join();

														
 
															-    boost::asio::thread_pool pool_xor_to_additive(thread_per_batch); 

														
 
															+   boost::asio::thread_pool pool_xor_to_additive(thread_per_batch); 

														
 
															-    int64_t *additve_shares = new int64_t[thread_per_batch]; 

														
 
															-    for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															-    {

														
 
															+   int64_t *additve_shares = new int64_t[thread_per_batch]; 

														
 
															+   

														
 
															+   for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															+   {

														
 
															      boost::asio::post(pool_xor_to_additive, std::bind(xor_to_additive, party, target_share_read[j], std::ref(socketsPb[j]), std::ref(socketsP2[j]), expo, std::ref(additve_shares[j])));

														
 
															-    }

														
 
															+   }

														
 
															-    pool_xor_to_additive.join();

														
 
															+   pool_xor_to_additive.join();

														
 
															- 

														
 
															+   for(size_t j = 0; j < thread_per_batch; ++j)

														
 
															+   {

														
 
															+    free(leaves[j]);

														
 
															+    free(leafbits[j]);

														
 
															+    free(output[j]);

														
 
															+    free(flags[j]);

														
 
															+    delete[] target_share_read[j];

														
 
															+   }

														
 
															+    free(leaves);

														
 
															+    free(leafbits);

														
 
															+    free(output);

														
 
															+    free(flags);

														
 
															+    delete[] target_share_read;

														
 
															     /* For the artifact, don't actually write these in order to not use very

														
 
															      * large amounts of storage

														
--- a/preprocessing/share-conversion.h
+++ b/preprocessing/share-conversion.h
@@ -315,7 +315,7 @@ void convert_shares(size_t i, __m128i ** output, int8_t ** flags, size_t n_threa
 
															 		leaves[i][j]  = output[i][j][0];

														
 
															 		flags_[j] = (flags[i][j] * pm) + (flags[i][j] * share_b_recv.PM) + (flags[i][j] * rb); 

														
 
															 		flags_[j] += output[i][j][1]; 

														
 
															-  flags_[j] -= (flags[i][j] * P2_shareconversion.FCWshare_reconstruction);		

														
 
															+    flags_[j] -= (flags[i][j] * P2_shareconversion.FCWshare_reconstruction);		

														
 
															 		#ifdef DEBUG