1 year ago · c57db2987e
--- a/preprocessing/p2preprocessing.cpp
+++ b/preprocessing/p2preprocessing.cpp
@@ -137,23 +137,7 @@ int main(int argc, char* argv[])
 
				 
			
 
				    auto start = std::chrono::steady_clock::now(); 
			
 
				 
			
 
				-	__m128i ** output0 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
			
 
				-	int8_t  ** flags0  = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
			
 
				-	 
			
 
				-	for(size_t j = 0; j < thread_per_batch; ++j)
			
 
				-	{
			
 
				-		output0[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
			
 
				-		flags0[j]  = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
			
 
				-	}
			
 
				-  
			
 
				-	__m128i ** output1 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
			
 
				-	int8_t  ** flags1  = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
			
 
				-	 
			
 
				-	for(size_t j = 0; j < thread_per_batch; ++j)
			
 
				-	{
			
 
				-		output1[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
			
 
				-		flags1[j]  = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
			
 
				-	}
			
 
				+
			
 
				  
			
 
				  dpfP2 * dpf_instance0 = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
			
 
				  dpfP2 * dpf_instance1 = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
			
@@ -202,22 +186,45 @@ int main(int argc, char* argv[])
 
				   boost::asio::write(sockets1[0], boost::asio::buffer(&computecw1_array,  sizeof(computecw1_array)));
			
 
				   communication_cost += sizeof(computecw0_array);
			
 
				   communication_cost += sizeof(computecw1_array);
			
 
				-
			
 
				+ 
			
 
				   boost::asio::read(sockets0[0], boost::asio::buffer(dpf_instance0, n_threads * sizeof(dpfP2)));
			
 
				   boost::asio::read(sockets1[0], boost::asio::buffer(dpf_instance1, n_threads * sizeof(dpfP2))); 
			
 
				 
			
 
				-     for(size_t iter = 0; iter < n_batches; ++iter)
			
 
				-     { 
			
 
				-  boost::asio::thread_pool pool(thread_per_batch); 
			
 
				+
			
 
				+  __m128i ** output0 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
			
 
				+  int8_t  ** flags0  = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
			
 
				+   
			
 
				+  for(size_t j = 0; j < thread_per_batch; ++j)
			
 
				+  {
			
 
				+    output0[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
			
 
				+    flags0[j]  = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
			
 
				+  }
			
 
				+  
			
 
				+  __m128i ** output1 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
			
 
				+  int8_t  ** flags1  = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
			
 
				+   
			
 
				+  for(size_t j = 0; j < thread_per_batch; ++j)
			
 
				+  {
			
 
				+    output1[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
			
 
				+    flags1[j]  = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
			
 
				+  }
			
 
				+
			
 
				+  for(size_t iter = 0; iter < n_batches; ++iter)
			
 
				+  { 
			
 
				+    boost::asio::thread_pool pool(thread_per_batch); 
			
 
				     for(size_t j = 0; j < thread_per_batch; ++j)
			
 
				     {
			
 
				      boost::asio::post(pool, std::bind(mpc_gen,  std::ref(depth), std::ref(aeskey), db_nitems, n_threads,  std::ref(sockets0), std::ref(sockets1), 
			
 
				-                                     output0, flags0,  output1, flags1,  std::ref(dpf_instance0), std::ref(dpf_instance1), j, 5 * j));    
			
 
				+                                        output0, flags0,  output1, flags1,  std::ref(dpf_instance0), std::ref(dpf_instance1), j, 5 * j));    
			
 
				     }  
			
 
				 
			
 
				- pool.join();
			
 
				-     }
			
 
				+    pool.join();
			
 
				+  }
			
 
				  
			
 
				+
			
 
				+  free(dpf_instance0);
			
 
				+  free(dpf_instance1);
			
 
				+
			
 
				   boost::asio::thread_pool pool3(thread_per_batch); 
			
 
				   
			
 
				  int64_t ** leaves0    = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
			
@@ -249,9 +256,19 @@ int main(int argc, char* argv[])
 
				    boost::asio::post(pool4,  std::bind(P2_xor_to_additive, std::ref(sockets0[j]), std::ref(sockets1[j]), j));
			
 
				   }
			
 
				   pool4.join();
			
 
				+ 
			
 
				+  for(size_t j = 0; j < thread_per_batch; ++j)
			
 
				+  {
			
 
				+    free(leafbits0[j]); 
			
 
				+    free(leafbits1[j]);
			
 
				+    free(output0[j]);
			
 
				+    free(output1[j]);
			
 
				+  }  
			
 
				 
			
 
				-
			
 
				-
			
 
				+  free(leafbits0);
			
 
				+  free(leafbits1);
			
 
				+  free(output1);
			
 
				+  free(output0);
			
 
				 
			
 
				   for(size_t i = 0; i < thread_per_batch; ++i)
			
 
				   {
			
@@ -259,6 +276,19 @@ int main(int argc, char* argv[])
 
				    P2_write_evalfull_outs_into_a_file(true,  i, db_nitems,  flags1[i], 	leaves1[i]);
			
 
				   }
			
 
				 
			
 
				+  for(size_t j = 0; j < thread_per_batch; ++j)
			
 
				+  {
			
 
				+    free(leaves0[j]); 
			
 
				+    free(leaves1[j]);
			
 
				+    free(flags0[j]);
			
 
				+    free(flags1[j]);
			
 
				+  } 
			
 
				+
			
 
				+  free(leaves0);
			
 
				+  free(leaves1);
			
 
				+  free(flags0);
			
 
				+  free(flags1);
			
 
				+
			
 
				   #ifdef DEBUG
			
 
				    for(size_t ind = 0; ind < n_threads; ++ind)
			
 
				    {
			
@@ -272,7 +302,9 @@ int main(int argc, char* argv[])
 
				     }
			
 
				    }
			
 
				   #endif
			
 
				-  
			
 
				+
			
 
				+
			
 
				+
			
 
				     auto end = std::chrono::steady_clock::now();
			
 
				     std::chrono::duration<double> elapsed_seconds = end-start;
			
 
				     //std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << atoi(argv[4]) << " is: " << elapsed_seconds.count() << "s\n";