| 
					
				 | 
			
			
				@@ -82,12 +82,11 @@ int main(int argc, char* argv[]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  const size_t n_threads = atoi(argv[3]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  const size_t number_of_sockets = 5 * n_threads; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  const size_t db_nitems = 1ULL << atoi(argv[4]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- const size_t op = atoi(argv[5]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- const size_t maxRAM = atoi(argv[6]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ const size_t maxRAM = atoi(argv[5]); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  std::cout << "maxRAM = "  << maxRAM << std::endl; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  size_t RAM_needed = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- RAM_needed = n_threads *  9 * ((sizeof(__m128i) * db_nitems)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ RAM_needed = n_threads *  164 * db_nitems; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  std::cout << "RAM needed = " << RAM_needed << " bytes = " << RAM_needed/1073741824 << " GiB" << std::endl; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  size_t n_batches = std::ceil(double(RAM_needed)/(1073741824 * maxRAM)); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  std::cout << "n_batches = " << n_batches << std::endl; 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -136,6 +135,7 @@ int main(int argc, char* argv[]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   auto start = std::chrono::steady_clock::now();  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 	__m128i ** output0 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 	int8_t  ** flags0  = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch); 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -200,10 +200,14 @@ int main(int argc, char* argv[]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   boost::asio::write(sockets0[0], boost::asio::buffer(&computecw0_array,  sizeof(computecw0_array))); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   boost::asio::write(sockets1[0], boost::asio::buffer(&computecw1_array,  sizeof(computecw1_array))); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  communication_cost += sizeof(computecw0_array); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  communication_cost += sizeof(computecw1_array); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   boost::asio::read(sockets0[0], boost::asio::buffer(dpf_instance0, n_threads * sizeof(dpfP2))); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   boost::asio::read(sockets1[0], boost::asio::buffer(dpf_instance1, n_threads * sizeof(dpfP2)));  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     for(size_t iter = 0; iter < n_batches; ++iter) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     {  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   boost::asio::thread_pool pool(thread_per_batch);  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for(size_t j = 0; j < thread_per_batch; ++j) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     { 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -212,21 +216,8 @@ int main(int argc, char* argv[]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     }   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  pool.join(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- if(op == 1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    boost::asio::thread_pool pool(thread_per_batch);  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    for(size_t j = 0; j < thread_per_batch; ++j) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     boost::asio::post(pool, std::bind(mpc_gen,  std::ref(depth), std::ref(aeskey), db_nitems, n_threads,  std::ref(sockets0), std::ref(sockets1),  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                     output0, flags0,  output1, flags1,  std::ref(dpf_instance0), std::ref(dpf_instance1), j, 5 * j));     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    }   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-   pool.join(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   boost::asio::thread_pool pool3(thread_per_batch);  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				    
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  int64_t ** leaves0    = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch); 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -282,6 +273,12 @@ int main(int argc, char* argv[]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				    } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   #endif 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				    
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    auto end = std::chrono::steady_clock::now(); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    std::chrono::duration<double> elapsed_seconds = end-start; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    //std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << atoi(argv[4]) << " is: " << elapsed_seconds.count() << "s\n"; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    std::cout << "WallClockTime: "  << elapsed_seconds.count() << std::endl; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   return 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |