|
@@ -137,23 +137,7 @@ int main(int argc, char* argv[])
|
|
|
|
|
|
auto start = std::chrono::steady_clock::now();
|
|
|
|
|
|
- __m128i ** output0 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
|
|
|
- int8_t ** flags0 = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
|
|
|
-
|
|
|
- for(size_t j = 0; j < thread_per_batch; ++j)
|
|
|
- {
|
|
|
- output0[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
|
|
|
- flags0[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
|
|
|
- }
|
|
|
-
|
|
|
- __m128i ** output1 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
|
|
|
- int8_t ** flags1 = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
|
|
|
-
|
|
|
- for(size_t j = 0; j < thread_per_batch; ++j)
|
|
|
- {
|
|
|
- output1[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
|
|
|
- flags1[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
|
|
|
- }
|
|
|
+
|
|
|
|
|
|
dpfP2 * dpf_instance0 = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
|
|
|
dpfP2 * dpf_instance1 = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
|
|
@@ -202,22 +186,45 @@ int main(int argc, char* argv[])
|
|
|
boost::asio::write(sockets1[0], boost::asio::buffer(&computecw1_array, sizeof(computecw1_array)));
|
|
|
communication_cost += sizeof(computecw0_array);
|
|
|
communication_cost += sizeof(computecw1_array);
|
|
|
-
|
|
|
+
|
|
|
boost::asio::read(sockets0[0], boost::asio::buffer(dpf_instance0, n_threads * sizeof(dpfP2)));
|
|
|
boost::asio::read(sockets1[0], boost::asio::buffer(dpf_instance1, n_threads * sizeof(dpfP2)));
|
|
|
|
|
|
- for(size_t iter = 0; iter < n_batches; ++iter)
|
|
|
- {
|
|
|
- boost::asio::thread_pool pool(thread_per_batch);
|
|
|
+
|
|
|
+ __m128i ** output0 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
|
|
|
+ int8_t ** flags0 = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
|
|
|
+
|
|
|
+ for(size_t j = 0; j < thread_per_batch; ++j)
|
|
|
+ {
|
|
|
+ output0[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
|
|
|
+ flags0[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
|
|
|
+ }
|
|
|
+
|
|
|
+ __m128i ** output1 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
|
|
|
+ int8_t ** flags1 = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
|
|
|
+
|
|
|
+ for(size_t j = 0; j < thread_per_batch; ++j)
|
|
|
+ {
|
|
|
+ output1[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
|
|
|
+ flags1[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
|
|
|
+ }
|
|
|
+
|
|
|
+ for(size_t iter = 0; iter < n_batches; ++iter)
|
|
|
+ {
|
|
|
+ boost::asio::thread_pool pool(thread_per_batch);
|
|
|
for(size_t j = 0; j < thread_per_batch; ++j)
|
|
|
{
|
|
|
boost::asio::post(pool, std::bind(mpc_gen, std::ref(depth), std::ref(aeskey), db_nitems, n_threads, std::ref(sockets0), std::ref(sockets1),
|
|
|
- output0, flags0, output1, flags1, std::ref(dpf_instance0), std::ref(dpf_instance1), j, 5 * j));
|
|
|
+ output0, flags0, output1, flags1, std::ref(dpf_instance0), std::ref(dpf_instance1), j, 5 * j));
|
|
|
}
|
|
|
|
|
|
- pool.join();
|
|
|
- }
|
|
|
+ pool.join();
|
|
|
+ }
|
|
|
|
|
|
+
|
|
|
+ free(dpf_instance0);
|
|
|
+ free(dpf_instance1);
|
|
|
+
|
|
|
boost::asio::thread_pool pool3(thread_per_batch);
|
|
|
|
|
|
int64_t ** leaves0 = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
|
|
@@ -249,9 +256,19 @@ int main(int argc, char* argv[])
|
|
|
boost::asio::post(pool4, std::bind(P2_xor_to_additive, std::ref(sockets0[j]), std::ref(sockets1[j]), j));
|
|
|
}
|
|
|
pool4.join();
|
|
|
+
|
|
|
+ for(size_t j = 0; j < thread_per_batch; ++j)
|
|
|
+ {
|
|
|
+ free(leafbits0[j]);
|
|
|
+ free(leafbits1[j]);
|
|
|
+ free(output0[j]);
|
|
|
+ free(output1[j]);
|
|
|
+ }
|
|
|
|
|
|
-
|
|
|
-
|
|
|
+ free(leafbits0);
|
|
|
+ free(leafbits1);
|
|
|
+ free(output1);
|
|
|
+ free(output0);
|
|
|
|
|
|
for(size_t i = 0; i < thread_per_batch; ++i)
|
|
|
{
|
|
@@ -259,6 +276,19 @@ int main(int argc, char* argv[])
|
|
|
P2_write_evalfull_outs_into_a_file(true, i, db_nitems, flags1[i], leaves1[i]);
|
|
|
}
|
|
|
|
|
|
+ for(size_t j = 0; j < thread_per_batch; ++j)
|
|
|
+ {
|
|
|
+ free(leaves0[j]);
|
|
|
+ free(leaves1[j]);
|
|
|
+ free(flags0[j]);
|
|
|
+ free(flags1[j]);
|
|
|
+ }
|
|
|
+
|
|
|
+ free(leaves0);
|
|
|
+ free(leaves1);
|
|
|
+ free(flags0);
|
|
|
+ free(flags1);
|
|
|
+
|
|
|
#ifdef DEBUG
|
|
|
for(size_t ind = 0; ind < n_threads; ++ind)
|
|
|
{
|
|
@@ -272,7 +302,9 @@ int main(int argc, char* argv[])
|
|
|
}
|
|
|
}
|
|
|
#endif
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
auto end = std::chrono::steady_clock::now();
|
|
|
std::chrono::duration<double> elapsed_seconds = end-start;
|
|
|
//std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << atoi(argv[4]) << " is: " << elapsed_seconds.count() << "s\n";
|