// preprocessing.cpp
  1. #include <type_traits> // std::is_same<>
  2. #include <limits> // std::numeric_limits<>
  3. #include <climits> // CHAR_BIT
  4. #include <cmath> // std::log2, std::ceil, std::floor
  5. #include <stdexcept> // std::runtime_error
  6. #include <array> // std::array<>
  7. #include <iostream> // std::istream and std::ostream
  8. #include <vector> // std::vector<>
  9. #include <memory> // std::shared_ptr<>
  10. #include <utility> // std::move
  11. #include <algorithm> // std::copy
  12. #include <cstring> // std::memcpy
  13. #include <bsd/stdlib.h> // arc4random_buf
  14. #include <x86intrin.h> // SSE and AVX intrinsics
  15. #include <../boost/asio/thread_pool.hpp>
  16. #include <../boost/lexical_cast.hpp>
  17. #include <../boost/asio.hpp>
  18. #include <fcntl.h>
  19. #include <cstdlib>
  20. #include <chrono>
  21. #include <sys/mman.h>
  22. #include <sys/stat.h>
  23. #include <fstream>
  24. #include <future>
  25. #include <mutex>
  26. typedef __m128i node_t;
  27. constexpr size_t leaf_size = 1;
  28. typedef __m128i leaf_type;
  29. typedef std::array<leaf_type, leaf_size> leaf_t;
  30. size_t communication_cost = 0;
  31. #include "bitutils.h"
  32. #include "block.h"
  33. #include "prg_aes_impl.h"
  34. #include "filesio.h"
  35. using boost::asio::ip::tcp;
  36. using socket_t = boost::asio::ip::tcp::socket;
  37. using namespace dpf; // The namespace is found in bitutils.h
  38. #include "mpc.h"
  39. #include "network.h"
  40. #include "dpfgen.h"
  41. #include "share-conversion.h"
  42. int main(int argc, char * argv[])
  43. {
  44. boost::asio::io_context io_context;
  45. std::string addr = "127.0.0.1";
  46. const std::string host1 = (argc < 2) ? "127.0.0.1" : argv[1];
  47. const std::string host2 = (argc < 3) ? "127.0.0.1" : argv[2];
  48. const size_t n_threads = atoi(argv[3]);
  49. const size_t expo = atoi(argv[4]);
  50. const size_t db_nitems = 1ULL << expo;
  51. const size_t maxRAM = atoi(argv[5]);
  52. //std::cout << "n_threads = " << n_threads << std::endl;
  53. const size_t number_of_sockets = 5 * n_threads;
  54. std::vector<socket_t> socketsPb, socketsP2;
  55. std::vector<int> ports, ports2_1, ports2_0;
  56. bool party;
  57. /* The function make_connections appears in network.h */
  58. make_connections(party, host1, host2, io_context, socketsPb, socketsP2, ports, ports2_1, ports2_0, number_of_sockets);
  59. size_t RAM_needed_per_thread = 164 * db_nitems;
  60. std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;
  61. std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;
  62. size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);
  63. if (thread_per_batch > n_threads) {
  64. thread_per_batch = n_threads;
  65. }
  66. std::cout << "thread_per_batch = " << thread_per_batch << std::endl;
  67. if (thread_per_batch < 1) {
  68. std::cout << "You need more RAM" << std::endl;
  69. exit(0);
  70. }
  71. size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);
  72. std::cout << "n_batches = " << n_batches << std::endl;
  73. uint8_t ** target_share_read = new uint8_t*[thread_per_batch];
  74. generate_random_targets(target_share_read, thread_per_batch, party, expo);
  75. AES_KEY aeskey;
  76. auto start = std::chrono::steady_clock::now();
  77. __m128i * final_correction_word = (__m128i *) std::aligned_alloc(sizeof(__m256i), thread_per_batch * sizeof(__m128i));
  78. __m128i ** output = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
  79. int8_t ** flags = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
  80. for(size_t j = 0; j < thread_per_batch; ++j)
  81. {
  82. output[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
  83. flags[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
  84. }
  85. boost::asio::thread_pool pool_share_conversion(thread_per_batch);
  86. dpfP2 * dpf_instance = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
  87. cw_construction computecw_array;
  88. boost::asio::read(socketsP2[0], boost::asio::buffer(&computecw_array, sizeof(computecw_array)));
  89. #ifdef VERBOSE
  90. std::cout << "computecw_array.rand_b: " << computecw_array.rand_b[0] << " " << computecw_array.rand_b[1] << std::endl;
  91. #endif
  92. /* The function create_dpfs appears in dpf-gen.h*/
  93. bool reading = true;
  94. size_t *thread_communication_costs = new size_t[thread_per_batch];
  95. for(size_t iter = 0; iter < n_batches; ++iter)
  96. {
  97. if (n_batches > 1) {
  98. printf("Starting create_dpfs batch %lu / %lu\n", iter+1, n_batches);
  99. }
  100. boost::asio::thread_pool pool(thread_per_batch);
  101. for(size_t j = 0; j < thread_per_batch; ++j)
  102. {
  103. thread_communication_costs[j] = 0;
  104. boost::asio::post(pool,
  105. std::bind(create_dpfs, reading, db_nitems, std::ref(aeskey), target_share_read[j], std::ref(socketsPb), std::ref(socketsP2), 0, db_nitems-1, output[j], flags[j],
  106. std::ref(final_correction_word[j]), computecw_array, std::ref(dpf_instance), party, 5 * j, j, std::ref(thread_communication_costs[j])));
  107. }
  108. pool.join();
  109. for(size_t j = 0; j < thread_per_batch; ++j) {
  110. communication_cost += thread_communication_costs[j];
  111. }
  112. }
  113. delete[] thread_communication_costs;
  114. boost::asio::write(socketsP2[0], boost::asio::buffer(dpf_instance, n_threads * sizeof(dpfP2))); // do this in parallel.
  115. communication_cost += (n_threads * sizeof(dpfP2));
  116. free(dpf_instance);
  117. #ifdef DEBUG
  118. for(size_t j = 0; j < n_threads; ++j)
  119. {
  120. std::cout << "n_threads = " << j << std::endl;
  121. for(size_t i = 0; i < db_nitems; ++i)
  122. {
  123. int8_t flags_reconstruction;
  124. boost::asio::write(socketsPb[0], boost::asio::buffer(&flags[j][i], sizeof(flags[j][i])));
  125. boost::asio::read(socketsPb[0], boost::asio::buffer(&flags_reconstruction, sizeof(flags_reconstruction)));
  126. flags_reconstruction -= flags[j][i];
  127. if(flags_reconstruction != 0) std::cout << i << " (flag) ---> " << (int) flags_reconstruction << std::endl;
  128. int64_t output_reconstruction;
  129. boost::asio::write(socketsPb[0], boost::asio::buffer(&output[j][i][0], sizeof(output[j][i][0])));
  130. boost::asio::read(socketsPb[0], boost::asio::buffer(&output_reconstruction, sizeof(output_reconstruction)));
  131. output_reconstruction -= output[j][i][0];
  132. if(output_reconstruction != 0) std::cout << i << " (output) ---> " << output_reconstruction << std::endl;
  133. }
  134. int64_t final_correction_word_reconstruction = 0;
  135. boost::asio::write(socketsPb[0], boost::asio::buffer(&final_correction_word[j][0], sizeof(final_correction_word[j][0])));
  136. boost::asio::read(socketsPb[0], boost::asio::buffer(&final_correction_word_reconstruction, sizeof(final_correction_word_reconstruction)));
  137. final_correction_word_reconstruction = final_correction_word_reconstruction + final_correction_word[j][0];
  138. std::cout << "final_correction_word_reconstruction = " << final_correction_word_reconstruction << std::endl << std::endl;
  139. }
  140. #endif
  141. /*
  142. leaves is a additive shares of the outputs (leaves of the DPF)
  143. leafbits is the additive shares of flag bits of the DPFs
  144. */
  145. int64_t ** leaves = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
  146. int64_t ** leafbits = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
  147. for(size_t j = 0; j < thread_per_batch; ++j)
  148. {
  149. leaves[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
  150. leafbits[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
  151. }
  152. /* The function convert_shares appears in share-conversion.h */
  153. for(size_t j = 0; j < thread_per_batch; ++j)
  154. {
  155. boost::asio::post(pool_share_conversion, std::bind(convert_shares, j, output, flags, n_threads, db_nitems, final_correction_word, leaves, leafbits,
  156. std::ref(socketsPb), std::ref(socketsP2), party));
  157. }
  158. pool_share_conversion.join();
  159. boost::asio::thread_pool pool_xor_to_additive(thread_per_batch);
  160. int64_t *additve_shares = new int64_t[thread_per_batch];
  161. for(size_t j = 0; j < thread_per_batch; ++j)
  162. {
  163. boost::asio::post(pool_xor_to_additive, std::bind(xor_to_additive, party, target_share_read[j], std::ref(socketsPb[j]), std::ref(socketsP2[j]), expo, std::ref(additve_shares[j])));
  164. }
  165. pool_xor_to_additive.join();
  166. for(size_t j = 0; j < thread_per_batch; ++j)
  167. {
  168. free(leaves[j]);
  169. free(leafbits[j]);
  170. free(output[j]);
  171. free(flags[j]);
  172. delete[] target_share_read[j];
  173. }
  174. free(leaves);
  175. free(leafbits);
  176. free(output);
  177. free(flags);
  178. free(final_correction_word);
  179. delete[] target_share_read;
  180. /* For the artifact, don't actually write these in order to not use very
  181. * large amounts of storage
  182. for(size_t i = 0; i < thread_per_batch; ++i)
  183. {
  184. write_evalfull_outs_into_a_file(party, i, db_nitems, flags[i], leaves[i], final_correction_word[i], additve_shares[i]);
  185. }
  186. */
  187. auto end = std::chrono::steady_clock::now();
  188. std::chrono::duration<double> elapsed_seconds = end-start;
  189. //std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << atoi(argv[4]) << " is: " << elapsed_seconds.count() << "s\n";
  190. std::cout << "WallClockTime: " << elapsed_seconds.count() << std::endl;
  191. // std::cout << "elapsed_ FIO = " << elapsed_seconds.count() << std::endl;
  192. std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;
  193. #ifdef VERBOSE
  194. for(size_t j = 0; j < n_threads; ++j)
  195. {
  196. int64_t add_;
  197. boost::asio::write(socketsPb[0], boost::asio::buffer(&additve_shares[j], sizeof(additve_shares[j])));
  198. boost::asio::read(socketsPb[0], boost::asio::buffer(&add_, sizeof(add_)));
  199. add_ = add_ + additve_shares[j];
  200. std::cout << "add_ = " << add_ << std::endl;
  201. }
  202. #endif
  203. delete[] additve_shares;
  204. return 0;
  205. }