// p2preprocessing.cpp
  1. #include <type_traits> // std::is_same<>
  2. #include <limits> // std::numeric_limits<>
  3. #include <climits> // CHAR_BIT
  4. #include <cmath> // std::log2, std::ceil, std::floor
  5. #include <stdexcept> // std::runtime_error
  6. #include <array> // std::array<>
  7. #include <iostream> // std::istream and std::ostream
  8. #include <vector> // std::vector<>
  9. #include <memory> // std::shared_ptr<>
  10. #include <utility> // std::move
  11. #include <algorithm> // std::copy
  12. #include <cstring> // std::memcpy
  13. #include <bsd/stdlib.h> // arc4random_buf
  14. #include <x86intrin.h> // SSE and AVX intrinsics
  15. #include <../boost/asio/thread_pool.hpp>
  16. #include <../boost/asio.hpp>
  17. #include <../boost/lexical_cast.hpp>
  18. #include <iostream>
  19. #include <chrono>
  20. #include <sys/mman.h>
  21. #include <sys/stat.h>
  22. #include <fcntl.h>
  23. #include <fstream>
  24. #include <mutex>
  25. #include "bitutils.h"
  26. #include "block.h"
  27. #include "prg_aes_impl.h"
  28. #include "filesio.h"
  29. using boost::asio::ip::tcp;
  30. using namespace dpf;
  31. typedef __m128i leaf_type;
  32. typedef __m128i leaf_t;
  33. typedef __m128i node_t;
  34. using socket_t = boost::asio::ip::tcp::socket;
  35. size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) * CHAR_BIT;
  36. bool is_packed = (sizeof(leaf_t) < sizeof(node_t));
  37. size_t leaves_per_node = is_packed ? sizeof(node_t) * CHAR_BIT / bits_per_leaf : 1;
  38. size_t __depth(const size_t nitems) { return std::ceil(std::log2(std::ceil(static_cast<double>(nitems) / leaves_per_node))); }
  39. size_t communication_cost = 0;
  40. #include "mpc.h"
  41. #include "dpfgen.h"
  42. #include "share-conversion.h"
  43. void mpc_gen (const size_t depth, AES_KEY& prgkey, const size_t db_nitems,
  44. const size_t n_threads, std::vector<socket_t>& sockets0, std::vector<socket_t>& sockets1, __m128i** output0, int8_t ** flags0, __m128i** output1, int8_t ** flags1, dpfP2 * dpf_instance0 ,
  45. dpfP2 * dpf_instance1, size_t ind,size_t socket_no = 0)
  46. {
  47. evaluate_dpfs(db_nitems, dpf_instance0[ind], prgkey, 0, db_nitems-1, output0[ind], flags0[ind], false, ind);
  48. evaluate_dpfs(db_nitems, dpf_instance1[ind], prgkey, 0, db_nitems-1, output1[ind], flags1[ind], true , ind);
  49. #ifdef DEBUG
  50. for(size_t j = 0; j < db_nitems; ++j)
  51. {
  52. std::cout << j << "-> " << (int) flags0[0][j] << " <-> " << (int) flags1[0][j] << std::endl;
  53. std::cout << j << "-> " << output0[0][j][0] << " <-> " << output1[0][j][0] << std::endl << std::endl;
  54. }
  55. #endif
  56. }
  57. void accept_conncections_from_Pb(boost::asio::io_context&io_context, std::vector<socket_t>& sockets0, int port, size_t j)
  58. {
  59. tcp::acceptor acceptor2_(io_context, tcp::endpoint(tcp::v4(), port));
  60. tcp::socket s2(acceptor2_.accept());
  61. sockets0[j] = std::move(s2);
  62. }
  63. int main(int argc, char* argv[])
  64. {
  65. AES_KEY aeskey;
  66. boost::asio::io_context io_context;
  67. tcp::resolver resolver(io_context);
  68. const std::string host0 = (argc < 2) ? "127.0.0.1" : argv[1];
  69. const std::string host1 = (argc < 3) ? "127.0.0.1" : argv[2];
  70. const size_t n_threads = atoi(argv[3]);
  71. const size_t number_of_sockets = 5 * n_threads;
  72. const size_t depth = atoi(argv[4]);
  73. const size_t db_nitems = 1ULL << depth;
  74. const size_t maxRAM = atoi(argv[5]);
  75. size_t RAM_needed_per_thread = 164 * db_nitems;
  76. std::cout << "RAM needed = " << n_threads*RAM_needed_per_thread << " bytes = " << n_threads*RAM_needed_per_thread/1073741824 << " GiB" << std::endl;
  77. std::cout << "RAM needed per thread = " << RAM_needed_per_thread << " bytes = " << (RAM_needed_per_thread>>30) << " GiB" << std::endl;
  78. size_t thread_per_batch = std::floor(double(maxRAM<<30)/RAM_needed_per_thread);
  79. if (thread_per_batch > n_threads) {
  80. thread_per_batch = n_threads;
  81. }
  82. std::cout << "thread_per_batch = " << thread_per_batch << std::endl;
  83. if (thread_per_batch < 1) {
  84. std::cout << "You need more RAM" << std::endl;
  85. exit(0);
  86. }
  87. size_t n_batches = std::ceil(double(n_threads)/thread_per_batch);
  88. std::cout << "n_batches = " << n_batches << std::endl;
  89. std::vector<int> ports2_0;
  90. for(size_t j = 0; j < number_of_sockets; ++j)
  91. {
  92. int port = 22000;
  93. ports2_0.push_back(port + j);
  94. }
  95. std::vector<int> ports2_1;
  96. for(size_t j = 0; j < number_of_sockets; ++j)
  97. {
  98. int port = 42000;
  99. ports2_1.push_back(port + j);
  100. }
  101. std::vector<socket_t> sockets0;
  102. std::vector<socket_t> sockets1;
  103. sockets0.reserve(number_of_sockets + 1);
  104. sockets1.reserve(number_of_sockets + 1);
  105. boost::asio::thread_pool pool2(number_of_sockets * 2);
  106. for(size_t j = 0; j < number_of_sockets; ++j)
  107. {
  108. boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(sockets1), ports2_1[j], j));
  109. }
  110. for(size_t j = 0; j < number_of_sockets; ++j)
  111. {
  112. boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(sockets0), ports2_0[j], j));
  113. }
  114. pool2.join();
  115. auto start = std::chrono::steady_clock::now();
  116. cw_construction computecw0_array, computecw1_array;
  117. // for(size_t i = 0; i < 128; ++i)
  118. // {
  119. // for(size_t j = 0; j < depth; ++j)
  120. // {
  121. __m128i rand0, rand1, gamma0, gamma1;
  122. arc4random_buf(&rand0, sizeof(__m128i));
  123. arc4random_buf(&rand1, sizeof(__m128i));
  124. uint8_t bit0, bit1;
  125. bit0 = rand();
  126. bit0 = bit0 % 2;
  127. bit1 = rand();
  128. bit1 = bit1 %2;
  129. gamma0 = (bit1 == 1) ? rand0 : _mm_setzero_si128();
  130. gamma1 = (bit0 == 1) ? rand1 : _mm_setzero_si128();
  131. computecw0_array.rand_b = rand0;
  132. computecw0_array.gamma_b = gamma0;
  133. computecw0_array.bit_b = bit0;
  134. computecw1_array.rand_b = rand1;
  135. computecw1_array.gamma_b = gamma1;
  136. computecw1_array.bit_b = bit1;
  137. // }
  138. // }
  139. boost::asio::write(sockets0[0], boost::asio::buffer(&computecw0_array, sizeof(computecw0_array)));
  140. boost::asio::write(sockets1[0], boost::asio::buffer(&computecw1_array, sizeof(computecw1_array)));
  141. communication_cost += sizeof(computecw0_array);
  142. communication_cost += sizeof(computecw1_array);
  143. dpfP2 * dpf_instance0 = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
  144. dpfP2 * dpf_instance1 = (dpfP2 * ) malloc (sizeof(dpfP2) * n_threads);
  145. boost::asio::read(sockets0[0], boost::asio::buffer(dpf_instance0, n_threads * sizeof(dpfP2)));
  146. boost::asio::read(sockets1[0], boost::asio::buffer(dpf_instance1, n_threads * sizeof(dpfP2)));
  147. __m128i ** output0 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
  148. int8_t ** flags0 = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
  149. for(size_t j = 0; j < thread_per_batch; ++j)
  150. {
  151. output0[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
  152. flags0[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
  153. }
  154. __m128i ** output1 = (__m128i ** ) malloc(sizeof(__m128i *) * thread_per_batch);
  155. int8_t ** flags1 = (int8_t ** ) malloc(sizeof(uint8_t *) * thread_per_batch);
  156. for(size_t j = 0; j < thread_per_batch; ++j)
  157. {
  158. output1[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
  159. flags1[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
  160. }
  161. for(size_t iter = 0; iter < n_batches; ++iter)
  162. {
  163. if (n_batches > 1) {
  164. printf("Starting mpc_gen batch %lu / %lu\n", iter+1, n_batches);
  165. }
  166. boost::asio::thread_pool pool(thread_per_batch);
  167. for(size_t j = 0; j < thread_per_batch; ++j)
  168. {
  169. boost::asio::post(pool, std::bind(mpc_gen, std::ref(depth), std::ref(aeskey), db_nitems, n_threads, std::ref(sockets0), std::ref(sockets1),
  170. output0, flags0, output1, flags1, std::ref(dpf_instance0), std::ref(dpf_instance1), j, 5 * j));
  171. }
  172. pool.join();
  173. }
  174. free(dpf_instance0);
  175. free(dpf_instance1);
  176. boost::asio::thread_pool pool3(thread_per_batch);
  177. int64_t ** leaves0 = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
  178. int64_t ** leafbits0 = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
  179. int64_t ** leaves1 = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
  180. int64_t ** leafbits1 = (int64_t ** ) malloc(sizeof(int64_t *) * thread_per_batch);
  181. for(size_t j = 0; j < thread_per_batch; ++j)
  182. {
  183. leaves0[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
  184. leafbits0[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
  185. leaves1[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
  186. leafbits1[j] = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t));
  187. }
  188. /* The function convert_sharesP2 appears in share-conversion.h */
  189. for(size_t j = 0; j < thread_per_batch; ++j)
  190. {
  191. boost::asio::post(pool3, std::bind(convert_sharesP2, db_nitems, output0, flags0, output1, flags1, leaves0, leafbits0, leaves1, leafbits1, std::ref(sockets0), std::ref(sockets1), j, j));
  192. }
  193. pool3.join();
  194. /* The function P2_xor_to_additive appears in share-conversion.h */
  195. boost::asio::thread_pool pool4(thread_per_batch);
  196. for(size_t j = 0; j < thread_per_batch; ++j)
  197. {
  198. boost::asio::post(pool4, std::bind(P2_xor_to_additive, std::ref(sockets0[j]), std::ref(sockets1[j]), j));
  199. }
  200. pool4.join();
  201. for(size_t j = 0; j < thread_per_batch; ++j)
  202. {
  203. free(leafbits0[j]);
  204. free(leafbits1[j]);
  205. free(output0[j]);
  206. free(output1[j]);
  207. }
  208. free(leafbits0);
  209. free(leafbits1);
  210. free(output1);
  211. free(output0);
  212. /* For the artifact, don't actually write these in order to not use very
  213. * large amounts of storage
  214. for(size_t i = 0; i < thread_per_batch; ++i)
  215. {
  216. P2_write_evalfull_outs_into_a_file(false, i, db_nitems, flags0[i], leaves0[i]);
  217. P2_write_evalfull_outs_into_a_file(true, i, db_nitems, flags1[i], leaves1[i]);
  218. }
  219. */
  220. for(size_t j = 0; j < thread_per_batch; ++j)
  221. {
  222. free(leaves0[j]);
  223. free(leaves1[j]);
  224. free(flags0[j]);
  225. free(flags1[j]);
  226. }
  227. free(leaves0);
  228. free(leaves1);
  229. free(flags0);
  230. free(flags1);
  231. #ifdef DEBUG
  232. for(size_t ind = 0; ind < n_threads; ++ind)
  233. {
  234. for(size_t j = 0; j < db_nitems; ++j)
  235. {
  236. if(flags0[ind][j] + flags1[ind][j] != 0)
  237. {
  238. std::cout << j << "-> " << (int) (flags0[ind][j] + flags1[ind][j]) << " = " << (int) (flags0[ind][j]) << " + " << (int) (flags1[ind][j]) << std::endl;
  239. std::cout << j << "-> " << output0[ind][j][0] << " <-> " << output1[ind][j][0] << std::endl << std::endl;
  240. }
  241. }
  242. }
  243. #endif
  244. auto end = std::chrono::steady_clock::now();
  245. std::chrono::duration<double> elapsed_seconds = end-start;
  246. //std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << atoi(argv[4]) << " is: " << elapsed_seconds.count() << "s\n";
  247. std::cout << "WallClockTime: " << elapsed_seconds.count() << std::endl;
  248. std::cout << "CommunicationCost: " << communication_cost << " bytes" << std::endl;
  249. return 0;
  250. }