// preprocessing.cpp
  1. #include <type_traits> // std::is_same<>
  2. #include <limits> // std::numeric_limits<>
  3. #include <climits> // CHAR_BIT
  4. #include <cmath> // std::log2, std::ceil, std::floor
  5. #include <stdexcept> // std::runtime_error
  6. #include <array> // std::array<>
  7. #include <iostream> // std::istream and std::ostream
  8. #include <vector> // std::vector<>
  9. #include <memory> // std::shared_ptr<>
  10. #include <utility> // std::move
  11. #include <algorithm> // std::copy
  12. #include <cstring> // std::memcpy
  13. #include <bsd/stdlib.h> // arc4random_buf
  14. #include <x86intrin.h> // SSE and AVX intrinsics
  15. #include <boost/asio/thread_pool.hpp>
  16. #include "bitutils.h"
  17. #include "block.h"
  18. #include "prg.h"
  19. #include "prg_aes_impl.h"
  20. #include <iostream>
  21. #include <fcntl.h>
  22. #include <cstdlib>
  23. #include "block.h"
  24. #include <chrono>
  25. #include <sys/mman.h>
  26. #include <sys/stat.h>
  27. #include <fcntl.h>
  28. #include <fstream>
  29. #include <future>
  30. #include <boost/asio.hpp>
  31. using boost::asio::ip::tcp;
  32. #include <mutex>
  33. #include <boost/lexical_cast.hpp>
  34. using socket_t = boost::asio::ip::tcp::socket;
  35. typedef unsigned char byte_t;
  36. typedef __m128i node_t;
  37. block<__m128i> seed_for_blinds;
  38. constexpr size_t leaf_size = 1;
  39. typedef __m128i leaf_type;
  40. typedef std::array<leaf_type, leaf_size> leaf_t;
  41. size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) * CHAR_BIT;
  42. bool is_packed = (sizeof(leaf_t) < sizeof(node_t));
  43. size_t leaves_per_node = is_packed ? sizeof(node_t) * CHAR_BIT / bits_per_leaf : 1;
  /**
   * Number of bits needed to index `nitems` distinct items, i.e. the smallest
   * `b` such that 2^b >= nitems.
   *
   * Computed with integer arithmetic only: the floating-point form
   * ceil(log2(nitems)) is undefined for nitems == 0 (converting -inf to
   * size_t) and loses precision once nitems exceeds 2^53.
   *
   * @param nitems number of items to address
   * @return ceil(log2(nitems)) for nitems >= 1, and 0 for nitems == 0
   */
  size_t input_bits(const size_t nitems)
  {
    if (nitems == 0)
    {
      return 0;
    }
    // The bit width of (nitems - 1) equals ceil(log2(nitems)).
    size_t bits = 0;
    for (size_t remaining = nitems - 1; remaining != 0; remaining >>= 1)
    {
      ++bits;
    }
    return bits;
  }
  45. leaf_t val;
  46. using namespace dpf;
  47. #include "mpc.h"
  /**
   * Allocates and fills one 64-byte target-share vector per thread.
   *
   * Despite the name, the result is deterministic: every byte is 0, except that
   * when `party` is true byte `expo - 2` is set to 1. The earlier version drew a
   * random byte (and re-seeded rand() with srand(3)) for every entry and then
   * immediately overwrote it; that dead work has been removed.
   *
   * @param target_share_read array of at least `n_threads` pointers; each entry
   *        is overwritten with a buffer obtained from new[] (64 bytes) that the
   *        caller owns and must release with delete[]
   * @param n_threads number of vectors to create
   * @param party     when true, the byte at index `expo - 2` is set to 1
   * @param expo      exponent selecting the marked byte; must be in [2, 65]
   *                  whenever a byte is actually marked
   * @throws std::out_of_range if `party` is true, `n_threads` > 0 and
   *         `expo - 2` does not index into the 64-byte vector (the earlier
   *         version wrote out of bounds in that case)
   */
  void generate_random_targets(uint8_t **target_share_read, size_t n_threads, bool party, size_t expo)
  {
    constexpr size_t target_len = 64;

    // Validate before allocating anything so a failure leaks no memory.
    if (party && n_threads > 0 && (expo < 2 || expo - 2 >= target_len))
    {
      throw std::out_of_range("generate_random_targets: expo must be in [2, 65]");
    }

    for (size_t i = 0; i < n_threads; ++i)
    {
      // The trailing () value-initialises the array, i.e. all bytes are zero.
      target_share_read[i] = new uint8_t[target_len]();
      if (party)
      {
        target_share_read[i][expo - 2] = 1;
      }
    }
  }
  68. void compute_CW(bool party, tcp::socket &sout, __m128i L, __m128i R, uint8_t bit, __m128i &CW)
  69. {
  70. // struct cw_construction
  71. //{
  72. __m128i rand_b, gamma_b;
  73. uint8_t bit_b;
  74. //};
  75. __m128i *X, *Y;
  76. if (party)
  77. {
  78. std::string qfile = std::string("./gamma1");
  79. int qfd = open(qfile.c_str(), O_RDWR);
  80. X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
  81. PROT_READ, MAP_PRIVATE, qfd, 0);
  82. qfile = std::string("./x1");
  83. qfd = open(qfile.c_str(), O_RDWR);
  84. Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
  85. PROT_READ, MAP_PRIVATE, qfd, 0);
  86. }
  87. if (!party)
  88. {
  89. std::string qfile = std::string("./gamma0");
  90. int qfd = open(qfile.c_str(), O_RDWR);
  91. X = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
  92. PROT_READ, MAP_PRIVATE, qfd, 0);
  93. qfile = std::string("./x0");
  94. qfd = open(qfile.c_str(), O_RDWR);
  95. Y = (__m128i *)mmap(NULL, 8 * sizeof(__m128i),
  96. PROT_READ, MAP_PRIVATE, qfd, 0);
  97. }
  98. // cw_construction computecw;
  99. // read(sin, boost::asio::buffer(&computecw, sizeof(computecw)));
  100. // computecw.rand_b;
  101. //__m128i gamma_b = computecw.gamma_b;
  102. if (party)
  103. {
  104. rand_b = Y[0]; //_mm_set_epi32(0x6fef9434, 0x6768121e, 0x20942286, 0x1b59f7a7);
  105. gamma_b = X[0]; // _mm_set_epi32(0x6a499109 , 0x803067dd , 0xd1e2281b , 0xe71b6262);
  106. bit_b = 1; // computecw.bit_b;
  107. }
  108. else
  109. {
  110. rand_b = Y[0]; // _mm_set_epi32(0xb29747df, 0xf7300f6d, 0x9476d971, 0xd5f75d98);
  111. gamma_b = X[0]; // _mm_set_epi32(0xb73142e2 , 0x10687aae , 0x06500d3ec , 0x29b5c85d);
  112. bit_b = 1; // computecw.bit_b;
  113. }
  114. uint8_t blinded_bit, blinded_bit_read;
  115. blinded_bit = bit ^ bit_b;
  116. __m128i blinded_L = L ^ R ^ rand_b;
  117. __m128i blinded_L_read;
  118. struct BlindsCW
  119. {
  120. __m128i blinded_message;
  121. uint8_t blinded_bit;
  122. };
  123. BlindsCW blinds_sent, blinds_recv;
  124. blinds_sent.blinded_bit = blinded_bit;
  125. blinds_sent.blinded_message = blinded_L;
  126. boost::asio::write(sout, boost::asio::buffer(&blinds_sent, sizeof(blinds_sent)));
  127. boost::asio::read(sout, boost::asio::buffer(&blinds_recv, sizeof(blinds_recv)));
  128. blinded_bit_read = blinds_recv.blinded_bit;
  129. blinded_L_read = blinds_recv.blinded_message;
  130. __m128i out_ = R ^ gamma_b; //_mm_setzero_si128;
  131. if (bit)
  132. {
  133. out_ ^= (L ^ R ^ blinded_L_read);
  134. }
  135. if (blinded_bit_read)
  136. {
  137. out_ ^= rand_b;
  138. }
  139. __m128i out_reconstruction;
  140. boost::asio::write(sout, boost::asio::buffer(&out_, sizeof(out_)));
  141. boost::asio::read(sout, boost::asio::buffer(&out_reconstruction, sizeof(out_reconstruction)));
  142. out_reconstruction = out_ ^ out_reconstruction;
  143. CW = out_reconstruction;
  144. #ifdef DEBUG
  145. uint8_t bit_reconstruction;
  146. boost::asio::write(sout, boost::asio::buffer(&bit, sizeof(bit)));
  147. boost::asio::read(sout, boost::asio::buffer(&bit_reconstruction, sizeof(bit_reconstruction)));
  148. bit_reconstruction = bit ^ bit_reconstruction;
  149. __m128i L_reconstruction;
  150. boost::asio::write(sout, boost::asio::buffer(&L, sizeof(L)));
  151. boost::asio::read(sout, boost::asio::buffer(&L_reconstruction, sizeof(L_reconstruction)));
  152. L_reconstruction = L ^ L_reconstruction;
  153. __m128i R_reconstruction;
  154. boost::asio::write(sout, boost::asio::buffer(&R, sizeof(R)));
  155. boost::asio::read(sout, boost::asio::buffer(&R_reconstruction, sizeof(R_reconstruction)));
  156. R_reconstruction = R ^ R_reconstruction;
  157. __m128i CW_debug;
  158. if (bit_reconstruction != 0)
  159. {
  160. CW_debug = L_reconstruction;
  161. }
  162. else
  163. {
  164. CW_debug = R_reconstruction;
  165. }
  166. assert(CW_debug[0] == CW[0]);
  167. assert(CW_debug[1] == CW[1]);
  168. #endif
  169. }
  /**
   * Builds a 128-bit mask by shifting an all-ones register left, lane by lane.
   *
   * 32-bit lane k (k = 0 is the lowest) is shifted left by (128 - 32*k) - n,
   * where the subtraction is the unsigned saturating 16-bit subtract and so
   * clamps at 0. For 0 <= n <= 128 the result has its n most significant bits
   * set and all other bits clear.
   *
   * @param n number of leading bits to set
   * @return the mask
   */
  __m128i bit_mask_avx2_msb(unsigned int n)
  {
    // Lanes listed low to high; same register as _mm_set_epi32(32, 64, 96, 128).
    const __m128i lane_limits = _mm_setr_epi32(128, 96, 64, 32);
    const __m128i requested = _mm_set1_epi32(n);
    const __m128i left_shift = _mm_subs_epu16(lane_limits, requested);
    // A per-lane shift count above 31 yields 0 for that lane.
    return _mm_sllv_epi32(_mm_set1_epi32(-1), left_shift);
  }
  /**
   * Builds a 128-bit mask by shifting an all-ones register right, lane by lane.
   *
   * 32-bit lane k (k = 0 is the lowest) is shifted right by 32*(k + 1) - n,
   * where the subtraction is the unsigned saturating 16-bit subtract and so
   * clamps at 0. For 0 <= n <= 128 the result has its n least significant bits
   * set and all other bits clear.
   *
   * @param n number of trailing bits to set
   * @return the mask
   */
  __m128i bit_mask_avx2_lsb(unsigned int n)
  {
    // Lanes listed low to high; same register as _mm_set_epi32(128, 96, 64, 32).
    const __m128i lane_limits = _mm_setr_epi32(32, 64, 96, 128);
    const __m128i requested = _mm_set1_epi32(n);
    const __m128i right_shift = _mm_subs_epu16(lane_limits, requested);
    // A per-lane shift count above 31 yields 0 for that lane.
    return _mm_srlv_epi32(_mm_set1_epi32(-1), right_shift);
  }
  186. template <typename node_t, typename prgkey_t>
  187. static inline void traverse(const prgkey_t &prgkey, const node_t &seed, node_t s[2])
  188. {
  189. dpf::PRG(prgkey, clear_lsb(seed, 0b11), s, 2);
  190. } // dpf::expand
  191. inline void evalfull_mpc(const size_t &nodes_per_leaf, const size_t &depth, const size_t &nbits, const size_t &nodes_in_interval,
  192. const AES_KEY &prgkey, uint8_t target_share[64], std::vector<socket_t> &socketsPb,
  193. const size_t from, const size_t to, __m128i *output, int8_t *_t, __m128i &final_correction_word, bool party, size_t socket_no = 0)
  194. {
  195. __m128i root;
  196. arc4random_buf(&root, sizeof(root));
  197. root = set_lsb(root, party);
  198. const size_t from_node = std::floor(static_cast<double>(from) / nodes_per_leaf);
  199. __m128i *s[2] = {
  200. reinterpret_cast<__m128i *>(output) + nodes_in_interval * (nodes_per_leaf - 1),
  201. s[0] + nodes_in_interval / 2};
  202. int8_t *t[2] = {_t, _t + nodes_in_interval / 2};
  203. int curlayer = depth % 2;
  204. s[curlayer][0] = root;
  205. t[curlayer][0] = get_lsb(root, 0b01);
  206. __m128i *CW = (__m128i *)std::aligned_alloc(sizeof(__m256i), depth * sizeof(__m128i));
  207. for (size_t layer = 0; layer < depth; ++layer)
  208. {
  209. #ifdef VERBOSE
  210. printf("layer = %zu\n", layer);
  211. #endif
  212. curlayer = 1 - curlayer;
  213. size_t i = 0, j = 0;
  214. auto nextbit = (from_node >> (nbits - layer - 1)) & 1;
  215. size_t nodes_in_prev_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer)));
  216. size_t nodes_in_cur_layer = std::ceil(static_cast<double>(nodes_in_interval) / (1ULL << (depth - layer - 1)));
  217. __m128i L = _mm_setzero_si128();
  218. __m128i R = _mm_setzero_si128();
  219. for (i = nextbit, j = nextbit; j < nodes_in_prev_layer - 1; ++j, i += 2)
  220. {
  221. traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);
  222. L ^= s[curlayer][i];
  223. R ^= s[curlayer][i + 1];
  224. }
  225. if (nodes_in_prev_layer > j)
  226. {
  227. if (i < nodes_in_cur_layer - 1)
  228. {
  229. traverse(prgkey, s[1 - curlayer][j], &s[curlayer][i]);
  230. L ^= s[curlayer][i];
  231. R ^= s[curlayer][i + 1];
  232. }
  233. }
  234. compute_CW(party, socketsPb[socket_no], L, R, target_share[layer], CW[layer]);
  235. uint8_t advice_L = get_lsb(L) ^ target_share[layer];
  236. uint8_t advice_R = get_lsb(R) ^ target_share[layer];
  237. uint8_t cwt_L, cwt_R;
  238. uint8_t advice[2];
  239. uint8_t cwts[2];
  240. advice[0] = advice_L;
  241. advice[1] = advice_R;
  242. boost::asio::write(socketsPb[socket_no + 1], boost::asio::buffer(&advice, sizeof(advice)));
  243. boost::asio::read(socketsPb[socket_no + 1], boost::asio::buffer(&cwts, sizeof(cwts)));
  244. cwt_L = cwts[0];
  245. cwt_R = cwts[1];
  246. cwt_L = cwt_L ^ advice_L ^ 1;
  247. cwt_R = cwt_R ^ advice_R;
  248. for (size_t j = 0; j < nodes_in_prev_layer; ++j)
  249. {
  250. t[curlayer][2 * j] = get_lsb(s[curlayer][2 * j]) ^ (cwt_L & t[1 - curlayer][j]);
  251. s[curlayer][2 * j] = clear_lsb(xor_if(s[curlayer][2 * j], CW[layer], !t[1 - curlayer][j]), 0b11);
  252. t[curlayer][(2 * j) + 1] = get_lsb(s[curlayer][(2 * j) + 1]) ^ (cwt_R & t[1 - curlayer][j]);
  253. s[curlayer][(2 * j) + 1] = clear_lsb(xor_if(s[curlayer][(2 * j) + 1], CW[layer], !t[1 - curlayer][j]), 0b11);
  254. }
  255. }
  256. __m128i Gamma = _mm_setzero_si128();
  257. for (size_t i = 0; i < to + 1; ++i)
  258. {
  259. Gamma[0] += output[i][0];
  260. Gamma[1] += output[i][1];
  261. }
  262. if (party)
  263. {
  264. Gamma[0] = -Gamma[0];
  265. Gamma[1] = -Gamma[1];
  266. }
  267. boost::asio::write(socketsPb[socket_no + 3], boost::asio::buffer(&Gamma, sizeof(Gamma)));
  268. boost::asio::read(socketsPb[socket_no + 3], boost::asio::buffer(&final_correction_word, sizeof(final_correction_word)));
  269. final_correction_word = Gamma; // final_correction_word + Gamma;
  270. } // dpf::__evalinterval
  271. void convert_shares(__m128i **output, int8_t **flags, size_t n_threads, size_t db_nitems, __m128i *final_correction_word, tcp::socket &sb, bool party)
  272. {
  273. for (size_t j = 0; j < db_nitems; ++j)
  274. {
  275. for (size_t k = 0; k < n_threads; ++k)
  276. {
  277. if (party)
  278. {
  279. output[k][j] = -output[k][j];
  280. flags[k][j] = -flags[k][j];
  281. }
  282. }
  283. #ifdef DEBUG
  284. int8_t out = flags[0][j];
  285. int8_t out_rec;
  286. boost::asio::write(sb, boost::asio::buffer(&out, sizeof(out)));
  287. boost::asio::read(sb, boost::asio::buffer(&out_rec, sizeof(out_rec)));
  288. out_rec = out_rec + out;
  289. if (out_rec != 0)
  290. std::cout << j << "(flags) --> " << (int)out_rec << std::endl
  291. << std::endl;
  292. __m128i out2 = output[0][j];
  293. __m128i out_rec2;
  294. boost::asio::write(sb, boost::asio::buffer(&out2, sizeof(out2)));
  295. boost::asio::read(sb, boost::asio::buffer(&out_rec2, sizeof(out_rec2)));
  296. out_rec2 = out_rec2 + out2;
  297. if (out_rec2[0] != 0)
  298. std::cout << j << "--> " << out_rec2[0] << std::endl;
  299. #endif
  300. }
  301. for (size_t i = 0; i < n_threads; ++i)
  302. {
  303. int64_t pm = 0;
  304. int64_t rb;
  305. arc4random_buf(&rb, sizeof(rb));
  306. for (size_t j = 0; j < db_nitems; ++j)
  307. {
  308. if (party)
  309. {
  310. if (flags[i][j] != 0)
  311. pm -= 1;
  312. }
  313. if (!party)
  314. {
  315. if (flags[i][j] != 0)
  316. pm += 1; // flags[0][j];
  317. }
  318. }
  319. }
  320. }
  321. void accept_conncections_from_Pb(boost::asio::io_context &io_context, std::vector<socket_t> &socketsPb, int port, size_t j)
  322. {
  323. tcp::acceptor acceptor_a(io_context, tcp::endpoint(tcp::v4(), port));
  324. tcp::socket sb_a(acceptor_a.accept());
  325. socketsPb[j] = std::move(sb_a);
  326. }
  327. int main(int argc, char *argv[])
  328. {
  329. boost::asio::io_context io_context;
  330. tcp::resolver resolver(io_context);
  331. const std::string host1 = argv[1];
  332. const size_t n_threads = atoi(argv[2]);
  333. const size_t number_of_sockets = 5 * n_threads;
  334. const size_t expo = atoi(argv[3]);
  335. const size_t db_nitems = 1ULL << expo;
  336. std::vector<socket_t> socketsPb;
  337. for (size_t j = 0; j < number_of_sockets + 1; ++j)
  338. {
  339. tcp::socket emptysocket(io_context);
  340. socketsPb.emplace_back(std::move(emptysocket));
  341. }
  342. socketsPb.reserve(number_of_sockets + 1);
  343. std::vector<int> ports;
  344. for (size_t j = 0; j < number_of_sockets; ++j)
  345. {
  346. int port = 6000;
  347. ports.push_back(port + j);
  348. }
  349. std::vector<int> ports2_0;
  350. for (size_t j = 0; j < number_of_sockets; ++j)
  351. {
  352. int port = 20000;
  353. ports2_0.push_back(port + j);
  354. }
  355. std::vector<int> ports2_1;
  356. for (size_t j = 0; j < number_of_sockets; ++j)
  357. {
  358. int port = 40000;
  359. ports2_1.push_back(port + j);
  360. }
  361. bool party;
  362. #if (PARTY == 0)
  363. party = false;
  364. for (size_t j = 0; j < number_of_sockets; ++j)
  365. {
  366. tcp::socket sb_a(io_context);
  367. boost::asio::connect(sb_a, resolver.resolve({host1, std::to_string(ports[j])}));
  368. socketsPb[j] = std::move(sb_a);
  369. }
  370. #else
  371. party = true;
  372. boost::asio::thread_pool pool2(number_of_sockets);
  373. for (size_t j = 0; j < number_of_sockets; ++j)
  374. {
  375. boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(socketsPb), ports[j], j));
  376. }
  377. pool2.join();
  378. #endif
  379. __m128i *final_correction_word = (__m128i *)std::aligned_alloc(sizeof(__m256i), n_threads * sizeof(__m128i));
  380. AES_KEY aeskey;
  381. __m128i **output = (__m128i **)malloc(sizeof(__m128i *) * n_threads);
  382. int8_t **flags = (int8_t **)malloc(sizeof(uint8_t *) * n_threads);
  383. for (size_t j = 0; j < n_threads; ++j)
  384. {
  385. output[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i));
  386. flags[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t));
  387. }
  388. const size_t bits_per_leaf = std::is_same<leaf_t, bool>::value ? 1 : sizeof(leaf_t) * CHAR_BIT;
  389. const bool is_packed = (sizeof(leaf_t) < sizeof(node_t));
  390. const size_t nodes_per_leaf = is_packed ? 1 : std::ceil(static_cast<double>(bits_per_leaf) / (sizeof(node_t) * CHAR_BIT));
  391. const size_t depth = std::ceil(std::log2(db_nitems));
  392. const size_t nbits = std::ceil(std::log2(db_nitems));
  393. const size_t nodes_in_interval = db_nitems - 1;
  394. boost::asio::thread_pool pool(n_threads);
  395. #ifdef VERBOSE
  396. printf("n_threads = %zu\n\n", n_threads);
  397. #endif
  398. auto start = std::chrono::steady_clock::now();
  399. uint8_t **target_share_read = new uint8_t *[n_threads];
  400. generate_random_targets(target_share_read, n_threads, party, expo);
  401. for (size_t j = 0; j < n_threads; ++j)
  402. {
  403. boost::asio::post(pool, std::bind(evalfull_mpc, std::ref(nodes_per_leaf), std::ref(depth), std::ref(nbits), std::ref(nodes_in_interval),
  404. std::ref(aeskey), target_share_read[j], std::ref(socketsPb), 0, db_nitems - 1, output[j],
  405. flags[j], std::ref(final_correction_word[j]), party, 5 * j));
  406. }
  407. pool.join();
  408. auto end = std::chrono::steady_clock::now();
  409. std::chrono::duration<double> elapsed_seconds = end - start;
  410. std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << expo << " is: " << elapsed_seconds.count() << "s\n";
  411. convert_shares(output, flags, n_threads, db_nitems, final_correction_word, socketsPb[0], party);
  412. if (!party)
  413. {
  414. char const *p0_filename0;
  415. p0_filename0 = "../duoram-online/preprocflags/party0_read_flags_b";
  416. int w0 = open(p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD);
  417. int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0]));
  418. if (written < 0) perror("Write error");
  419. close(w0);
  420. }
  421. else
  422. {
  423. char const *p0_filename0;
  424. p0_filename0 = "../duoram-online/preprocflags/party1_read_flags_b";
  425. int w0 = open(p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD);
  426. int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0]));
  427. if (written < 0) perror("Write error");
  428. close(w0);
  429. }
  430. return 0;
  431. }