#include // std::is_same<> #include // std::numeric_limits<> #include // CHAR_BIT #include // std::log2, std::ceil, std::floor #include // std::runtime_error #include // std::array<> #include // std::istream and std::ostream #include // std::vector<> #include // std::shared_ptr<> #include // std::move #include // std::copy #include // std::memcpy #include // arc4random_buf #include // SSE and AVX intrinsics #include #include "bitutils.h" #include "block.h" #include "prg.h" #include "prg_aes_impl.h" #include #include #include #include "block.h" #include #include #include #include #include #include #include using boost::asio::ip::tcp; #include #include using socket_t = boost::asio::ip::tcp::socket; typedef unsigned char byte_t; typedef __m128i node_t; block<__m128i> seed_for_blinds; constexpr size_t leaf_size = 1; typedef __m128i leaf_type; typedef std::array leaf_t; size_t bits_per_leaf = std::is_same::value ? 1 : sizeof(leaf_t) * CHAR_BIT; bool is_packed = (sizeof(leaf_t) < sizeof(node_t)); size_t leaves_per_node = is_packed ? sizeof(node_t) * CHAR_BIT / bits_per_leaf : 1; size_t input_bits(const size_t nitems) { return std::ceil(std::log2(nitems)); } leaf_t val; using namespace dpf; #include "mpc.h" void compute_CW(bool party, tcp::socket& sout, __m128i L, __m128i R, uint8_t bit, __m128i & CW) { //struct cw_construction //{ __m128i rand_b, gamma_b; uint8_t bit_b; //}; //cw_construction computecw; // read(sin, boost::asio::buffer(&computecw, sizeof(computecw))); //computecw.rand_b; //__m128i gamma_b = computecw.gamma_b; if(party) { rand_b = _mm_set_epi32(0x6fef9434, 0x6768121e, 0x20942286, 0x1b59f7a7); gamma_b = _mm_set_epi32(0x6a499109 , 0x803067dd , 0xd1e2281b , 0xe71b6262); bit_b = 1;// computecw.bit_b; } else { rand_b = _mm_set_epi32(0xb29747df, 0xf7300f6d, 0x9476d971, 0xd5f75d98); gamma_b = _mm_set_epi32(0xb73142e2 , 0x10687aae , 0x06500d3ec , 0x29b5c85d); bit_b = 1;// computecw.bit_b; } // #ifdef DEBUG // __m128i rand_b2, gamma_b2; // uint8_t bit_b2; // read(sin, boost::asio::buffer(&rand_b2, sizeof(rand_b))); // read(sin, boost::asio::buffer(&gamma_b2, sizeof(gamma_b))); // read(sin, boost::asio::buffer(&bit_b2, sizeof(bit_b))); // assert(rand_b2[0] == rand_b[0]); // assert(rand_b2[1] == rand_b[1]); // assert(gamma_b2[0] == gamma_b[0]); // assert(gamma_b2[1] == gamma_b[1]); // assert(bit_b2 == bit_b); // #endif uint8_t blinded_bit, blinded_bit_read; blinded_bit = bit ^ bit_b; __m128i blinded_L = L ^ R ^ rand_b; __m128i blinded_L_read; struct BlindsCW { __m128i blinded_message; uint8_t blinded_bit; }; BlindsCW blinds_sent, blinds_recv; blinds_sent.blinded_bit = blinded_bit; blinds_sent.blinded_message = blinded_L; boost::asio::write(sout, boost::asio::buffer(&blinds_sent, sizeof(blinds_sent))); boost::asio::read(sout, boost::asio::buffer(&blinds_recv, sizeof(blinds_recv))); blinded_bit_read = blinds_recv.blinded_bit; blinded_L_read = blinds_recv.blinded_message; __m128i out_ = R ^ gamma_b;//_mm_setzero_si128; if(bit) { out_ ^= (L ^ R ^ blinded_L_read); } if(blinded_bit_read) { out_ ^= rand_b; } __m128i out_reconstruction; boost::asio::write(sout, boost::asio::buffer(&out_, sizeof(out_))); boost::asio::read(sout, boost::asio::buffer(&out_reconstruction, sizeof(out_reconstruction))); out_reconstruction = out_ ^ out_reconstruction; CW = out_reconstruction; // #ifdef DEBUG // uint8_t bit_reconstruction; // boost::asio::write(sout, boost::asio::buffer(&bit, sizeof(bit))); // boost::asio::read(sout, boost::asio::buffer(&bit_reconstruction, sizeof(bit_reconstruction))); // bit_reconstruction = bit ^ bit_reconstruction; // __m128i L_reconstruction; // boost::asio::write(sout, boost::asio::buffer(&L, sizeof(L))); // boost::asio::read(sout, boost::asio::buffer(&L_reconstruction, sizeof(L_reconstruction))); // L_reconstruction = L ^ L_reconstruction; // __m128i R_reconstruction; // boost::asio::write(sout, boost::asio::buffer(&R, sizeof(R))); // boost::asio::read(sout, boost::asio::buffer(&R_reconstruction, sizeof(R_reconstruction))); // R_reconstruction = R ^ R_reconstruction; // __m128i CW_debug; // if(bit_reconstruction != 0) // { // CW_debug = L_reconstruction; // } // else // { // CW_debug = R_reconstruction; // } // assert(CW_debug[0] == CW[0]); // assert(CW_debug[1] == CW[1]); // #endif } __m128i bit_mask_avx2_msb(unsigned int n) { __m128i ones = _mm_set1_epi32(-1); __m128i cnst32_128 = _mm_set_epi32(32,64,96,128); __m128i shift = _mm_set1_epi32(n); shift = _mm_subs_epu16(cnst32_128,shift); return _mm_sllv_epi32(ones,shift); } __m128i bit_mask_avx2_lsb(unsigned int n) { __m128i ones = _mm_set1_epi32(-1); __m128i cnst32_128 = _mm_set_epi32(128,96,64,32); __m128i shift = _mm_set1_epi32(n); shift = _mm_subs_epu16(cnst32_128,shift); return _mm_srlv_epi32(ones,shift); } template static inline void traverse(const prgkey_t & prgkey, const node_t & seed, node_t s[2]) { dpf::PRG(prgkey, clear_lsb(seed, 0b11), s, 2); } // dpf::expand inline void evalfull_mpc(const size_t& nodes_per_leaf, const size_t& depth, const size_t& nbits, const size_t& nodes_in_interval, const AES_KEY& prgkey, uint8_t target_share[64], std::vector& socketsPb, const size_t from, const size_t to, __m128i * output, int8_t * _t, __m128i& final_correction_word, bool party, size_t socket_no = 0) { __m128i root; arc4random_buf(&root, sizeof(root)); root = set_lsb(root, party); const size_t from_node = std::floor(static_cast(from) / nodes_per_leaf); __m128i * s[2] = { reinterpret_cast<__m128i *>(output) + nodes_in_interval * (nodes_per_leaf - 1), s[0] + nodes_in_interval / 2 }; int8_t * t[2] = { _t, _t + nodes_in_interval / 2}; int curlayer = depth % 2; s[curlayer][0] = root; t[curlayer][0] = get_lsb(root, 0b01); __m128i * CW = (__m128i *) std::aligned_alloc(sizeof(__m256i), depth * sizeof(__m128i)); for (size_t layer = 0; layer < depth; ++layer) { #ifdef VERBOSE printf("layer = %zu\n", layer); #endif curlayer = 1-curlayer; size_t i=0, j=0; auto nextbit = (from_node >> (nbits-layer-1)) & 1; size_t nodes_in_prev_layer = std::ceil(static_cast(nodes_in_interval) / (1ULL << (depth-layer))); size_t nodes_in_cur_layer = std::ceil(static_cast(nodes_in_interval) / (1ULL << (depth-layer-1))); __m128i L = _mm_setzero_si128(); __m128i R = _mm_setzero_si128(); for (i = nextbit, j = nextbit; j < nodes_in_prev_layer-1; ++j, i+=2) { traverse(prgkey, s[1-curlayer][j], &s[curlayer][i]); L ^= s[curlayer][i]; R ^= s[curlayer][i+1]; } if (nodes_in_prev_layer > j) { if (i < nodes_in_cur_layer - 1) { traverse(prgkey, s[1-curlayer][j], &s[curlayer][i]); L ^= s[curlayer][i]; R ^= s[curlayer][i+1]; } } compute_CW(party, socketsPb[socket_no], L, R, target_share[layer], CW[layer]); uint8_t advice_L = get_lsb(L) ^ target_share[layer]; uint8_t advice_R = get_lsb(R) ^ target_share[layer]; uint8_t cwt_L, cwt_R; uint8_t advice[2]; uint8_t cwts[2]; advice[0] = advice_L; advice[1] = advice_R; boost::asio::write(socketsPb[socket_no+1], boost::asio::buffer(&advice, sizeof(advice))); boost::asio::read(socketsPb[socket_no+1], boost::asio::buffer(&cwts, sizeof(cwts))); cwt_L = cwts[0]; cwt_R = cwts[1]; cwt_L = cwt_L ^ advice_L ^ 1; cwt_R = cwt_R ^ advice_R; for(size_t j = 0; j < nodes_in_prev_layer; ++j) { t[curlayer][2*j] = get_lsb(s[curlayer][2*j]) ^ (cwt_L & t[1-curlayer][j]); s[curlayer][2*j] = clear_lsb(xor_if(s[curlayer][2*j], CW[layer], !t[1-curlayer][j]), 0b11); t[curlayer][(2*j)+1] = get_lsb(s[curlayer][(2*j)+1]) ^ (cwt_R & t[1-curlayer][j]); s[curlayer][(2*j)+1] = clear_lsb(xor_if(s[curlayer][(2*j)+1], CW[layer], !t[1-curlayer][j]), 0b11); } } __m128i Gamma = _mm_setzero_si128(); for (size_t i = 0; i < to + 1; ++i) { Gamma[0] += output[i][0]; Gamma[1] += output[i][1]; } if(party) { Gamma[0] = -Gamma[0]; Gamma[1] = -Gamma[1]; } boost::asio::write(socketsPb[socket_no + 3], boost::asio::buffer(&Gamma, sizeof(Gamma))); boost::asio::read(socketsPb[socket_no + 3], boost::asio::buffer(&final_correction_word, sizeof(final_correction_word))); final_correction_word = Gamma; // final_correction_word + Gamma; } // dpf::__evalinterval void convert_shares(__m128i ** output, int8_t ** flags, size_t n_threads, size_t db_nitems, __m128i * final_correction_word, tcp::socket& sb, bool party) { for(size_t j = 0; j < db_nitems; ++j) { for(size_t k = 0; k < n_threads; ++k) { if(party) { output[k][j] = -output[k][j]; flags[k][j] = -flags[k][j]; } } //#ifdef DEBUG int8_t out = flags[0][j]; int8_t out_rec; boost::asio::write(sb, boost::asio::buffer(&out, sizeof(out))); boost::asio::read(sb, boost::asio::buffer(&out_rec, sizeof(out_rec))); out_rec = out_rec + out; if(out_rec != 0) std::cout << j << "(flags) --> " << (int) out_rec << std::endl << std::endl; __m128i out2 = output[0][j]; __m128i out_rec2; boost::asio::write(sb, boost::asio::buffer(&out2, sizeof(out2))); boost::asio::read(sb, boost::asio::buffer(&out_rec2, sizeof(out_rec2))); out_rec2 = out_rec2 + out2; if(out_rec2[0] != 0)std::cout << j << "--> " << out_rec2[0] << std::endl; //#endif } for(size_t i = 0; i < n_threads; ++i) { int64_t pm = 0; int64_t rb; arc4random_buf(&rb, sizeof(rb)); for(size_t j = 0; j < db_nitems; ++j) { if(party) { if(flags[i][j] != 0) pm -= 1; } if(!party) { if(flags[i][j] != 0) pm += 1;//flags[0][j]; } } // int64_t rp_prime; // rb_prime = du_attalah_Pb(rb, pm, s2, sb); // int64_t FCWshare = du_attalah_Pb(final_correction_word[i][1] + rb_prime, pm, s2, sb); // int64_t FCWshare_reconstruction; // boost::asio::write(sb, boost::asio::buffer(&FCWshare, sizeof(FCWshare))); // boost::asio::read(sb, boost::asio::buffer(&FCWshare_reconstruction, sizeof(FCWshare_reconstruction))); // FCWshare_reconstruction = FCWshare_reconstruction + FCWshare; // int64_t PM = pm + rb; // int64_t PM_recv; // boost::asio::write(sb, boost::asio::buffer(&PM, sizeof(PM))); // boost::asio::read(sb, boost::asio::buffer(&PM_recv, sizeof(PM_recv))); // int64_t * flags_ = (int64_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(int64_t)); // for(size_t j = 0; j < db_nitems; ++j) // { // flags_[j] = (flags[i][j] * pm) + (flags[i][j] * PM_recv) + (flags[i][j] * rb); // #ifdef DEBUG // // int64_t flags_rec; // // boost::asio::write(sb, boost::asio::buffer(&flags_[j], sizeof(flags_[j]))); // // boost::asio::read(sb, boost::asio::buffer(&flags_rec, sizeof(flags_rec))); // // flags_rec = flags_rec + flags_[j]; // // if(flags_rec != 0) std::cout << "intermediate value = " << flags_rec << std::endl; // #endif // } // for(size_t j = 0; j < db_nitems; ++j) // { // flags_[j] += output[i][j][1]; // if(!party) // { // if(flags[i][j] != 0) flags_[j] -= FCWshare_reconstruction;//(rb_reconstruction) + rbpm_fcw; // } // if(party) // { // if(flags[i][j] != 0) flags_[j] += FCWshare_reconstruction;// (rb_reconstruction) + rbpm_fcw; // } // #ifdef DEBUG // int64_t flags_rec; // boost::asio::write(sb, boost::asio::buffer(&flags_[j], sizeof(flags_[j]))); // boost::asio::read(sb, boost::asio::buffer(&flags_rec, sizeof(flags_rec))); // flags_rec = flags_rec + flags_[j]; // if(flags_rec != 0) // { // printf("flag reconstruction = %ld\n", flags_rec); // } // #endif // } //std::cout << std::endl << std::endl << " ------------------------------------------------------------------------------------------ " << std::endl << std::endl; } } void accept_conncections_from_Pb(boost::asio::io_context&io_context, std::vector& socketsPb, int port, size_t j) { tcp::acceptor acceptor_a(io_context, tcp::endpoint(tcp::v4(), port)); tcp::socket sb_a(acceptor_a.accept()); socketsPb[j] = std::move(sb_a); } int main(int argc, char * argv[]) { boost::asio::io_context io_context; tcp::resolver resolver(io_context); std::string addr = "127.0.0.1"; const std::string host1 = (argc < 2) ? "127.0.0.1" : argv[1]; const std::string host2 = (argc < 3) ? "127.0.0.1" : argv[2]; const size_t n_threads = atoi(argv[3]); const size_t number_of_sockets = 5 * n_threads; std::vector socketsPb; for(size_t j = 0; j < number_of_sockets + 1; ++j) { tcp::socket emptysocket(io_context); socketsPb.emplace_back(std::move(emptysocket)); } socketsPb.reserve(number_of_sockets + 1); //std::vector socketsP2; std::vector ports; for(size_t j = 0; j < number_of_sockets; ++j) { int port = 6000; ports.push_back(port + j); } std::vector ports2_0; for(size_t j = 0; j < number_of_sockets; ++j) { int port = 20000; ports2_0.push_back(port + j); } std::vector ports2_1; for(size_t j = 0; j < number_of_sockets; ++j) { int port = 40000; ports2_1.push_back(port + j); } bool party; #if (PARTY == 0) party = false; // for(size_t j = 0; j < number_of_sockets; ++j) // { // tcp::socket sb_a(io_context); // boost::asio::connect(sb_a, resolver.resolve({host2, std::to_string(ports2_0[j])})); // socketsP2.emplace_back(std::move(sb_a)); // } for(size_t j = 0; j < number_of_sockets; ++j) { tcp::socket sb_a(io_context); boost::asio::connect(sb_a, resolver.resolve({host1, std::to_string(ports[j])})); socketsPb[j] = std::move(sb_a); } #else party = true; for(size_t j = 0; j < number_of_sockets; ++j) { // tcp::socket sb_a(io_context); // boost::asio::connect(sb_a, resolver.resolve({host2, std::to_string(ports2_1[j])})); // socketsP2.emplace_back(std::move(sb_a)); } boost::asio::thread_pool pool2(number_of_sockets); for(size_t j = 0; j < number_of_sockets; ++j) { boost::asio::post(pool2, std::bind(accept_conncections_from_Pb, std::ref(io_context), std::ref(socketsPb), ports[j], j)); } pool2.join(); #endif const size_t db_nitems = 1ULL << atoi(argv[4]); const size_t n_writes = atoi(argv[5]); const size_t n_reads = atoi(argv[6]); std::cout << "n_reads = " << n_reads << std::endl; std::cout << "n_writes = " << n_writes << std::endl; __m128i * final_correction_word = (__m128i *) std::aligned_alloc(sizeof(__m256i), n_threads * sizeof(__m128i)); uint8_t target_share[64]; int** target_share_written = new int*[n_writes]; for(size_t i = 0; i < n_writes; i++) { target_share_written[i] = new int[64]; } int** target_share_read = new int*[n_reads]; for(size_t i = 0; i < n_reads; i++) { target_share_read[i] = new int[64]; } for(size_t j = 0; j < 64; ++j) { target_share[j] = rand(); target_share[j] = target_share[j] % 2; for(size_t i = 0; i < n_writes; ++i) { srand(2); target_share_written[i][j] = rand(); target_share_written[i][j] = target_share_written[i][j] % 2; } for(size_t i = 0; i < n_reads; ++i) { srand(3); target_share_read[i][j] = rand(); target_share_read[i][j] = target_share_read[i][j] % 2; } } AES_KEY aeskey; __m128i ** output = (__m128i ** ) malloc(sizeof(__m128i *) * n_threads); int8_t ** flags = (int8_t ** ) malloc(sizeof(uint8_t *) * n_threads); for(size_t j = 0; j < n_threads; ++j) { output[j] = (__m128i *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(__m128i)); flags[j] = (int8_t *)std::aligned_alloc(sizeof(node_t), db_nitems * sizeof(uint8_t)); } const size_t bits_per_leaf = std::is_same::value ? 1 : sizeof(leaf_t) * CHAR_BIT; const bool is_packed = (sizeof(leaf_t) < sizeof(node_t)); const size_t nodes_per_leaf = is_packed ? 1 : std::ceil(static_cast(bits_per_leaf) / (sizeof(node_t) * CHAR_BIT)); const size_t depth = std::ceil(std::log2(db_nitems)); const size_t nbits = std::ceil(std::log2(db_nitems)); const size_t nodes_in_interval = db_nitems-1; boost::asio::thread_pool pool(n_threads); printf("n_threads = %zu\n\n", n_threads); auto start = std::chrono::steady_clock::now(); for(size_t j = 0; j < n_threads; ++j) { boost::asio::post(pool, std::bind(evalfull_mpc, std::ref(nodes_per_leaf), std::ref(depth), std::ref(nbits), std::ref(nodes_in_interval), std::ref(aeskey), target_share, std::ref(socketsPb), 0, db_nitems-1, output[j], flags[j], std::ref(final_correction_word[j]), party, 5 * j)); } pool.join(); auto end = std::chrono::steady_clock::now(); std::chrono::duration elapsed_seconds = end-start; std::cout << "time to generate and evaluate " << n_threads << " dpfs of size 2^" << atoi(argv[4]) << " is: " << elapsed_seconds.count() << "s\n"; convert_shares(output, flags, n_threads, db_nitems ,final_correction_word, socketsPb[0], party); if(!party) { char const * p0_filename0; p0_filename0 = "party0_read_flags_b"; int w0 = open( p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD); int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0])); if(written<0) { perror("Write error"); } close(w0); } else { char const * p0_filename0; p0_filename0 = "party1_read_flags_b"; int w0 = open( p0_filename0, O_WRONLY | O_CREAT, S_IWRITE | S_IREAD); int written = write(w0, flags[0], db_nitems * sizeof(flags[0][0])); if(written<0) { perror("Write error"); } close(w0); } return 0; }