#include // arc4random_buf #include "rdpf.hpp" #include "bitutils.hpp" #include "mpcops.hpp" // Compute the multiplicative inverse of x mod 2^{VALUE_BITS} // This is the same as computing x to the power of // 2^{VALUE_BITS-1}-1. static value_t inverse_value_t(value_t x) { int expon = 1; value_t xe = x; // Invariant: xe = x^(2^expon - 1) mod 2^{VALUE_BITS} // Goal: compute x^(2^{VALUE_BITS-1} - 1) while (expon < VALUE_BITS-1) { xe = xe * xe * x; ++expon; } return xe; } #undef RDPF_MTGEN_TIMING_1 #ifdef RDPF_MTGEN_TIMING_1 // Timing tests for multithreaded generation of RDPFs // nthreads = 0 to not launch threads at all // run for num_iters iterations, output the number of millisections // total for all of the iterations // // Results: roughly 50 µs to launch the thread pool with 1 thread, and // roughly 30 additional µs for each additional thread. Each iteration // of the inner loop takes about 4 to 5 ns. This works out to around // level 19 where it starts being worth it to multithread, and you // should use at most sqrt(2^{level}/6000) threads. static void mtgen_timetest_1(nbits_t level, int nthreads, size_t num_iters, const DPFnode *curlevel, DPFnode *nextlevel, size_t &aes_ops) { if (num_iters == 0) { num_iters = 1; } size_t prev_aes_ops = aes_ops; DPFnode L = _mm_setzero_si128(); DPFnode R = _mm_setzero_si128(); // The tweak causes us to compute something slightly different every // iteration of the loop, so that the compiler doesn't notice we're // doing the same thing num_iters times and optimize it away DPFnode tweak = _mm_setzero_si128(); auto start = boost::chrono::steady_clock::now(); for(size_t iter=0;iter(elapsed) << " " << (aes_ops-prev_aes_ops) << " AES\n"; dump_node(L); dump_node(R); } #endif // Construct a DPF with the given (XOR-shared) target location, and // of the given depth, to be used for random-access memory reads and // writes. The DPF is construction collaboratively by P0 and P1, // with the server P2 helping by providing various kinds of // correlated randomness, such as MultTriples and AndTriples. // // This algorithm is based on Appendix C from the Duoram paper, with a // small optimization noted below. RDPF::RDPF(MPCTIO &tio, yield_t &yield, RegXS target, nbits_t depth, bool save_expansion) { int player = tio.player(); size_t &aes_ops = tio.aes_ops(); // Choose a random seed arc4random_buf(&seed, sizeof(seed)); // Ensure the flag bits (the lsb of each node) are different seed = set_lsb(seed, !!player); cfbits = 0; whichhalf = (player == 1); // The root level is just the seed nbits_t level = 0; DPFnode *curlevel = NULL; DPFnode *nextlevel = new DPFnode[1]; nextlevel[0] = seed; // Construct each intermediate level while(level < depth) { if (player < 2) { delete[] curlevel; curlevel = nextlevel; if (save_expansion && level == depth-1) { expansion.resize(1<>level, curlevel, nextlevel, aes_ops); size_t niters = 2048; if (level > 8) niters = (1<<20)>>level; for(int t=1;t<=8;++t) { mtgen_timetest_1(level, t, niters, curlevel, nextlevel, aes_ops); } mtgen_timetest_1(level, 0, (1<<23)>>level, curlevel, nextlevel, aes_ops); } #endif // Using the timing results gathered above, decide whether // to multithread, and if so, how many threads to use. // tio.cpu_nthreads() is the maximum number we have // available. int max_nthreads = tio.cpu_nthreads(); if (max_nthreads == 1 || level < 19) { // No threading size_t laes_ops = 0; for(size_t i=0;i max_nthreads) { nthreads = max_nthreads; } DPFnode tL[nthreads]; DPFnode tR[nthreads]; size_t taes_ops[nthreads]; size_t threadstart = 0; size_t threadchunk = curlevel_size / nthreads; size_t threadextra = curlevel_size % nthreads; boost::asio::thread_pool pool(nthreads); for (int t=0;t max_nthreads) { nthreads = max_nthreads; } size_t threadstart = 0; size_t threadchunk = curlevel_size / nthreads; size_t threadextra = curlevel_size % nthreads; boost::asio::thread_pool pool(nthreads); for (int t=0;t max_nthreads) { nthreads = max_nthreads; } value_t tlow_sum[nthreads]; value_t thigh_sum[nthreads]; value_t thigh_xor[nthreads]; size_t threadstart = 0; size_t threadchunk = curlevel_size / nthreads; size_t threadextra = curlevel_size % nthreads; boost::asio::thread_pool pool(nthreads); for (int t=0;tdepth(); size_t num_leaves = size_t(1)< index goes for example from (in binary) // 010010110 -> 010011000, then index_xor will be // 000001110 and how_many_1_bits will be 3. // That indicates that path[depth-3] was a left child, and now // we need to change it to a right child by descending right // from path[depth-4], and then filling the path after that with // left children. path[depth-how_many_1_bits] = descend(path[depth-how_many_1_bits-1], depth-how_many_1_bits-1, 1, aes_ops); for (nbits_t i = depth-how_many_1_bits; i < depth-1; ++i) { path[i+1] = descend(path[i], i, 0, aes_ops); } lastindex = index; expansion[index++] = descend(path[depth-1], depth-1, 0, aes_ops); expansion[index++] = descend(path[depth-1], depth-1, 1, aes_ops); } delete[] path; } // Construct three RDPFs of the given depth all with the same randomly // generated target index. RDPFTriple::RDPFTriple(MPCTIO &tio, yield_t &yield, nbits_t depth, bool save_expansion) { // Pick a random XOR share of the target xs_target.randomize(depth); // Now create three RDPFs with that target, and also convert the XOR // shares of the target to additive shares std::vector coroutines; for (int i=0;i<3;++i) { coroutines.emplace_back( [this, &tio, depth, i, save_expansion](yield_t &yield) { dpf[i] = RDPF(tio, yield, xs_target, depth, save_expansion); }); } coroutines.emplace_back( [this, &tio, depth](yield_t &yield) { mpc_xs_to_as(tio, yield, as_target, xs_target, depth, false); }); run_coroutines(yield, coroutines); } RDPFTriple::node RDPFTriple::descend(const RDPFTriple::node &parent, nbits_t parentdepth, bit_t whichchild, size_t &aes_ops) const { auto [P0, P1, P2] = parent; DPFnode C0, C1, C2; C0 = dpf[0].descend(P0, parentdepth, whichchild, aes_ops); C1 = dpf[1].descend(P1, parentdepth, whichchild, aes_ops); C2 = dpf[2].descend(P2, parentdepth, whichchild, aes_ops); return std::make_tuple(C0,C1,C2); } RDPFPair::node RDPFPair::descend(const RDPFPair::node &parent, nbits_t parentdepth, bit_t whichchild, size_t &aes_ops) const { auto [P0, P1] = parent; DPFnode C0, C1; C0 = dpf[0].descend(P0, parentdepth, whichchild, aes_ops); C1 = dpf[1].descend(P1, parentdepth, whichchild, aes_ops); return std::make_tuple(C0,C1); }