123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- #include <bsd/stdlib.h> // arc4random_buf
- #include "rdpf.hpp"
- #include "bitutils.hpp"
- #include "mpcops.hpp"
- #include "aes.hpp"
- #include "prg.hpp"
- // Don't warn if we never actually use these functions
- static void dump_node(DPFnode node, const char *label = NULL)
- __attribute__ ((unused));
- static void dump_level(DPFnode *nodes, size_t num, const char *label = NULL)
- __attribute__ ((unused));
- static void dump_node(DPFnode node, const char *label)
- {
- if (label) printf("%s: ", label);
- for(int i=0;i<16;++i) { printf("%02x", ((unsigned char *)&node)[15-i]); } printf("\n");
- }
- static void dump_level(DPFnode *nodes, size_t num, const char *label)
- {
- if (label) printf("%s:\n", label);
- for (size_t i=0;i<num;++i) {
- dump_node(nodes[i]);
- }
- printf("\n");
- }
- // Compute the multiplicative inverse of x mod 2^{VALUE_BITS}
- // This is the same as computing x to the power of
- // 2^{VALUE_BITS-1}-1.
- static value_t inverse_value_t(value_t x)
- {
- int expon = 1;
- value_t xe = x;
- // Invariant: xe = x^(2^expon - 1) mod 2^{VALUE_BITS}
- // Goal: compute x^(2^{VALUE_BITS-1} - 1)
- while (expon < VALUE_BITS-1) {
- xe = xe * xe * x;
- ++expon;
- }
- return xe;
- }
- // Construct a DPF with the given (XOR-shared) target location, and
- // of the given depth, to be used for random-access memory reads and
- // writes. The DPF is construction collaboratively by P0 and P1,
- // with the server P2 helping by providing various kinds of
- // correlated randomness, such as MultTriples and AndTriples.
- //
- // This algorithm is based on Appendix C from the Duoram paper, with a
- // small optimization noted below.
- RDPF::RDPF(MPCTIO &tio, yield_t &yield,
- RegXS target, nbits_t depth)
- {
- int player = tio.player();
- size_t &aesops = tio.aes_ops();
- // Choose a random seed
- arc4random_buf(&seed, sizeof(seed));
- // Ensure the flag bits (the lsb of each node) are different
- seed = set_lsb(seed, !!player);
- cfbits = 0;
- // The root level is just the seed
- nbits_t level = 0;
- DPFnode *curlevel = NULL;
- DPFnode *nextlevel = new DPFnode[1];
- nextlevel[0] = seed;
- // Construct each intermediate level
- while(level < depth) {
- delete[] curlevel;
- curlevel = nextlevel;
- nextlevel = new DPFnode[1<<(level+1)];
- // Invariant: curlevel has 2^level elements; nextlevel has
- // 2^{level+1} elements
- // The bit-shared choice bit is bit (depth-level-1) of the
- // XOR-shared target index
- RegBS bs_choice = target.bit(depth-level-1);
- size_t curlevel_size = (size_t(1)<<level);
- DPFnode L = _mm_setzero_si128();
- DPFnode R = _mm_setzero_si128();
- // The server doesn't need to do this computation, but it does
- // need to execute mpc_reconstruct_choice so that it sends
- // the AndTriples at the appropriate time.
- if (player < 2) {
- for(size_t i=0;i<curlevel_size;++i) {
- DPFnode lchild, rchild;
- prgboth(lchild, rchild, curlevel[i], aesops);
- L = (L ^ lchild);
- R = (R ^ rchild);
- if (nextlevel) {
- nextlevel[2*i] = lchild;
- nextlevel[2*i+1] = rchild;
- }
- }
- }
- // If we're going left (bs_choice = 0), we want the correction
- // word to be the XOR of our right side and our peer's right
- // side; if bs_choice = 1, it should be the XOR or our left side
- // and our peer's left side.
- // We also have to ensure that the flag bits (the lsb) of the
- // side that will end up the same be of course the same, but
- // also that the flag bits (the lsb) of the side that will end
- // up different _must_ be different. That is, it's not enough
- // for the nodes of the child selected by choice to be different
- // as 128-bit values; they also have to be different in their
- // lsb.
- // This is where we make a small optimization over Appendix C of
- // the Duoram paper: instead of keeping separate correction flag
- // bits for the left and right children, we observe that the low
- // bit of the overall correction word effectively serves as one
- // of those bits, so we just need to store one extra bit per
- // level, not two. (We arbitrarily choose the one for the right
- // child.)
- // Note that the XOR of our left and right child before and
- // after applying the correction word won't change, since the
- // correction word is applied to either both children or
- // neither, depending on the value of the parent's flag. So in
- // particular, the XOR of the flag bits won't change, and if our
- // children's flag's XOR equals our peer's children's flag's
- // XOR, then we won't have different flag bits even for the
- // children that have different 128-bit values.
- // So we compute our_parity = lsb(L^R)^player, and we XOR that
- // into the R value in the correction word computation. At the
- // same time, we exchange these parity values to compute the
- // combined parity, which we store in the DPF. Then when the
- // DPF is evaluated, if the parent's flag is set, not only apply
- // the correction work to both children, but also apply the
- // (combined) parity bit to just the right child. Then for
- // unequal nodes (where the flag bit is different), exactly one
- // of the four children (two for P0 and two for P1) will have
- // the parity bit applied, which will set the XOR of the lsb of
- // those four nodes to just L0^R0^L1^R1^our_parity^peer_parity
- // = 1 because everything cancels out except player (for which
- // one player is 0 and the other is 1).
- bool our_parity_bit = get_lsb(L ^ R) ^ !!player;
- DPFnode our_parity = lsb128_mask[our_parity_bit];
- DPFnode CW;
- bool peer_parity_bit;
- // Exchange the parities and do mpc_reconstruct_choice at the
- // same time (bundled into the same rounds)
- std::vector<coro_t> coroutines;
- coroutines.emplace_back(
- [&](yield_t &yield) {
- tio.queue_peer(&our_parity_bit, 1);
- yield();
- tio.recv_peer(&peer_parity_bit, 1);
- });
- coroutines.emplace_back(
- [&](yield_t &yield) {
- mpc_reconstruct_choice(tio, yield, CW, bs_choice,
- (R ^ our_parity), L);
- });
- run_coroutines(yield, coroutines);
- bool parity_bit = our_parity_bit ^ peer_parity_bit;
- cfbits |= (size_t(parity_bit)<<level);
- DPFnode CWR = CW ^ lsb128_mask[parity_bit];
- if (player < 2) {
- if (level < depth-1) {
- for(size_t i=0;i<curlevel_size;++i) {
- bool flag = get_lsb(curlevel[i]);
- nextlevel[2*i] = xor_if(nextlevel[2*i], CW, flag);
- nextlevel[2*i+1] = xor_if(nextlevel[2*i+1], CWR, flag);
- }
- } else {
- // Recall there are four potentially useful vectors that
- // can come out of a DPF:
- // - (single-bit) bitwise unit vector
- // - additive-shared unit vector
- // - XOR-shared scaled unit vector
- // - additive-shared scaled unit vector
- //
- // (No single DPF should be used for both of the first
- // two or both of the last two, though, since they're
- // correlated; you _can_ use one of the first two and
- // one of the last two.)
- //
- // For each 128-bit leaf, the low bit is the flag bit,
- // and we're guaranteed that the flag bits (and indeed
- // the whole 128-bit value) for P0 and P1 are the same
- // for every leaf except the target, and that the flag
- // bits definitely differ for the target (and the other
- // 127 bits are independently random on each side).
- //
- // We divide the 128-bit leaf into a low 64-bit word and
- // a high 64-bit word. We use the low word for the unit
- // vector and the high word for the scaled vector; this
- // choice is not arbitrary: the flag bit in the low word
- // means that the sum of all the low words (with P1's
- // low words negated) across both P0 and P1 is
- // definitely odd, so we can compute that sum's inverse
- // mod 2^64, and store it now during precomputation. At
- // evaluation time for the additive-shared unit vector,
- // we will output this global inverse times the low word
- // of each leaf, which will make the sum of all of those
- // values 1.
- //
- // For the scaled vector, we just have to compute shares
- // of what the scaled vector is a sharing _of_, but
- // that's just XORing or adding all of each party's
- // local high words; no communication needed.
- value_t low_sum = 0;
- value_t high_sum = 0;
- value_t high_xor = 0;
- for(size_t i=0;i<curlevel_size;++i) {
- bool flag = get_lsb(curlevel[i]);
- DPFnode leftchild = xor_if(nextlevel[2*i], CW, flag);
- DPFnode rightchild = xor_if(nextlevel[2*i+1], CWR, flag);
- value_t leftlow = value_t(_mm_cvtsi128_si64x(leftchild));
- value_t rightlow = value_t(_mm_cvtsi128_si64x(rightchild));
- value_t lefthigh =
- value_t(_mm_cvtsi128_si64x(_mm_srli_si128(leftchild,8)));
- value_t righthigh =
- value_t(_mm_cvtsi128_si64x(_mm_srli_si128(rightchild,8)));
- low_sum += (leftlow + rightlow);
- high_sum += (lefthigh + righthigh);
- high_xor ^= (lefthigh ^ righthigh);
- }
- if (player == 1) {
- low_sum = -low_sum;
- high_sum = -high_sum;
- }
- scaled_sum.ashare = high_sum;
- scaled_xor.xshare = high_xor;
- // Exchange low_sum and add them up
- tio.queue_peer(&low_sum, sizeof(low_sum));
- yield();
- value_t peer_low_sum;
- tio.recv_peer(&peer_low_sum, sizeof(peer_low_sum));
- low_sum += peer_low_sum;
- // The low_sum had better be odd
- assert(low_sum & 1);
- unit_sum_inverse = inverse_value_t(low_sum);
- }
- cw.push_back(CW);
- }
- ++level;
- }
- delete[] curlevel;
- delete[] nextlevel;
- }
|