преди 1 година · e9bbb4b09f
--- a/rdpf.cpp
+++ b/rdpf.cpp
@@ -6,14 +6,19 @@
 
				 #include "aes.hpp"
			
 
				 #include "prg.hpp"
			
 
				 
			
 
				-#ifdef DPF_DEBUG
			
 
				+// Don't warn if we never actually use these functions
			
 
				 static void dump_node(DPFnode node, const char *label = NULL)
			
 
				+__attribute__ ((unused));
			
 
				+static void dump_level(DPFnode *nodes, size_t num, const char *label = NULL)
			
 
				+__attribute__ ((unused));
			
 
				+
			
 
				+static void dump_node(DPFnode node, const char *label)  // Debug helper: print one node as 32 hex digits followed by a newline
			
 
				 {
			
 
				     if (label) printf("%s: ", label);  // optional prefix, emitted only when label is non-null
			
 
				     for(int i=0;i<16;++i) { printf("%02x", ((unsigned char *)&node)[15-i]); } printf("\n");  // reads bytes 15 down to 0 of node, so the highest-addressed byte is printed first
			
 
				 }
			
 
				 
			
 
				-static void dump_level(DPFnode *nodes, size_t num, const char *label = NULL)
			
 
				+static void dump_level(DPFnode *nodes, size_t num, const char *label)
			
 
				 {
			
 
				     if (label) printf("%s:\n", label);
			
 
				     for (size_t i=0;i<num;++i) {
			
@@ -21,7 +26,22 @@ static void dump_level(DPFnode *nodes, size_t num, const char *label = NULL)
 
				     }
			
 
				     printf("\n");
			
 
				 }
			
 
				-#endif
			
 
				+
			
 
				+// Compute the multiplicative inverse of x mod 2^{VALUE_BITS} (x must be odd;
			
 
				+// even values have no inverse).  This is the same as computing x to the
			
 
				+// power of 2^{VALUE_BITS-1}-1, because odd x satisfies x^(2^{VALUE_BITS-1}) = 1.
			
 
				+static value_t inverse_value_t(value_t x)
			
 
				+{
			
 
				+    int expon = 1;
			
 
				+    value_t xe = x;  // x^(2^1 - 1) = x, so the invariant below holds on entry
			
 
				+    // Invariant: xe = x^(2^expon - 1) mod 2^{VALUE_BITS}
			
 
				+    // Goal: compute x^(2^{VALUE_BITS-1} - 1)
			
 
				+    while (expon < VALUE_BITS-1) {  // NOTE(review): relies on value_t products wrapping mod 2^{VALUE_BITS}; confirm value_t is an unsigned type of that width
			
 
				+        xe = xe * xe * x;  // new exponent is 2*(2^expon - 1) + 1 = 2^(expon+1) - 1
			
 
				+        ++expon;  // restores the invariant for the updated xe
			
 
				+    }
			
 
				+    return xe;  // here expon == VALUE_BITS-1, so xe is the goal power
			
 
				+}
			
 
				 
			
 
				 // Construct a DPF with the given (XOR-shared) target location, and
			
 
				 // of the given depth, to be used for random-access memory reads and
			
@@ -53,12 +73,7 @@ RDPF::RDPF(MPCTIO &tio, yield_t &yield,
 
				     while(level < depth) {
			
 
				         delete[] curlevel;
			
 
				         curlevel = nextlevel;
			
 
				-        // We don't need to store the last level
			
 
				-        if (level < depth-1) {
			
 
				-            nextlevel = new DPFnode[1<<(level+1)];
			
 
				-        } else {
			
 
				-            nextlevel = NULL;
			
 
				-        }
			
 
				+        nextlevel = new DPFnode[1<<(level+1)];
			
 
				         // Invariant: curlevel has 2^level elements; nextlevel has
			
 
				         // 2^{level+1} elements
			
 
				 
			
@@ -88,12 +103,13 @@ RDPF::RDPF(MPCTIO &tio, yield_t &yield,
 
				         // side; if bs_choice = 1, it should be the XOR of our left side
			
 
				         // and our peer's left side.
			
 
				 
			
 
				-        // We have to ensure that the flag bits (the lsb) of the side
			
 
				-        // that will end up the same be of course the same, but also
			
 
				-        // that the flag bits (the lsb) of the side that will end up
			
 
				-        // different _must_ be different.  That is, it's not enough for
			
 
				-        // the nodes of the child selected by choice to be different as
			
 
				-        // 128-bit values; they also have to be different in their lsb.
			
 
				+        // We also have to ensure that the flag bits (the lsb) of the
			
 
				+        // side that will end up the same be of course the same, but
			
 
				+        // also that the flag bits (the lsb) of the side that will end
			
 
				+        // up different _must_ be different.  That is, it's not enough
			
 
				+        // for the nodes of the child selected by choice to be different
			
 
				+        // as 128-bit values; they also have to be different in their
			
 
				+        // lsb.
			
 
				 
			
 
				         // This is where we make a small optimization over Appendix C of
			
 
				         // the Duoram paper: instead of keeping separate correction flag
			
@@ -150,12 +166,82 @@ RDPF::RDPF(MPCTIO &tio, yield_t &yield,
 
				         cfbits |= (size_t(parity_bit)<<level);
			
 
				         DPFnode CWR = _mm_xor_si128(CW,lsb128_mask[parity_bit]);
			
 
				         if (player < 2) {
			
 
				-            if (nextlevel) {
			
 
				+            if (level < depth-1) {
			
 
				                 for(size_t i=0;i<curlevel_size;++i) {
			
 
				                     bool flag = get_lsb(curlevel[i]);
			
 
				                     nextlevel[2*i] = xor_if(nextlevel[2*i], CW, flag);
			
 
				                     nextlevel[2*i+1] = xor_if(nextlevel[2*i+1], CWR, flag);
			
 
				                 }
			
 
				+            } else {
			
 
				+                // Recall there are four potentially useful vectors that
			
 
				+                // can come out of a DPF:
			
 
				+                // - (single-bit) bitwise unit vector
			
 
				+                // - additive-shared unit vector
			
 
				+                // - XOR-shared scaled unit vector
			
 
				+                // - additive-shared scaled unit vector
			
 
				+                //
			
 
				+                // (No single DPF should be used for both of the first
			
 
				+                // two or both of the last two, though, since they're
			
 
				+                // correlated; you _can_ use one of the first two and
			
 
				+                // one of the last two.)
			
 
				+                //
			
 
				+                // For each 128-bit leaf, the low bit is the flag bit,
			
 
				+                // and we're guaranteed that the flag bits (and indeed
			
 
				+                // the whole 128-bit value) for P0 and P1 are the same
			
 
				+                // for every leaf except the target, and that the flag
			
 
				+                // bits definitely differ for the target (and the other
			
 
				+                // 127 bits are independently random on each side).
			
 
				+                //
			
 
				+                // We divide the 128-bit leaf into a low 64-bit word and
			
 
				+                // a high 64-bit word.  We use the low word for the unit
			
 
				+                // vector and the high word for the scaled vector; this
			
 
				+                // choice is not arbitrary: the flag bit in the low word
			
 
				+                // means that the sum of all the low words (with P1's
			
 
				+                // low words negated) across both P0 and P1 is
			
 
				+                // definitely odd, so we can compute that sum's inverse
			
 
				+                // mod 2^64, and store it now during precomputation.  At
			
 
				+                // evaluation time for the additive-shared unit vector,
			
 
				+                // we will output this global inverse times the low word
			
 
				+                // of each leaf, which will make the sum of all of those
			
 
				+                // values 1.
			
 
				+                //
			
 
				+                // For the scaled vector, we just have to compute shares
			
 
				+                // of what the scaled vector is a sharing _of_, but
			
 
				+                // that's just XORing or adding all of each party's
			
 
				+                // local high words; no communication needed.
			
 
				+
			
 
				+                value_t low_sum = 0;
			
 
				+                value_t high_sum = 0;
			
 
				+                value_t high_xor = 0;
			
 
				+                for(size_t i=0;i<curlevel_size;++i) {
			
 
				+                    bool flag = get_lsb(curlevel[i]);
			
 
				+                    DPFnode leftchild = xor_if(nextlevel[2*i], CW, flag);
			
 
				+                    DPFnode rightchild = xor_if(nextlevel[2*i+1], CWR, flag);
			
 
				+                    value_t leftlow = value_t(_mm_cvtsi128_si64x(leftchild));
			
 
				+                    value_t rightlow = value_t(_mm_cvtsi128_si64x(rightchild));
			
 
				+                    value_t lefthigh =
			
 
				+                        value_t(_mm_cvtsi128_si64x(_mm_srli_si128(leftchild,8)));
			
 
				+                    value_t righthigh =
			
 
				+                        value_t(_mm_cvtsi128_si64x(_mm_srli_si128(rightchild,8)));
			
 
				+                    low_sum += (leftlow + rightlow);
			
 
				+                    high_sum += (lefthigh + righthigh);
			
 
				+                    high_xor ^= (lefthigh ^ righthigh);
			
 
				+                }
			
 
				+                if (player == 1) {
			
 
				+                    low_sum = -low_sum;
			
 
				+                    high_sum = -high_sum;
			
 
				+                }
			
 
				+                scaled_sum.ashare = high_sum;
			
 
				+                scaled_xor.xshare = high_xor;
			
 
				+                // Exchange low_sum and add them up
			
 
				+                tio.queue_peer(&low_sum, sizeof(low_sum));
			
 
				+                yield();
			
 
				+                value_t peer_low_sum;
			
 
				+                tio.recv_peer(&peer_low_sum, sizeof(peer_low_sum));
			
 
				+                low_sum += peer_low_sum;
			
 
				+                // The low_sum had better be odd
			
 
				+                assert(low_sum & 1);
			
 
				+                unit_sum_inverse = inverse_value_t(low_sum);
			
 
				             }
			
 
				             cw.push_back(CW);
			
 
				         }
			
--- a/rdpf.hpp
+++ b/rdpf.hpp
@@ -15,12 +15,27 @@ struct RDPF {
 
				     std::vector<DPFnode> cw;
			
 
				     // correction flag bits: the one for level i is bit i of this word
			
 
				     value_t cfbits;
			
 
				+    // The amount we have to scale the low words of the leaf values by
			
 
				+    // to get additive shares of a unit vector
			
 
				+    value_t unit_sum_inverse;
			
 
				+    // Additive share of the scaling value M_as such that the high words
			
 
				+    // of the leaf values for P0 and P1 add to M_as * e_{target}
			
 
				+    RegAS scaled_sum;
			
 
				+    // XOR share of the scaling value M_xs such that the high words
			
 
				+    // of the leaf values for P0 and P1 XOR to M_xs * e_{target}
			
 
				+    RegXS scaled_xor;
			
 
				 
			
 
				     // Construct a DPF with the given (XOR-shared) target location, and
			
 
				     // of the given depth, to be used for random-access memory reads and
			
 
				     // writes.  The DPF is constructed collaboratively by P0 and P1,
			
 
				     // with the server P2 helping by providing various kinds of
			
 
				     // correlated randomness, such as MultTriples and AndTriples.
			
 
				+    //
			
 
				+    // Cost:
			
 
				+    // (3 DPFnode + 1 byte)*depth + 1 word communication in
			
 
				+    // 2*depth + 1 messages
			
 
				+    // 3*depth DPFnode communication from P2 to each party
			
 
				+    // 2^{depth+1}-2 local AES operations
			
 
				     RDPF(MPCTIO &tio, yield_t &yield,
			
 
				         RegXS target, nbits_t depth);
			
 
				 };