|
@@ -6,14 +6,19 @@
|
|
|
#include "aes.hpp"
|
|
|
#include "prg.hpp"
|
|
|
|
|
|
-#ifdef DPF_DEBUG
|
|
|
+// Debug printing helpers.  Don't warn if we never actually use these functions
|
|
|
static void dump_node(DPFnode node, const char *label = NULL)
|
|
|
+__attribute__ ((unused));
|
|
|
+static void dump_level(DPFnode *nodes, size_t num, const char *label = NULL)
|
|
|
+__attribute__ ((unused));
|
|
|
+// Print the optional label, then the 16 bytes of node as hex (byte 15 first, byte 0 last), then a newline
|
|
|
+static void dump_node(DPFnode node, const char *label)
|
|
|
{
|
|
|
if (label) printf("%s: ", label);
|
|
|
for(int i=0;i<16;++i) { printf("%02x", ((unsigned char *)&node)[15-i]); } printf("\n");
|
|
|
}
|
|
|
|
|
|
-static void dump_level(DPFnode *nodes, size_t num, const char *label = NULL)
|
|
|
+static void dump_level(DPFnode *nodes, size_t num, const char *label)
|
|
|
{
|
|
|
if (label) printf("%s:\n", label);
|
|
|
for (size_t i=0;i<num;++i) {
|
|
@@ -21,7 +26,22 @@ static void dump_level(DPFnode *nodes, size_t num, const char *label = NULL)
|
|
|
}
|
|
|
printf("\n");
|
|
|
}
|
|
|
-#endif
|
|
|
+
|
|
|
+// Compute the multiplicative inverse of x mod 2^{VALUE_BITS}; x must be odd, or no inverse exists.
|
|
|
+// This is the same as computing x to the power of
|
|
|
+// 2^{VALUE_BITS-1}-1, because the odd residues mod 2^{VALUE_BITS} form a group of order 2^{VALUE_BITS-1}.
|
|
|
+static value_t inverse_value_t(value_t x)
|
|
|
+{
|
|
|
+ int expon = 1;
|
|
|
+ value_t xe = x;
|
|
|
+ // Invariant: xe = x^(2^expon - 1) mod 2^{VALUE_BITS} (holds initially: expon = 1 and xe = x)
|
|
|
+ // Goal: compute x^(2^{VALUE_BITS-1} - 1), i.e., reach expon = VALUE_BITS-1
|
|
|
+ while (expon < VALUE_BITS-1) {
|
|
|
+ xe = xe * xe * x; // (x^(2^expon - 1))^2 * x = x^(2^(expon+1) - 1)
|
|
|
+ ++expon;
|
|
|
+ }
|
|
|
+ return xe;
|
|
|
+}
|
|
|
|
|
|
// Construct a DPF with the given (XOR-shared) target location, and
|
|
|
// of the given depth, to be used for random-access memory reads and
|
|
@@ -53,12 +73,7 @@ RDPF::RDPF(MPCTIO &tio, yield_t &yield,
|
|
|
while(level < depth) {
|
|
|
delete[] curlevel;
|
|
|
curlevel = nextlevel;
|
|
|
- // We don't need to store the last level
|
|
|
- if (level < depth-1) {
|
|
|
- nextlevel = new DPFnode[1<<(level+1)];
|
|
|
- } else {
|
|
|
- nextlevel = NULL;
|
|
|
- }
|
|
|
+ nextlevel = new DPFnode[1<<(level+1)];
|
|
|
// Invariant: curlevel has 2^level elements; nextlevel has
|
|
|
// 2^{level+1} elements
|
|
|
|
|
@@ -88,12 +103,13 @@ RDPF::RDPF(MPCTIO &tio, yield_t &yield,
|
|
|
// side; if bs_choice = 1, it should be the XOR of our left side
|
|
|
// and our peer's left side.
|
|
|
|
|
|
- // We have to ensure that the flag bits (the lsb) of the side
|
|
|
- // that will end up the same be of course the same, but also
|
|
|
- // that the flag bits (the lsb) of the side that will end up
|
|
|
- // different _must_ be different. That is, it's not enough for
|
|
|
- // the nodes of the child selected by choice to be different as
|
|
|
- // 128-bit values; they also have to be different in their lsb.
|
|
|
+ // We also have to ensure that the flag bits (the lsb) of the
|
|
|
+ // side that will end up the same be of course the same, but
|
|
|
+ // also that the flag bits (the lsb) of the side that will end
|
|
|
+ // up different _must_ be different. That is, it's not enough
|
|
|
+ // for the nodes of the child selected by choice to be different
|
|
|
+ // as 128-bit values; they also have to be different in their
|
|
|
+ // lsb.
|
|
|
|
|
|
// This is where we make a small optimization over Appendix C of
|
|
|
// the Duoram paper: instead of keeping separate correction flag
|
|
@@ -150,12 +166,82 @@ RDPF::RDPF(MPCTIO &tio, yield_t &yield,
|
|
|
cfbits |= (size_t(parity_bit)<<level);
|
|
|
DPFnode CWR = _mm_xor_si128(CW,lsb128_mask[parity_bit]);
|
|
|
if (player < 2) {
|
|
|
- if (nextlevel) {
|
|
|
+ if (level < depth-1) {
|
|
|
for(size_t i=0;i<curlevel_size;++i) {
|
|
|
bool flag = get_lsb(curlevel[i]);
|
|
|
nextlevel[2*i] = xor_if(nextlevel[2*i], CW, flag);
|
|
|
nextlevel[2*i+1] = xor_if(nextlevel[2*i+1], CWR, flag);
|
|
|
}
|
|
|
+ } else {
|
|
|
+ // Recall there are four potentially useful vectors that
|
|
|
+ // can come out of a DPF:
|
|
|
+ // - (single-bit) bitwise unit vector
|
|
|
+ // - additive-shared unit vector
|
|
|
+ // - XOR-shared scaled unit vector
|
|
|
+ // - additive-shared scaled unit vector
|
|
|
+ //
|
|
|
+ // (No single DPF should be used for both of the first
|
|
|
+ // two or both of the last two, though, since they're
|
|
|
+ // correlated; you _can_ use one of the first two and
|
|
|
+ // one of the last two.)
|
|
|
+ //
|
|
|
+ // For each 128-bit leaf, the low bit is the flag bit,
|
|
|
+ // and we're guaranteed that the flag bits (and indeed
|
|
|
+ // the whole 128-bit value) for P0 and P1 are the same
|
|
|
+ // for every leaf except the target, and that the flag
|
|
|
+ // bits definitely differ for the target (and the other
|
|
|
+ // 127 bits are independently random on each side).
|
|
|
+ //
|
|
|
+ // We divide the 128-bit leaf into a low 64-bit word and
|
|
|
+ // a high 64-bit word. We use the low word for the unit
|
|
|
+ // vector and the high word for the scaled vector; this
|
|
|
+ // choice is not arbitrary: the flag bit in the low word
|
|
|
+ // means that the sum of all the low words (with P1's
|
|
|
+ // low words negated) across both P0 and P1 is
|
|
|
+ // definitely odd, so we can compute that sum's inverse
|
|
|
+ // mod 2^64, and store it now during precomputation. At
|
|
|
+ // evaluation time for the additive-shared unit vector,
|
|
|
+ // we will output this global inverse times the low word
|
|
|
+ // of each leaf, which will make the sum of all of those
|
|
|
+ // values 1.
|
|
|
+ //
|
|
|
+ // For the scaled vector, we just have to compute shares
|
|
|
+ // of what the scaled vector is a sharing _of_, but
|
|
|
+ // that's just XORing or adding all of each party's
|
|
|
+ // local high words; no communication needed.
|
|
|
+
|
|
|
+ value_t low_sum = 0;
|
|
|
+ value_t high_sum = 0;
|
|
|
+ value_t high_xor = 0;
|
|
|
+ for(size_t i=0;i<curlevel_size;++i) {
|
|
|
+ bool flag = get_lsb(curlevel[i]);
|
|
|
+ DPFnode leftchild = xor_if(nextlevel[2*i], CW, flag);
|
|
|
+ DPFnode rightchild = xor_if(nextlevel[2*i+1], CWR, flag);
|
|
|
+ value_t leftlow = value_t(_mm_cvtsi128_si64x(leftchild));
|
|
|
+ value_t rightlow = value_t(_mm_cvtsi128_si64x(rightchild));
|
|
|
+ value_t lefthigh =
|
|
|
+ value_t(_mm_cvtsi128_si64x(_mm_srli_si128(leftchild,8)));
|
|
|
+ value_t righthigh =
|
|
|
+ value_t(_mm_cvtsi128_si64x(_mm_srli_si128(rightchild,8)));
|
|
|
+ low_sum += (leftlow + rightlow);
|
|
|
+ high_sum += (lefthigh + righthigh);
|
|
|
+ high_xor ^= (lefthigh ^ righthigh);
|
|
|
+ }
|
|
|
+ if (player == 1) {
|
|
|
+ low_sum = -low_sum;
|
|
|
+ high_sum = -high_sum;
|
|
|
+ }
|
|
|
+ scaled_sum.ashare = high_sum;
|
|
|
+ scaled_xor.xshare = high_xor;
|
|
|
+ // Exchange low_sum and add them up
|
|
|
+ tio.queue_peer(&low_sum, sizeof(low_sum));
|
|
|
+ yield();
|
|
|
+ value_t peer_low_sum;
|
|
|
+ tio.recv_peer(&peer_low_sum, sizeof(peer_low_sum));
|
|
|
+ low_sum += peer_low_sum;
|
|
|
+ // The low_sum had better be odd
|
|
|
+ assert(low_sum & 1);
|
|
|
+ unit_sum_inverse = inverse_value_t(low_sum);
|
|
|
}
|
|
|
cw.push_back(CW);
|
|
|
}
|