Quellcode durchsuchen

It's faster to do evaluations of RDPFPairs and RDPFTriples in lockstep

Having a unified evaluator that outputs tuples of leaves is faster
than evaluating leaves one at a time.
Ian Goldberg vor 2 Jahren
Ursprung
Commit
7d614a5eb4
3 geänderte Dateien mit 279 neuen und 95 gelöschten Zeilen
  1. 76 2
      online.cpp
  2. 24 91
      rdpf.cpp
  3. 179 2
      rdpf.hpp

+ 76 - 2
online.cpp

@@ -312,7 +312,7 @@ static void rdpfeval_timing(MPCIO &mpcio, const PRACOptions &opts, char **args)
                     RDPF &dpf = dp.dpf[i];
                     RegXS scaled_xor;
                     scaled_xor.xshare = 0;
-                    RDPF::Eval ev = dpf.eval(start, op_counter, false);
+                    auto ev = StreamEval(dpf, start, op_counter, false);
                     for (address_t x=0;x<(address_t(1)<<depth);++x) {
                         DPFnode leaf = ev.next();
                         RegXS sx = dpf.scaled_xs(leaf);
@@ -328,7 +328,7 @@ static void rdpfeval_timing(MPCIO &mpcio, const PRACOptions &opts, char **args)
                     RDPF &dpf = dt.dpf[i];
                     RegXS scaled_xor;
                     scaled_xor.xshare = 0;
-                    RDPF::Eval ev = dpf.eval(start, op_counter, false);
+                    auto ev = StreamEval(dpf, start, op_counter, false);
                     for (address_t x=0;x<(address_t(1)<<depth);++x) {
                         DPFnode leaf = ev.next();
                         RegXS sx = dpf.scaled_xs(leaf);
@@ -345,6 +345,77 @@ static void rdpfeval_timing(MPCIO &mpcio, const PRACOptions &opts, char **args)
     pool.join();
 }
 
+// Exercise lock-step streaming evaluation of RDPF tuples.  Two optional
+// arguments are read from args: the DPF depth (default 6) and the index
+// of the first leaf to output (default 0).  Each of opts.num_threads
+// threads fetches one tuple (an RDPFPair if this is player 2, an
+// RDPFTriple otherwise), streams 2^depth leaf tuples with
+// use_expansion=false, and for each component DPF prints the XOR of the
+// scaled XOR shares it computed followed by that DPF's stored
+// scaled_xor share.
+static void tupleeval_timing(MPCIO &mpcio, const PRACOptions &opts, char **args)
+{
+    nbits_t depth = 6;
+    address_t start = 0;
+
+    // Consume the optional depth and start arguments, in that order
+    if (*args) {
+        depth = atoi(*args++);
+    }
+    if (*args) {
+        start = atoi(*args++);
+    }
+
+    const int nthreads = opts.num_threads;
+    boost::asio::thread_pool pool(nthreads);
+    for (int thread_num = 0; thread_num < nthreads; ++thread_num) {
+        boost::asio::post(pool, [&mpcio, thread_num, depth, start] {
+            MPCTIO tio(mpcio, thread_num);
+            size_t &aes_count = tio.aes_ops();
+            // One full pass over the DPF's domain
+            const address_t num_leaves = address_t(1) << depth;
+            if (mpcio.player == 2) {
+                RDPFPair dpfpair = tio.rdpfpair(depth);
+                RegXS acc0, acc1;
+                acc0.xshare = 0;
+                acc1.xshare = 0;
+                auto stream = StreamEval(dpfpair, start, aes_count, false);
+                for (address_t i = 0; i < num_leaves; ++i) {
+                    // One leaf from each DPF, at the same index
+                    auto leaves = stream.next();
+                    RegXS s0 = dpfpair.dpf[0].scaled_xs(std::get<0>(leaves));
+                    RegXS s1 = dpfpair.dpf[1].scaled_xs(std::get<1>(leaves));
+                    acc0 ^= s0;
+                    acc1 ^= s1;
+                }
+                printf("%016lx\n%016lx\n", acc0.xshare,
+                    dpfpair.dpf[0].scaled_xor.xshare);
+                printf("\n");
+                printf("%016lx\n%016lx\n", acc1.xshare,
+                    dpfpair.dpf[1].scaled_xor.xshare);
+                printf("\n");
+            } else {
+                RDPFTriple dpftrip = tio.rdpftriple(depth);
+                RegXS acc0, acc1, acc2;
+                acc0.xshare = 0;
+                acc1.xshare = 0;
+                acc2.xshare = 0;
+                auto stream = StreamEval(dpftrip, start, aes_count, false);
+                for (address_t i = 0; i < num_leaves; ++i) {
+                    // One leaf from each DPF, at the same index
+                    auto leaves = stream.next();
+                    RegXS s0 = dpftrip.dpf[0].scaled_xs(std::get<0>(leaves));
+                    RegXS s1 = dpftrip.dpf[1].scaled_xs(std::get<1>(leaves));
+                    RegXS s2 = dpftrip.dpf[2].scaled_xs(std::get<2>(leaves));
+                    acc0 ^= s0;
+                    acc1 ^= s1;
+                    acc2 ^= s2;
+                }
+                printf("%016lx\n%016lx\n", acc0.xshare,
+                    dpftrip.dpf[0].scaled_xor.xshare);
+                printf("\n");
+                printf("%016lx\n%016lx\n", acc1.xshare,
+                    dpftrip.dpf[1].scaled_xor.xshare);
+                printf("\n");
+                printf("%016lx\n%016lx\n", acc2.xshare,
+                    dpftrip.dpf[2].scaled_xor.xshare);
+                printf("\n");
+            }
+            tio.send();
+        });
+    }
+    pool.join();
+}
+
 void online_main(MPCIO &mpcio, const PRACOptions &opts, char **args)
 {
     if (!*args) {
@@ -365,6 +436,9 @@ void online_main(MPCIO &mpcio, const PRACOptions &opts, char **args)
     } else if (!strcmp(*args, "evaltime")) {
         ++args;
         rdpfeval_timing(mpcio, opts, args);
+    } else if (!strcmp(*args, "tupletime")) {
+        ++args;
+        tupleeval_timing(mpcio, opts, args);
     } else {
         std::cerr << "Unknown mode " << *args << "\n";
     }

+ 24 - 91
rdpf.cpp

@@ -286,7 +286,7 @@ size_t RDPF::size() const
 // Descend from a node at depth parentdepth to one of its children
 // whichchild = 0: left child
 // whichchild = 1: right child
-DPFnode RDPF::descend(const DPFnode parent, nbits_t parentdepth,
+DPFnode RDPF::descend(const DPFnode &parent, nbits_t parentdepth,
     bit_t whichchild, size_t &op_counter) const
 {
     DPFnode prgout;
@@ -363,96 +363,6 @@ void RDPF::expand(size_t &op_counter)
     delete[] path;
 }
 
-// Create an Eval object that will start its output at index start.
-// It will wrap around to 0 when it hits 2^depth.  If use_expansion
-// is true, then if the DPF has been expanded, just output values
-// from that.  If use_expansion=false or if the DPF has not been
-// expanded, compute the values on the fly.
-RDPF::Eval RDPF::eval(address_t start, size_t &op_counter,
-    bool use_expansion) const
-{
-    RDPF::Eval eval(*this, op_counter, start, use_expansion);
-
-    return eval;
-}
-
-RDPF::Eval::Eval(const RDPF &rdpf, size_t &op_counter, address_t start,
-    bool use_expansion) : rdpf(rdpf), op_counter(op_counter),
-    use_expansion(use_expansion)
-{
-    depth = rdpf.depth();
-    // Prevent overflow of 1<<depth
-    if (depth < ADDRESS_MAX_BITS) {
-        indexmask = (address_t(1)<<depth)-1;
-    } else {
-        indexmask = ~0;
-    }
-    // Record that we haven't actually output the leaf for index start
-    // itself yet
-    nextindex = start;
-    if (use_expansion && rdpf.expansion.size()) {
-        // We just need to keep the counter, not compute anything
-        return;
-    }
-    path.resize(depth);
-    pathindex = start;
-    path[0] = rdpf.seed;
-    for (nbits_t i=1;i<depth;++i) {
-        bool dir = !!(pathindex & (address_t(1)<<(depth-i)));
-        path[i] = rdpf.descend(path[i-1], i-1, dir, op_counter);
-    }
-}
-
-DPFnode RDPF::Eval::next()
-{
-    if (use_expansion && rdpf.expansion.size()) {
-        // Just use the precomputed values
-        DPFnode leaf = rdpf.expansion[nextindex];
-        nextindex = (nextindex + 1) & indexmask;
-        return leaf;
-    }
-    // Invariant: in the first call to next(), nextindex = pathindex.
-    // Otherwise, nextindex = pathindex+1.
-    // Get the XOR of nextindex and pathindex, and strip the low bit.
-    // If nextindex and pathindex are equal, or pathindex is even
-    // and nextindex is the consecutive odd number, index_xor will be 0,
-    // indicating that we don't have to update the path, but just
-    // compute the appropriate leaf given by the low bit of nextindex.
-    //
-    // Otherwise, say for example pathindex is 010010111 and nextindex
-    // is 010011000.  Then their XOR is 000001111, and stripping the low
-    // bit yields 000001110, so how_many_1_bits will be 3.
-    // That indicates (typically) that path[depth-3] was a left child,
-    // and now we need to change it to a right child by descending right
-    // from path[depth-4], and then filling the path after that with
-    // left children.
-    //
-    // When we wrap around, however, index_xor will be 111111110 (after
-    // we strip the low bit), and how_many_1_bits will be depth-1, but
-    // the new top child (of the root seed) we have to compute will be a
-    // left, not a right, child.
-    uint64_t index_xor = (nextindex ^ pathindex) & ~1;
-    nbits_t how_many_1_bits = __builtin_popcount(index_xor);
-    if (how_many_1_bits > 0) {
-        // This will almost always be 1, unless we've just wrapped
-        // around from the right subtree back to the left, in which case
-        // it will be 0.
-        bool top_changed_bit =
-            nextindex & (address_t(1) << how_many_1_bits);
-        path[depth-how_many_1_bits] =
-            rdpf.descend(path[depth-how_many_1_bits-1],
-                depth-how_many_1_bits-1, top_changed_bit, op_counter);
-        for (nbits_t i = depth-how_many_1_bits; i < depth-1; ++i) {
-            path[i+1] = rdpf.descend(path[i], i, 0, op_counter);
-        }
-    }
-    DPFnode leaf = rdpf.descend(path[depth-1], depth-1, nextindex & 1,
-        op_counter);
-    pathindex = nextindex;
-    nextindex = (nextindex + 1) & indexmask;
-    return leaf;
-}
-
 // Construct three RDPFs of the given depth all with the same randomly
 // generated target index.
 RDPFTriple::RDPFTriple(MPCTIO &tio, yield_t &yield,
@@ -477,3 +387,26 @@ RDPFTriple::RDPFTriple(MPCTIO &tio, yield_t &yield,
         });
     run_coroutines(yield, coroutines);
 }
+
+// Descend all three DPFs of the triple in lock step: each component
+// moves from its own parent node to its whichchild-side child.
+RDPFTriple::node RDPFTriple::descend(const RDPFTriple::node &parent,
+    nbits_t parentdepth, bit_t whichchild, size_t &op_counter) const
+{
+    const auto &[par0, par1, par2] = parent;
+    // Braced initialization evaluates the descents left to right
+    return node{
+        dpf[0].descend(par0, parentdepth, whichchild, op_counter),
+        dpf[1].descend(par1, parentdepth, whichchild, op_counter),
+        dpf[2].descend(par2, parentdepth, whichchild, op_counter)};
+}
+
+// Descend both DPFs of the pair in lock step: each component moves
+// from its own parent node to its whichchild-side child.
+RDPFPair::node RDPFPair::descend(const RDPFPair::node &parent,
+    nbits_t parentdepth, bit_t whichchild, size_t &op_counter) const
+{
+    const auto &[par0, par1] = parent;
+    // Braced initialization evaluates the descents left to right
+    return node{
+        dpf[0].descend(par0, parentdepth, whichchild, op_counter),
+        dpf[1].descend(par1, parentdepth, whichchild, op_counter)};
+}

+ 179 - 2
rdpf.hpp

@@ -9,7 +9,118 @@
 #include "types.hpp"
 #include "bitutils.hpp"
 
+// Streaming evaluation, to avoid taking up enough memory to store
+// an entire evaluation.  T can be RDPF, RDPFPair, or RDPFTriple; it
+// must provide a node type plus depth(), get_seed(), has_expansion(),
+// get_expansion() and descend().
+template <typename T>
+class StreamEval {
+    // The DPF (or tuple of DPFs) being evaluated; held by reference,
+    // so it must outlive this evaluator
+    const T &rdpf;
+    // Caller's operation counter, passed to every descend() call
+    size_t &op_counter;
+    // Whether to read leaves from a precomputed expansion if present
+    bool use_expansion;
+    // Depth of the DPF, set by the constructor from rdpf.depth()
+    nbits_t depth = 0;
+    // Mask with the low depth bits set, used to wrap nextindex
+    address_t indexmask = 0;
+    // Index of the leaf whose ancestors are currently stored in path.
+    // The constructor leaves this at its initializer when leaves come
+    // from the expansion.
+    address_t pathindex = 0;
+    // Index of the leaf the next call to next() will output
+    address_t nextindex = 0;
+    // path[0] is the seed and path[i] is the depth-i node on the way
+    // to leaf pathindex; empty when leaves come from the expansion
+    std::vector<typename T::node> path;
+public:
+    // Create an Eval object that will start its output at index start.
+    // It will wrap around to 0 when it hits 2^depth.  If use_expansion
+    // is true, then if the DPF has been expanded, just output values
+    // from that.  If use_expansion=false or if the DPF has not been
+    // expanded, compute the values on the fly.
+    StreamEval(const T &rdpf, address_t start, size_t &op_counter,
+        bool use_expansion = true);
+    // Get the next value (or tuple of values) from the evaluator
+    typename T::node next();
+};
+
+// Create a StreamEval object that will start its output at index start
+// (reduced modulo 2^depth).  It will wrap around to 0 when it hits
+// 2^depth.  If use_expansion is true, then if the DPF has been
+// expanded, just output values from that.  If use_expansion=false or
+// if the DPF has not been expanded, compute the values on the fly.
+// Computing on the fly requires a depth of at least 1, since path[0]
+// is always written.
+template <typename T>
+StreamEval<T>::StreamEval(const T &rdpf, address_t start,
+    size_t &op_counter, bool use_expansion) : rdpf(rdpf),
+    op_counter(op_counter), use_expansion(use_expansion)
+{
+    depth = rdpf.depth();
+    // Prevent overflow of 1<<depth
+    if (depth < ADDRESS_MAX_BITS) {
+        indexmask = (address_t(1)<<depth)-1;
+    } else {
+        indexmask = ~address_t(0);
+    }
+    // Record that we haven't actually output the leaf for index start
+    // itself yet.  Mask start down to the low depth bits so that an
+    // oversized start can neither index past the end of the expansion
+    // nor leave stray high bits in pathindex for next() to count.
+    nextindex = start & indexmask;
+    if (use_expansion && rdpf.has_expansion()) {
+        // We just need to keep the counter, not compute anything
+        return;
+    }
+    // Build the ancestors of the first leaf: path[0] is the seed and
+    // path[i] is the depth-i node, selected by bit (depth-i) of
+    // pathindex.  The leaf itself (bit 0) is computed by next().
+    path.resize(depth);
+    pathindex = nextindex;
+    path[0] = rdpf.get_seed();
+    for (nbits_t i=1;i<depth;++i) {
+        bool dir = !!(pathindex & (address_t(1)<<(depth-i)));
+        path[i] = rdpf.descend(path[i-1], i-1, dir, op_counter);
+    }
+}
+
+// Get the next value (or tuple of values) from the evaluator and
+// advance nextindex by one, wrapping around to 0 via indexmask.
+template <typename T>
+typename T::node StreamEval<T>::next()
+{
+    if (use_expansion && rdpf.has_expansion()) {
+        // Just use the precomputed values
+        typename T::node leaf = rdpf.get_expansion(nextindex);
+        nextindex = (nextindex + 1) & indexmask;
+        return leaf;
+    }
+    // Invariant: in the first call to next(), nextindex = pathindex.
+    // Otherwise, nextindex = pathindex+1.
+    // Get the XOR of nextindex and pathindex, and strip the low bit.
+    // If nextindex and pathindex are equal, or pathindex is even
+    // and nextindex is the consecutive odd number, index_xor will be 0,
+    // indicating that we don't have to update the path, but just
+    // compute the appropriate leaf given by the low bit of nextindex.
+    //
+    // Otherwise, say for example pathindex is 010010111 and nextindex
+    // is 010011000.  Then their XOR is 000001111, and stripping the low
+    // bit yields 000001110, so how_many_1_bits will be 3.
+    // That indicates (typically) that path[depth-3] was a left child,
+    // and now we need to change it to a right child by descending right
+    // from path[depth-4], and then filling the path after that with
+    // left children.
+    //
+    // When we wrap around, however, index_xor will be 111111110 (after
+    // we strip the low bit), and how_many_1_bits will be depth-1, but
+    // the new top child (of the root seed) we have to compute will be a
+    // left, not a right, child.
+    uint64_t index_xor = (nextindex ^ pathindex) & ~1;
+    // index_xor is 64 bits wide, so count with the long long builtin;
+    // __builtin_popcount takes an unsigned int and would drop any
+    // changed bits above that width.
+    nbits_t how_many_1_bits = __builtin_popcountll(index_xor);
+    if (how_many_1_bits > 0) {
+        // This will almost always be 1, unless we've just wrapped
+        // around from the right subtree back to the left, in which case
+        // it will be 0.
+        bool top_changed_bit =
+            nextindex & (address_t(1) << how_many_1_bits);
+        path[depth-how_many_1_bits] =
+            rdpf.descend(path[depth-how_many_1_bits-1],
+                depth-how_many_1_bits-1, top_changed_bit, op_counter);
+        // Everything below the changed node is a left child
+        for (nbits_t i = depth-how_many_1_bits; i < depth-1; ++i) {
+            path[i+1] = rdpf.descend(path[i], i, 0, op_counter);
+        }
+    }
+    // The leaf is the child of the deepest path node selected by the
+    // low bit of nextindex
+    typename T::node leaf = rdpf.descend(path[depth-1], depth-1,
+        nextindex & 1, op_counter);
+    pathindex = nextindex;
+    nextindex = (nextindex + 1) & indexmask;
+    return leaf;
+}
+
 struct RDPF {
+    // The type of nodes
+    using node = DPFnode;
+
     // The 128-bit seed
     DPFnode seed;
     // Which half of the DPF are we?
@@ -58,12 +169,23 @@ struct RDPF {
     // The depth
     inline nbits_t depth() const { return cw.size(); }
 
+    // The seed, i.e. the depth-0 node that StreamEval descends from
+    inline node get_seed() const { return seed; }
+
+    // Whether a precomputed expansion is stored (expansion is non-empty)
+    inline bool has_expansion() const { return !expansion.empty(); }
+
+    // Get element index of the expansion; no range check is done here
+    inline node get_expansion(address_t index) const {
+        return expansion[index];
+    }
+
     // Descend from a node at depth parentdepth to one of its children
     // whichchild = 0: left child
     // whichchild = 1: right child
     //
     // Cost: 1 AES operation
-    DPFnode descend(const DPFnode parent, nbits_t parentdepth,
+    DPFnode descend(const DPFnode &parent, nbits_t parentdepth,
         bit_t whichchild, size_t &op_counter) const;
 
     // Get the leaf node for the given input
@@ -74,6 +196,7 @@ struct RDPF {
     // Expand the DPF if it's not already expanded
     void expand(size_t &op_counter);
 
+#if 0
     // Streaming evaluation, to avoid taking up enough memory to store
     // an entire evaluation
     class Eval {
@@ -97,8 +220,9 @@ struct RDPF {
     // is true, then if the DPF has been expanded, just output values
     // from that.  If use_expansion=false or if the DPF has not been
     // expanded, compute the values on the fly.
-    Eval eval(address_t start, size_t &op_counter,
+    StreamEval<RDPF> eval(address_t start, size_t &op_counter,
         bool use_expansion=true) const;
+#endif
 
     // Get the bit-shared unit vector entry from the leaf node
     inline RegBS unit_bs(DPFnode leaf) const {
@@ -233,16 +357,43 @@ T& operator<<(T &os, const RDPF &rdpf)
 // not).
 
 struct RDPFTriple {
+    // The type of node triples: one DPFnode per DPF, in dpf[] order
+    using node = std::tuple<DPFnode, DPFnode, DPFnode>;
+
     RegAS as_target;
     RegXS xs_target;
     RDPF dpf[3];
 
+    // The depth (taken from dpf[0])
+    inline nbits_t depth() const { return dpf[0].depth(); }
+
+    // The seeds of the three DPFs, as a node triple
+    inline node get_seed() const {
+        return std::make_tuple(dpf[0].get_seed(), dpf[1].get_seed(),
+            dpf[2].get_seed());
+    }
+
+    // Do we have a precomputed expansion?  get_expansion() indexes the
+    // expansions of all three DPFs, so all three must be present.
+    inline bool has_expansion() const {
+        return dpf[0].has_expansion() && dpf[1].has_expansion() &&
+            dpf[2].has_expansion();
+    }
+
+    // Get an element of the expansion, as a node triple.  Only valid
+    // when has_expansion() is true.
+    inline node get_expansion(address_t index) const {
+        return std::make_tuple(dpf[0].get_expansion(index),
+            dpf[1].get_expansion(index), dpf[2].get_expansion(index));
+    }
+
     RDPFTriple() {}
 
     // Construct three RDPFs of the given depth all with the same
     // randomly generated target index.
     RDPFTriple(MPCTIO &tio, yield_t &yield,
         nbits_t depth, bool save_expansion = false);
+
+    // Descend the three RDPFs in lock step
+    node descend(const node &parent, nbits_t parentdepth,
+        bit_t whichchild, size_t &op_counter) const;
 };
 
 // I/O for RDPF Triples
@@ -274,7 +425,33 @@ T& operator>>(T &is, RDPFTriple &rdpftrip)
 }
 
 struct RDPFPair {
+    // The type of node pairs: one DPFnode per DPF, in dpf[] order
+    using node = std::tuple<DPFnode, DPFnode>;
+
     RDPF dpf[2];
+
+    // The depth (taken from dpf[0])
+    inline nbits_t depth() const { return dpf[0].depth(); }
+
+    // The seeds of the two DPFs, as a node pair
+    inline node get_seed() const {
+        return std::make_tuple(dpf[0].get_seed(), dpf[1].get_seed());
+    }
+
+    // Do we have a precomputed expansion?  get_expansion() indexes the
+    // expansions of both DPFs, so both must be present.
+    inline bool has_expansion() const {
+        return dpf[0].has_expansion() && dpf[1].has_expansion();
+    }
+
+    // Get an element of the expansion, as a node pair.  Only valid
+    // when has_expansion() is true.
+    inline node get_expansion(address_t index) const {
+        return std::make_tuple(dpf[0].get_expansion(index),
+            dpf[1].get_expansion(index));
+    }
+
+    // Descend the two RDPFs in lock step
+    node descend(const node &parent, nbits_t parentdepth,
+        bit_t whichchild, size_t &op_counter) const;
 };
 
 // I/O for RDPF Pairs