|| 
							- #ifndef __MCPIO_HPP__
 
- #define __MCPIO_HPP__
 
- #include <iostream>
 
- #include <fstream>
 
- #include <vector>
 
- #include <array>
 
- #include <deque>
 
- #include <queue>
 
- #include <string>
 
- #include <atomic>
 
- #include <optional>
 
- #include <bsd/stdlib.h> // arc4random_buf
 
- #include <boost/asio.hpp>
 
- #include <boost/thread.hpp>
 
- #include <boost/chrono.hpp>
 
- #include "types.hpp"
 
- #include "corotypes.hpp"
 
- using boost::asio::ip::tcp;
 
- // Classes to represent stored precomputed data (e.g., multiplication triples)
 
- template<typename T, typename N>
 
- class PreCompStorage {
 
- public:
 
-     PreCompStorage() : name(N::name), depth(0), count(0) {}
 
-     PreCompStorage(unsigned player, ProcessingMode mode,
 
-         const char *filenameprefix, unsigned thread_num);
 
-     void init(unsigned player, ProcessingMode mode,
 
-         const char *filenameprefix, unsigned thread_num,
 
-         nbits_t depth = 0, nbits_t width = 1);
 
-     void get(T& nextval);
 
-     inline void inc() { ++count; }
 
-     inline size_t get_stats() { return count; }
 
-     inline void reset_stats() { count = 0; }
 
- private:
 
-     std::ifstream storage;
 
-     std::string name;
 
-     nbits_t depth;
 
-     nbits_t width;
 
-     size_t count;
 
- };
 
// Define SEND_LAMPORT_CLOCKS to have Lamport clocks transmitted with
// the data.  Each message then gains an 8-byte header (its length and
// the Lamport clock), which costs a little network bandwidth.  To keep
// things simple, the Lamport clock member of MPCIO is always defined
// and always handed to the IO functions; they just ignore it when this
// macro is not defined.
#define SEND_LAMPORT_CLOCKS

using lamport_t = uint32_t;
using atomic_lamport_t = std::atomic<lamport_t>;
using opt_lamport_t = std::optional<lamport_t>;

#ifdef SEND_LAMPORT_CLOCKS
// One outgoing message plus the header that precedes it on the wire.
// The header is the raw in-memory bytes of a uint32_t message length
// followed by the raw in-memory bytes of the Lamport clock (so both are
// in the sender's native byte order).
struct MessageWithHeader {
    std::string header;
    std::string message;

    // Takes ownership of msg and builds the matching header for it.
    MessageWithHeader(std::string &&msg, lamport_t lamport) :
        message(std::move(msg))
    {
        // The length field is 32 bits wide; the size is narrowed to fit
        const uint32_t msglen = uint32_t(message.size());
        header.append(reinterpret_cast<const char *>(&msglen),
            sizeof(msglen));
        header.append(reinterpret_cast<const char *>(&lamport),
            sizeof(lamport));
    }
};
#endif
 
- // A class to wrap a socket to another MPC party.  This wrapping allows
 
- // us to do some useful logging, and perform async_writes transparently
 
- // to the application.
 
- class MPCSingleIO {
 
-     tcp::socket sock;
 
-     size_t totread, totwritten;
 
-     std::string dest;
 
-     int thread_num;
 
- #ifdef RECORD_IOTRACE
 
-     std::vector<ssize_t> iotrace;
 
- #endif
 
-     // To avoid blocking if both we and our peer are trying to send
 
-     // something very large, and neither side is receiving, we will send
 
-     // with async_write.  But this has a number of implications:
 
-     // - The data to be sent has to be copied into this MPCSingleIO,
 
-     //   since asio::buffer pointers are not guaranteed to remain valid
 
-     //   after the end of the coroutine that created them
 
-     // - We have to keep a queue of messages to be sent, in case
 
-     //   coroutines call send() before the previous message has finished
 
-     //   being sent
 
-     // - This queue may be accessed from the async_write thread as well
 
-     //   as the work thread that uses this MPCSingleIO directly (there
 
-     //   should be only one of the latter), so we need some locking
 
-     // This is where we accumulate data passed in queue()
 
-     std::string dataqueue;
 
-     // When send() is called, the above dataqueue is appended to this
 
-     // messagequeue, and the dataqueue is reset.  If messagequeue was
 
-     // empty before this append, launch async_write to write the first
 
-     // thing in the messagequeue.  When async_write completes, it will
 
-     // delete the first thing in the messagequeue, and see if there are
 
-     // any more elements.  If so, it will start another async_write.
 
-     // The invariant is that there is an async_write currently running
 
-     // iff messagequeue is nonempty.
 
- #ifdef SEND_LAMPORT_CLOCKS
 
-     std::queue<MessageWithHeader> messagequeue;
 
- #else
 
-     std::queue<std::string> messagequeue;
 
- #endif
 
-     // If a single message is broken into chunks in order to get the
 
-     // first part of it out on the wire while the rest of it is still
 
-     // being computed, we want the Lamport clock of all the chunks to be
 
-     // that of when the message is first created.  This value will be
 
-     // nullopt when there has been no queue() since the last explicit
 
-     // send() (as opposed to the implicit send() called by queue()
 
-     // itself if it wants to get a chunk on its way), and will be set to
 
-     // the current lamport clock when that first queue() after each
 
-     // explicit send() happens.
 
-     opt_lamport_t message_lamport;
 
- #ifdef SEND_LAMPORT_CLOCKS
 
-     // If Lamport clocks are being sent, then the data stream is divided
 
-     // into chunks, each with a header containing the length of the
 
-     // chunk and the Lamport clock.  So when we read, we'll read a whole
 
-     // chunk, and store it here.  Then calls to recv() will read pieces
 
-     // of this buffer until it has all been read, and then read the next
 
-     // header and chunk.
 
-     std::string recvdata;
 
-     size_t recvdataremain;
 
- #endif
 
-     // Never touch the above messagequeue without holding this lock (you
 
-     // _can_ touch the strings it contains, though, if you looked one up
 
-     // while holding the lock).
 
-     boost::mutex messagequeuelock;
 
-     // Asynchronously send the first message from the message queue.
 
-     // * The messagequeuelock must be held when this is called! *
 
-     // This method may be called from either thread (the work thread or
 
-     // the async_write handler thread).
 
-     void async_send_from_msgqueue();
 
- public:
 
-     MPCSingleIO(tcp::socket &&sock, const char *dest, int thread_num) :
 
-         sock(std::move(sock)), totread(0), totwritten(0), dest(dest),
 
-         thread_num(thread_num)
 
- #ifdef SEND_LAMPORT_CLOCKS
 
-         , recvdataremain(0)
 
- #endif
 
-         {}
 
-     // Returns 1 if a new message is started, 0 otherwise
 
-     size_t queue(const void *data, size_t len, lamport_t lamport);
 
-     void send(bool implicit_send = false);
 
-     size_t recv(void *data, size_t len, lamport_t &lamport);
 
- #ifdef RECORD_IOTRACE
 
-     void dumptrace(std::ostream &os, const char *label = NULL);
 
-     void resettrace() {
 
-         iotrace.clear();
 
-     }
 
- #endif
 
- };
 
- // A base class to represent all of a computation peer or server's IO,
 
- // either to other parties or to local storage (the computation and
 
- // server cases are separate subclasses below).
 
- struct MPCIO {
 
-     int player;
 
-     ProcessingMode mode;
 
-     size_t num_threads;
 
-     atomic_lamport_t lamport;
 
-     std::vector<size_t> msgs_sent;
 
-     std::vector<size_t> msg_bytes_sent;
 
-     std::vector<size_t> aes_ops;
 
-     boost::chrono::steady_clock::time_point steady_start;
 
-     boost::chrono::process_cpu_clock::time_point cpu_start;
 
-     MPCIO(int player, ProcessingMode mode, size_t num_threads) :
 
-         player(player), mode(mode),
 
-         num_threads(num_threads), lamport(0)
 
-     {
 
-         reset_stats();
 
-     }
 
-     void reset_stats();
 
-     static void dump_memusage(std::ostream &os);
 
-     void dump_stats(std::ostream &os);
 
- };
 
- // A class to represent all of a computation peer's IO, either to other
 
- // parties or to local storage
 
- struct MPCPeerIO : public MPCIO {
 
-     // We use a deque here instead of a vector because you can't have a
 
-     // vector of a type without a copy constructor (tcp::socket is the
 
-     // culprit), but you can have a deque of those for some reason.
 
-     std::deque<MPCSingleIO> peerios;
 
-     std::deque<MPCSingleIO> serverios;
 
-     std::vector<PreCompStorage<MultTriple, MultTripleName>> multtriples;
 
-     std::vector<PreCompStorage<HalfTriple, HalfTripleName>> halftriples;
 
-     std::vector<PreCompStorage<AndTriple, AndTripleName>> andtriples;
 
-     std::vector<PreCompStorage<
 
-         SelectTriple<value_t>, ValSelectTripleName>> valselecttriples;
 
-     std::vector<PreCompStorage<CDPF, CDPFName>> cdpfs;
 
-     // The outer vector is (like above) one item per thread
 
-     // The inner array is indexed by DPF depth (depth d is at entry d-1)
 
-     // We have one of these whole vectors-of-arrays for each RDPF width,
 
-     // wrapped into a tuple
 
-     template <nbits_t WIDTH>
 
-     using RDPFPrecomps =
 
-         std::vector<std::array<
 
-             PreCompStorage<RDPFTriple<WIDTH>, RDPFTripleName>,ADDRESS_MAX_BITS>>;
 
-     template <nbits_t WIDTH>
 
-     using IRDPFPrecomps =
 
-         std::vector<std::array<
 
-             PreCompStorage<RDPFTriple<WIDTH>, IRDPFTripleName>,ADDRESS_MAX_BITS>>;
 
-     std::tuple<
 
-         RDPFPrecomps<1>,
 
-         RDPFPrecomps<2>,
 
-         RDPFPrecomps<3>,
 
-         RDPFPrecomps<4>,
 
-         RDPFPrecomps<5>> rdpftriples;
 
-     std::tuple<
 
-         IRDPFPrecomps<1>,
 
-         IRDPFPrecomps<2>,
 
-         IRDPFPrecomps<3>,
 
-         IRDPFPrecomps<4>,
 
-         IRDPFPrecomps<5>> irdpftriples;
 
-     MPCPeerIO(unsigned player, ProcessingMode mode,
 
-             std::deque<tcp::socket> &peersocks,
 
-             std::deque<tcp::socket> &serversocks);
 
-     void dump_precomp_stats(std::ostream &os);
 
-     void reset_precomp_stats();
 
-     void dump_stats(std::ostream &os);
 
- };
 
- // A class to represent all of the server party's IO, either to
 
- // computational parties or to local storage
 
- struct MPCServerIO : public MPCIO {
 
-     std::deque<MPCSingleIO> p0ios;
 
-     std::deque<MPCSingleIO> p1ios;
 
-     // The outer vector is (like above) one item per thread
 
-     // The inner array is indexed by DPF depth (depth d is at entry d-1)
 
-     // We have one of these whole vectors-of-arrays for each RDPF width,
 
-     // wrapped into a tuple
 
-     template <nbits_t WIDTH>
 
-     using RDPFPrecomps =
 
-         std::vector<std::array<
 
-             PreCompStorage<RDPFPair<WIDTH>, RDPFPairName>,ADDRESS_MAX_BITS>>;
 
-     template <nbits_t WIDTH>
 
-     using IRDPFPrecomps =
 
-         std::vector<std::array<
 
-             PreCompStorage<RDPFPair<WIDTH>, IRDPFPairName>,ADDRESS_MAX_BITS>>;
 
-     std::tuple<
 
-         RDPFPrecomps<1>,
 
-         RDPFPrecomps<2>,
 
-         RDPFPrecomps<3>,
 
-         RDPFPrecomps<4>,
 
-         RDPFPrecomps<5>> rdpfpairs;
 
-     std::tuple<
 
-         IRDPFPrecomps<1>,
 
-         IRDPFPrecomps<2>,
 
-         IRDPFPrecomps<3>,
 
-         IRDPFPrecomps<4>,
 
-         IRDPFPrecomps<5>> irdpfpairs;
 
-     MPCServerIO(ProcessingMode mode,
 
-             std::deque<tcp::socket> &p0socks,
 
-             std::deque<tcp::socket> &p1socks);
 
-     void dump_precomp_stats(std::ostream &os);
 
-     void reset_precomp_stats();
 
-     void dump_stats(std::ostream &os);
 
- };
 
- class MPCSingleIOStream {
 
-     MPCSingleIO &sio;
 
-     lamport_t &lamport;
 
-     size_t &msgs_sent;
 
-     size_t &msg_bytes_sent;
 
- public:
 
-     MPCSingleIOStream(MPCSingleIO &sio, lamport_t &lamport,
 
-             size_t &msgs_sent, size_t &msg_bytes_sent) :
 
-         sio(sio), lamport(lamport), msgs_sent(msgs_sent),
 
-         msg_bytes_sent(msg_bytes_sent) {}
 
-     MPCSingleIOStream& write(const char *data, std::streamsize len) {
 
-         size_t newmsg = sio.queue(data, len, lamport);
 
-         msgs_sent += newmsg;
 
-         msg_bytes_sent += len;
 
-         return *this;
 
-     }
 
-     MPCSingleIOStream& read(char *data, std::streamsize len) {
 
-         sio.recv(data, len, lamport);
 
-         return *this;
 
-     }
 
- };
 
- // A handle to one thread's sockets and streams in a MPCIO
 
- class MPCTIO {
 
-     int thread_num;
 
-     // The number of threads a coroutine using this MPCTIO can use for
 
-     // local computation (no communication and no yielding).  Multiple
 
-     // coroutines with the same MPCTIO can have this value larger than
 
-     // 1, since they will not be able to use multiple threads at the
 
-     // same time.
 
-     int local_cpu_nthreads;
 
-     // The number of threads a coroutine using this MPCTIO can launch
 
-     // into separate MPCTIOs with their own communication.  It is
 
-     // important that at most one coroutine using this MPCTIO can have
 
-     // this value set larger than 1, since all MPCTIOs with the same
 
-     // thread_num (and so using the same sockets) have to be controlled
 
-     // by the same run_coroutines(tio, ...) call.
 
-     int communication_nthreads;
 
-     lamport_t thread_lamport;
 
-     MPCIO &mpcio;
 
-     std::optional<MPCSingleIOStream> peer_iostream;
 
-     std::optional<MPCSingleIOStream> server_iostream;
 
-     std::optional<MPCSingleIOStream> p0_iostream;
 
-     std::optional<MPCSingleIOStream> p1_iostream;
 
- #ifdef VERBOSE_COMMS
 
-     size_t round_num;
 
- #endif
 
-     // We implement SelectTriple<bit_t> by fetching a single AndTriple
 
-     // and using it for producing 64 bitwise SelectTriple<bit_t>s.
 
-     AndTriple last_andtriple;
 
-     nbits_t last_andtriple_bits_remaining;
 
-     // We allow for prefetching of SelectTriple<DPFnode>s to save one
 
-     // network round per level when constructing RDPFs
 
-     std::deque<SelectTriple<DPFnode>> queued_nodeselecttriples;
 
-     // For P0 and P1, it should always be the case that
 
-     // remaining_nodesselecttriples equals
 
-     // queued_nodeselecttriples.size().  P2 does not store anything in
 
-     // queued_nodeselecttriples, however.
 
-     size_t remaining_nodesselecttriples;
 
- public:
 
-     MPCTIO(MPCIO &mpcio, int thread_num, int num_threads = 1);
 
-     // Sync our per-thread lamport clock with the master one in the
 
-     // mpcio.  You only need to call this explicitly if your MPCTIO
 
-     // outlives your thread (in which case call it after the join), or
 
-     // if your threads do interthread communication amongst themselves
 
-     // (in which case call it in the sending thread before the send, and
 
-     // call it in the receiving thread after the receive).  If you want
 
-     // to call MPCIO::dump_stats() in the middle of a run (while the
 
-     // MPCTIO is still alive), call this as well.
 
-     void sync_lamport();
 
-     // Only call this if you can be sure that there are no outstanding
 
-     // messages in flight, you can call it on all existing MPCTIOs, and
 
-     // you really want to reset the Lamport clock in the midding of a
 
-     // run.
 
-     void reset_lamport();
 
-     // The normal case, where the MPCIO is created inside the thread,
 
-     // and so destructed when the thread ends, is handled automatically
 
-     // here.
 
-     ~MPCTIO() {
 
-         send();
 
-         sync_lamport();
 
-     }
 
-     // Computational peers use these functions:
 
-     // Queue up data to the peer or to the server
 
-     void queue_peer(const void *data, size_t len);
 
-     void queue_server(const void *data, size_t len);
 
-     // Receive data from the peer or to the server
 
-     size_t recv_peer(void *data, size_t len);
 
-     size_t recv_server(void *data, size_t len);
 
-     // Or get these MPCSingleIOStreams
 
-     MPCSingleIOStream& iostream_peer() { return peer_iostream.value(); }
 
-     MPCSingleIOStream& iostream_server() { return server_iostream.value(); }
 
-     // The server uses these functions:
 
-     // Queue up data to p0 or p1
 
-     void queue_p0(const void *data, size_t len);
 
-     void queue_p1(const void *data, size_t len);
 
-     // Receive data from p0 or p1
 
-     size_t recv_p0(void *data, size_t len);
 
-     size_t recv_p1(void *data, size_t len);
 
-     // Or get these MPCSingleIOStreams
 
-     MPCSingleIOStream& iostream_p0() { return p0_iostream.value(); }
 
-     MPCSingleIOStream& iostream_p1() { return p1_iostream.value(); }
 
-     // Everyone can use the remaining functions.
 
-     // Send all queued data for this thread
 
-     void send();
 
-     // Functions to get precomputed values.  If we're in the online
 
-     // phase, get them from PreCompStorage.  If we're in the
 
-     // preprocessing phase, read them from the server.
 
-     MultTriple multtriple(yield_t &yield);
 
-     HalfTriple halftriple(yield_t &yield, bool tally=true);
 
-     AndTriple andtriple(yield_t &yield);
 
-     void request_nodeselecttriples(yield_t &yield, size_t num);
 
-     SelectTriple<DPFnode> nodeselecttriple(yield_t &yield);
 
-     SelectTriple<value_t> valselecttriple(yield_t &yield);
 
-     SelectTriple<bit_t> bitselecttriple(yield_t &yield);
 
-     // These ones only work during the online phase
 
-     // Computational peers call:
 
-     template <nbits_t WIDTH = 1>
 
-     RDPFTriple<WIDTH> rdpftriple(yield_t &yield, nbits_t depth,
 
-         bool incremental = false, bool keep_expansion = true);
 
-     // The server calls:
 
-     template <nbits_t WIDTH = 1>
 
-     RDPFPair<WIDTH> rdpfpair(yield_t &yield, nbits_t depth,
 
-         bool incremental = false);
 
-     // Anyone can call:
 
-     CDPF cdpf(yield_t &yield);
 
-     // Accessors
 
-     inline int player() { return mpcio.player; }
 
-     inline bool preprocessing() { return mpcio.mode == MODE_PREPROCESSING; }
 
-     inline bool is_server() { return mpcio.player == 2; }
 
-     inline size_t& aes_ops() { return mpcio.aes_ops[thread_num]; }
 
-     inline size_t msgs_sent() { return mpcio.msgs_sent[thread_num]; }
 
-     inline int cpu_nthreads(int nthreads=0) {
 
-         int res = local_cpu_nthreads;
 
-         if (nthreads > 0) {
 
-             local_cpu_nthreads = nthreads;
 
-         }
 
-         return res;
 
-     }
 
-     inline int comm_nthreads(int nthreads=0) {
 
-         int res = communication_nthreads;
 
-         if (nthreads > 0) {
 
-             communication_nthreads = nthreads;
 
-         }
 
-         return res;
 
-     }
 
- };
 
- // Set up the socket connections between the two computational parties
 
- // (P0 and P1) and the server party (P2).  For each connection, the
 
- // lower-numbered party does the accept() and the higher-numbered party
 
- // does the connect().
 
- // Computational parties call this version with player=0 or 1
 
- void mpcio_setup_computational(unsigned player,
 
-     boost::asio::io_context &io_context,
 
-     const char *p0addr,  // can be NULL when player=0
 
-     int num_threads,
 
-     std::deque<tcp::socket> &peersocks,
 
-     std::deque<tcp::socket> &serversocks);
 
- // Server calls this version
 
- void mpcio_setup_server(boost::asio::io_context &io_context,
 
-     const char *p0addr, const char *p1addr, int num_threads,
 
-     std::deque<tcp::socket> &p0socks,
 
-     std::deque<tcp::socket> &p1socks);
 
- #include "mpcio.tcc"
 
- #endif
 
 
  |