// mpcio.hpp

#ifndef __MCPIO_HPP__
#define __MCPIO_HPP__

#include <iostream>
#include <fstream>
#include <vector>
#include <array>
#include <deque>
#include <queue>
#include <string>
#include <tuple>
#include <atomic>
#include <optional>
#include <cstring>      // memmove

#include <bsd/stdlib.h> // arc4random_buf

#include <boost/asio.hpp>
#include <boost/thread.hpp>
#include <boost/chrono.hpp>

#include "types.hpp"
#include "corotypes.hpp"
using boost::asio::ip::tcp;

// Classes to represent stored precomputed data (e.g., multiplication triples)

template<typename T, typename N>
class PreCompStorage {
public:
    PreCompStorage() : name(N::name), depth(0), count(0) {}
    PreCompStorage(unsigned player, ProcessingMode mode,
        const char *filenameprefix, unsigned thread_num);
    void init(unsigned player, ProcessingMode mode,
        const char *filenameprefix, unsigned thread_num,
        nbits_t depth = 0, nbits_t width = 1);
    void get(T& nextval);

    inline void inc() { ++count; }
    inline size_t get_stats() { return count; }
    inline void reset_stats() { count = 0; }

private:
    std::ifstream storage;
    std::string name;
    nbits_t depth;
    nbits_t width;
    size_t count;
};
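
// Illustrative usage sketch (not part of this header): reading one
// precomputed multiplication triple for thread 0 of player 0. The
// MultTriple/MultTripleName types come from types.hpp; the "precomp"
// filename prefix and the MODE_ONLINE value of ProcessingMode are
// assumptions made for this example.
//
//   PreCompStorage<MultTriple, MultTripleName> triples;
//   triples.init(0, MODE_ONLINE, "precomp", 0);
//   MultTriple T;
//   triples.get(T);                     // pull the next stored value
//   size_t used = triples.get_stats();  // stats counter (see inc()/reset_stats())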
// If we want to send Lamport clocks in messages, define this. It adds
// an 8-byte header to each message (length and Lamport clock), so it
// has a small network cost. We always define and pass the Lamport
// clock member of MPCIO to the IO functions for simplicity, but they're
// ignored if this isn't defined.
#define SEND_LAMPORT_CLOCKS

using lamport_t = uint32_t;
using atomic_lamport_t = std::atomic<lamport_t>;
using opt_lamport_t = std::optional<lamport_t>;

#ifdef SEND_LAMPORT_CLOCKS
struct MessageWithHeader {
    std::string header;
    std::string message;

    MessageWithHeader(std::string &&msg, lamport_t lamport) :
            message(std::move(msg)) {
        char hdr[sizeof(uint32_t) + sizeof(lamport_t)];
        uint32_t msglen = uint32_t(message.size());
        memmove(hdr, &msglen, sizeof(msglen));
        memmove(hdr+sizeof(msglen), &lamport, sizeof(lamport));
        header.assign(hdr, sizeof(hdr));
    }
};
#endif
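
// Illustrative sketch of the framing produced above (the values are
// hypothetical):
//
//   lamport_t clock = 7;
//   MessageWithHeader mwh(std::string("hello"), clock);
//   // mwh.header is 8 bytes: the 4-byte message length (5) followed by
//   // the 4-byte Lamport clock (7), both in host byte order.
//   // mwh.message is the 5-byte payload "hello".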
// A class to wrap a socket to another MPC party. This wrapping allows
// us to do some useful logging, and perform async_writes transparently
// to the application.

class MPCSingleIO {
    tcp::socket sock;
    size_t totread, totwritten;
    std::string dest;
    int thread_num;
#ifdef RECORD_IOTRACE
    std::vector<ssize_t> iotrace;
#endif
    // To avoid blocking if both we and our peer are trying to send
    // something very large, and neither side is receiving, we will send
    // with async_write. But this has a number of implications:
    // - The data to be sent has to be copied into this MPCSingleIO,
    //   since asio::buffer pointers are not guaranteed to remain valid
    //   after the end of the coroutine that created them
    // - We have to keep a queue of messages to be sent, in case
    //   coroutines call send() before the previous message has finished
    //   being sent
    // - This queue may be accessed from the async_write thread as well
    //   as the work thread that uses this MPCSingleIO directly (there
    //   should be only one of the latter), so we need some locking

    // This is where we accumulate data passed in queue()
    std::string dataqueue;

    // When send() is called, the above dataqueue is appended to this
    // messagequeue, and the dataqueue is reset. If messagequeue was
    // empty before this append, launch async_write to write the first
    // thing in the messagequeue. When async_write completes, it will
    // delete the first thing in the messagequeue, and see if there are
    // any more elements. If so, it will start another async_write.
    // The invariant is that there is an async_write currently running
    // iff messagequeue is nonempty.
#ifdef SEND_LAMPORT_CLOCKS
    std::queue<MessageWithHeader> messagequeue;
#else
    std::queue<std::string> messagequeue;
#endif

    // If a single message is broken into chunks in order to get the
    // first part of it out on the wire while the rest of it is still
    // being computed, we want the Lamport clock of all the chunks to be
    // that of when the message is first created. This value will be
    // nullopt when there has been no queue() since the last explicit
    // send() (as opposed to the implicit send() called by queue()
    // itself if it wants to get a chunk on its way), and will be set to
    // the current Lamport clock when that first queue() after each
    // explicit send() happens.
    opt_lamport_t message_lamport;

#ifdef SEND_LAMPORT_CLOCKS
    // If Lamport clocks are being sent, then the data stream is divided
    // into chunks, each with a header containing the length of the
    // chunk and the Lamport clock. So when we read, we'll read a whole
    // chunk, and store it here. Then calls to recv() will read pieces
    // of this buffer until it has all been read, and then read the next
    // header and chunk.
    std::string recvdata;
    size_t recvdataremain;
#endif

    // Never touch the above messagequeue without holding this lock (you
    // _can_ touch the strings it contains, though, if you looked one up
    // while holding the lock).
    boost::mutex messagequeuelock;

    // Asynchronously send the first message from the message queue.
    // * The messagequeuelock must be held when this is called! *
    // This method may be called from either thread (the work thread or
    // the async_write handler thread).
    void async_send_from_msgqueue();

public:
    MPCSingleIO(tcp::socket &&sock, const char *dest, int thread_num) :
        sock(std::move(sock)), totread(0), totwritten(0), dest(dest),
        thread_num(thread_num)
#ifdef SEND_LAMPORT_CLOCKS
        , recvdataremain(0)
#endif
        {}

    // Returns 1 if a new message is started, 0 otherwise
    size_t queue(const void *data, size_t len, lamport_t lamport);

    void send(bool implicit_send = false);

    size_t recv(void *data, size_t len, lamport_t &lamport);

#ifdef RECORD_IOTRACE
    void dumptrace(std::ostream &os, const char *label = NULL);

    void resettrace() {
        iotrace.clear();
    }
#endif
};
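
// Illustrative sketch (the MPCSingleIO `sio` and the lamport_t `lamport`
// are assumed to be in scope): queue two values and flush them as one
// message. recv() takes the clock by reference so the implementation can
// advance it using the clock in the received header.
//
//   value_t x = 1, y = 2;
//   sio.queue(&x, sizeof(x), lamport);
//   sio.queue(&y, sizeof(y), lamport);
//   sio.send();                          // launches the async_write
//
//   value_t rx;
//   sio.recv(&rx, sizeof(rx), lamport);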
// A base class to represent all of a computation peer or server's IO,
// either to other parties or to local storage (the computation and
// server cases are separate subclasses below).

struct MPCIO {
    int player;
    ProcessingMode mode;
    size_t num_threads;
    atomic_lamport_t lamport;
    std::vector<size_t> msgs_sent;
    std::vector<size_t> msg_bytes_sent;
    std::vector<size_t> aes_ops;
    boost::chrono::steady_clock::time_point steady_start;
    boost::chrono::process_cpu_clock::time_point cpu_start;

    MPCIO(int player, ProcessingMode mode, size_t num_threads) :
        player(player), mode(mode),
        num_threads(num_threads), lamport(0)
    {
        reset_stats();
    }

    void reset_stats();

    static void dump_memusage(std::ostream &os);

    void dump_stats(std::ostream &os);

    // Make MPCIO objects non-copyable; otherwise the Lamport clock
    // gets duplicated and not kept in sync.
    MPCIO(const MPCIO&) = delete;
    MPCIO& operator=(const MPCIO&) = delete;
};
// A class to represent all of a computation peer's IO, either to other
// parties or to local storage

struct MPCPeerIO : public MPCIO {
    // We use a deque here instead of a vector because you can't have a
    // vector of a type without a copy constructor (tcp::socket is the
    // culprit), but you can have a deque of those for some reason.
    std::deque<MPCSingleIO> peerios;
    std::deque<MPCSingleIO> serverios;
    std::vector<PreCompStorage<MultTriple, MultTripleName>> multtriples;
    std::vector<PreCompStorage<HalfTriple, HalfTripleName>> halftriples;
    std::vector<PreCompStorage<AndTriple, AndTripleName>> andtriples;
    std::vector<PreCompStorage<
        SelectTriple<value_t>, ValSelectTripleName>> valselecttriples;
    std::vector<PreCompStorage<CDPF, CDPFName>> cdpfs;

    // The outer vector is (like above) one item per thread.
    // The inner array is indexed by DPF depth (depth d is at entry d-1).
    // We have one of these whole vectors-of-arrays for each RDPF width,
    // wrapped into a tuple; see the access sketch just after this struct.
    template <nbits_t WIDTH>
    using RDPFPrecomps =
        std::vector<std::array<
            PreCompStorage<RDPFTriple<WIDTH>, RDPFTripleName>, ADDRESS_MAX_BITS>>;
    template <nbits_t WIDTH>
    using IRDPFPrecomps =
        std::vector<std::array<
            PreCompStorage<RDPFTriple<WIDTH>, IRDPFTripleName>, ADDRESS_MAX_BITS>>;

    std::tuple<
        RDPFPrecomps<1>,
        RDPFPrecomps<2>,
        RDPFPrecomps<3>,
        RDPFPrecomps<4>,
        RDPFPrecomps<5>> rdpftriples;
    std::tuple<
        IRDPFPrecomps<1>,
        IRDPFPrecomps<2>,
        IRDPFPrecomps<3>,
        IRDPFPrecomps<4>,
        IRDPFPrecomps<5>> irdpftriples;

    MPCPeerIO(unsigned player, ProcessingMode mode,
        std::deque<tcp::socket> &peersocks,
        std::deque<tcp::socket> &serversocks);

    void dump_precomp_stats(std::ostream &os);

    void reset_precomp_stats();

    void dump_stats(std::ostream &os);
};
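
// Illustrative sketch (assumptions: an MPCPeerIO `peerio`, a thread number
// `t`, and a DPF depth `d` already in scope): selecting the per-thread,
// per-depth storage for RDPF width 2 from the tuple above. The tuple slot
// for width W is W-1, and depth d lives at array entry d-1, per the
// comments above.
//
//   constexpr nbits_t W = 2;
//   auto &store = std::get<W-1>(peerio.rdpftriples)[t][d-1];
//   RDPFTriple<W> trip;
//   store.get(trip);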
// A class to represent all of the server party's IO, either to
// computational parties or to local storage

struct MPCServerIO : public MPCIO {
    std::deque<MPCSingleIO> p0ios;
    std::deque<MPCSingleIO> p1ios;

    // The outer vector is (like above) one item per thread.
    // The inner array is indexed by DPF depth (depth d is at entry d-1).
    // We have one of these whole vectors-of-arrays for each RDPF width,
    // wrapped into a tuple.
    template <nbits_t WIDTH>
    using RDPFPrecomps =
        std::vector<std::array<
            PreCompStorage<RDPFPair<WIDTH>, RDPFPairName>, ADDRESS_MAX_BITS>>;
    template <nbits_t WIDTH>
    using IRDPFPrecomps =
        std::vector<std::array<
            PreCompStorage<RDPFPair<WIDTH>, IRDPFPairName>, ADDRESS_MAX_BITS>>;

    std::tuple<
        RDPFPrecomps<1>,
        RDPFPrecomps<2>,
        RDPFPrecomps<3>,
        RDPFPrecomps<4>,
        RDPFPrecomps<5>> rdpfpairs;
    std::tuple<
        IRDPFPrecomps<1>,
        IRDPFPrecomps<2>,
        IRDPFPrecomps<3>,
        IRDPFPrecomps<4>,
        IRDPFPrecomps<5>> irdpfpairs;

    MPCServerIO(ProcessingMode mode,
        std::deque<tcp::socket> &p0socks,
        std::deque<tcp::socket> &p1socks);

    void dump_precomp_stats(std::ostream &os);

    void reset_precomp_stats();

    void dump_stats(std::ostream &os);
};
// A minimal stream-like interface (write()/read()) over an MPCSingleIO
// that carries the associated Lamport clock reference and updates the
// caller's message and byte counters.
class MPCSingleIOStream {
    MPCSingleIO &sio;
    lamport_t &lamport;
    size_t &msgs_sent;
    size_t &msg_bytes_sent;

public:
    MPCSingleIOStream(MPCSingleIO &sio, lamport_t &lamport,
            size_t &msgs_sent, size_t &msg_bytes_sent) :
        sio(sio), lamport(lamport), msgs_sent(msgs_sent),
        msg_bytes_sent(msg_bytes_sent) {}

    MPCSingleIOStream& write(const char *data, std::streamsize len) {
        size_t newmsg = sio.queue(data, len, lamport);
        msgs_sent += newmsg;
        msg_bytes_sent += len;
        return *this;
    }

    MPCSingleIOStream& read(char *data, std::streamsize len) {
        sio.recv(data, len, lamport);
        return *this;
    }
};
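
// Illustrative sketch (the MPCSingleIOStream `st` is assumed to be in
// scope, e.g. obtained from MPCTIO::iostream_peer() below): write()
// forwards to MPCSingleIO::queue() and bumps the message/byte counters,
// so an explicit send() is still needed to flush the data.
//
//   value_t x = 1;
//   st.write(reinterpret_cast<const char*>(&x), sizeof(x));
//   value_t y;
//   st.read(reinterpret_cast<char*>(&y), sizeof(y));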
// A handle to one thread's sockets and streams in an MPCIO

class MPCTIO {
    int thread_num;

    // The number of threads a coroutine using this MPCTIO can use for
    // local computation (no communication and no yielding). Multiple
    // coroutines with the same MPCTIO can have this value larger than
    // 1, since they will not be able to use multiple threads at the
    // same time.
    int local_cpu_nthreads;

    // The number of threads a coroutine using this MPCTIO can launch
    // into separate MPCTIOs with their own communication. It is
    // important that at most one coroutine using this MPCTIO can have
    // this value set larger than 1, since all MPCTIOs with the same
    // thread_num (and so using the same sockets) have to be controlled
    // by the same run_coroutines(tio, ...) call.
    int communication_nthreads;

    lamport_t thread_lamport;
    MPCIO &mpcio;

    std::optional<MPCSingleIOStream> peer_iostream;
    std::optional<MPCSingleIOStream> server_iostream;
    std::optional<MPCSingleIOStream> p0_iostream;
    std::optional<MPCSingleIOStream> p1_iostream;

#ifdef VERBOSE_COMMS
    size_t round_num;
#endif

    // We implement SelectTriple<bit_t> by fetching a single AndTriple
    // and using it for producing 64 bitwise SelectTriple<bit_t>s.
    AndTriple last_andtriple;
    nbits_t last_andtriple_bits_remaining;

    // We allow for prefetching of SelectTriple<DPFnode>s to save one
    // network round per level when constructing RDPFs
    std::deque<SelectTriple<DPFnode>> queued_nodeselecttriples;

    // For P0 and P1, it should always be the case that
    // remaining_nodesselecttriples equals
    // queued_nodeselecttriples.size(). P2 does not store anything in
    // queued_nodeselecttriples, however.
    size_t remaining_nodesselecttriples;

public:
    // Make MPCTIO objects non-copyable; otherwise the Lamport clock
    // gets duplicated and not kept in sync.
    MPCTIO(const MPCTIO&) = delete;
    MPCTIO& operator=(const MPCTIO&) = delete;

    MPCTIO(MPCIO &mpcio, int thread_num, int num_threads = 1);

    // Sync our per-thread Lamport clock with the master one in the
    // mpcio. You only need to call this explicitly if your MPCTIO
    // outlives your thread (in which case call it after the join), or
    // if your threads do interthread communication amongst themselves
    // (in which case call it in the sending thread before the send, and
    // call it in the receiving thread after the receive). If you want
    // to call MPCIO::dump_stats() in the middle of a run (while the
    // MPCTIO is still alive), call this as well.
    void sync_lamport();

    // Only call this if you can be sure that there are no outstanding
    // messages in flight, you can call it on all existing MPCTIOs, and
    // you really want to reset the Lamport clock in the middle of a
    // run.
    void reset_lamport();

    // Read the thread_lamport counter, for performance debugging
    lamport_t get_thread_lamport() {
        return thread_lamport;
    }

    // The normal case, where the MPCIO is created inside the thread,
    // and so destructed when the thread ends, is handled automatically
    // here.
    ~MPCTIO() {
        send();
        sync_lamport();
    }

    // Computational peers use these functions:

    // Queue up data to the peer or to the server
    void queue_peer(const void *data, size_t len);
    void queue_server(const void *data, size_t len);

    // Receive data from the peer or from the server
    size_t recv_peer(void *data, size_t len);
    size_t recv_server(void *data, size_t len);

    // Or get these MPCSingleIOStreams
    MPCSingleIOStream& iostream_peer() { return peer_iostream.value(); }
    MPCSingleIOStream& iostream_server() { return server_iostream.value(); }

    // The server uses these functions:

    // Queue up data to p0 or p1
    void queue_p0(const void *data, size_t len);
    void queue_p1(const void *data, size_t len);

    // Receive data from p0 or p1
    size_t recv_p0(void *data, size_t len);
    size_t recv_p1(void *data, size_t len);

    // Or get these MPCSingleIOStreams
    MPCSingleIOStream& iostream_p0() { return p0_iostream.value(); }
    MPCSingleIOStream& iostream_p1() { return p1_iostream.value(); }

    // Everyone can use the remaining functions.

    // Send all queued data for this thread
    void send();

    // Functions to get precomputed values. If we're in the online
    // phase, get them from PreCompStorage. If we're in the
    // preprocessing phase, read them from the server.
    MultTriple multtriple(yield_t &yield);
    HalfTriple halftriple(yield_t &yield, bool tally=true);
    AndTriple andtriple(yield_t &yield);
    void request_nodeselecttriples(yield_t &yield, size_t num);
    SelectTriple<DPFnode> nodeselecttriple(yield_t &yield);
    SelectTriple<value_t> valselecttriple(yield_t &yield);
    SelectTriple<bit_t> bitselecttriple(yield_t &yield);

    // These ones only work during the online phase

    // Computational peers call:
    template <nbits_t WIDTH = 1>
    RDPFTriple<WIDTH> rdpftriple(yield_t &yield, nbits_t depth,
        bool incremental = false, bool keep_expansion = true);

    // The server calls:
    template <nbits_t WIDTH = 1>
    RDPFPair<WIDTH> rdpfpair(yield_t &yield, nbits_t depth,
        bool incremental = false);

    // Anyone can call:
    CDPF cdpf(yield_t &yield);

    // Accessors
    inline int player() { return mpcio.player; }
    inline bool preprocessing() { return mpcio.mode == MODE_PREPROCESSING; }
    inline bool is_server() { return mpcio.player == 2; }
    inline size_t& aes_ops() { return mpcio.aes_ops[thread_num]; }
    inline size_t msgs_sent() { return mpcio.msgs_sent[thread_num]; }

    // Get, and with a positive argument also set, the number of threads
    // available for local computation; the previous value is returned.
    inline int cpu_nthreads(int nthreads=0) {
        int res = local_cpu_nthreads;
        if (nthreads > 0) {
            local_cpu_nthreads = nthreads;
        }
        return res;
    }

    // Get, and with a positive argument also set, the number of threads
    // available for launching separate communicating MPCTIOs; the
    // previous value is returned.
    inline int comm_nthreads(int nthreads=0) {
        int res = communication_nthreads;
        if (nthreads > 0) {
            communication_nthreads = nthreads;
        }
        return res;
    }
};
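
// Illustrative sketch (assumptions: an MPCPeerIO `mpcpio` and a yield_t
// `yield` inside a coroutine, in the style of the run_coroutines(tio, ...)
// call mentioned above): a computational peer obtains a multiplication
// triple and exchanges a value with its peer on thread 0.
//
//   MPCTIO tio(mpcpio, 0);
//   MultTriple T = tio.multtriple(yield);   // from storage or the server
//   value_t x = 1;
//   tio.queue_peer(&x, sizeof(x));
//   tio.send();                             // flush this thread's queues
//   value_t theirs;
//   tio.recv_peer(&theirs, sizeof(theirs));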
// Set up the socket connections between the two computational parties
// (P0 and P1) and the server party (P2). For each connection, the
// lower-numbered party does the accept() and the higher-numbered party
// does the connect().

// Computational parties call this version with player=0 or 1
void mpcio_setup_computational(unsigned player,
    boost::asio::io_context &io_context,
    const char *p0addr,  // can be NULL when player=0
    int num_threads,
    std::deque<tcp::socket> &peersocks,
    std::deque<tcp::socket> &serversocks);

// Server calls this version
void mpcio_setup_server(boost::asio::io_context &io_context,
    const char *p0addr, const char *p1addr, int num_threads,
    std::deque<tcp::socket> &p0socks,
    std::deque<tcp::socket> &p1socks);
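
// Illustrative sketch (the address literal and thread count are
// hypothetical, and MODE_ONLINE is assumed to be a ProcessingMode value
// from types.hpp): player 1 builds its sockets and wraps them in an
// MPCPeerIO before spawning its worker threads.
//
//   boost::asio::io_context io_context;
//   std::deque<tcp::socket> peersocks, serversocks;
//   mpcio_setup_computational(1, io_context, "p0.example.org",
//       4, peersocks, serversocks);
//   MPCPeerIO mpcpio(1, MODE_ONLINE, peersocks, serversocks);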
#include "mpcio.tcc"

#endif