mpcio.hpp 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. #ifndef __MCPIO_HPP__
  2. #define __MCPIO_HPP__
  3. #include <iostream>
  4. #include <fstream>
  5. #include <vector>
  6. #include <deque>
  7. #include <queue>
  8. #include <string>
  9. #include <atomic>
  10. #include <optional>
  11. #include <bsd/stdlib.h> // arc4random_buf
  12. #include <boost/asio.hpp>
  13. #include <boost/thread.hpp>
  14. #include "types.hpp"
  15. using boost::asio::ip::tcp;
  // Classes to represent stored precomputed data (e.g., multiplication
  // triples).
  //
  // A PreCompStorage<T> reads successive values of type T, as raw bytes,
  // from the file "<filenameprefix>.p<player % 10>.t<thread_num>".  When
  // constructed with preprocessing == true no file is opened at all, and
  // the read counter simply stays at zero.
  template<typename T>
  class PreCompStorage {
  public:
      // player:         party number; only its last decimal digit is used
      //                 in the file name
      // preprocessing:  if true, do not open any storage file
      // filenameprefix: leading part of the storage file name
      // thread_num:     thread index, used as the ".t<N>" file suffix
      //
      // Prints a message to stderr and exits the process if the file
      // cannot be opened.
      PreCompStorage(unsigned player, bool preprocessing,
          const char *filenameprefix, unsigned thread_num);

      // Read the next stored value into nextval.  Prints a message to
      // stderr and exits the process if a full value cannot be read.
      void get(T& nextval);

      // Number of values successfully read since construction or since
      // the last reset_stats().
      inline size_t get_stats() { return count; }
      inline void reset_stats() { count = 0; }

  private:
      std::ifstream storage;
      // Initialized here (not in the constructor body) so that it is
      // also well defined on the early-return preprocessing path.
      size_t count = 0;
  };

  template<typename T>
  PreCompStorage<T>::PreCompStorage(unsigned player, bool preprocessing,
          const char *filenameprefix, unsigned thread_num) {
      if (preprocessing) return;

      // Build "<prefix>.p<digit>.t<thread>" without a fixed-size buffer
      // or printf-style format specifiers.
      std::string filename(filenameprefix);
      filename += ".p";
      filename += std::to_string(player % 10);
      filename += ".t";
      filename += std::to_string(thread_num);

      // The file holds raw object bytes, so open it in binary mode.
      storage.open(filename, std::ios::in | std::ios::binary);
      if (storage.fail()) {
          std::cerr << "Failed to open " << filename << "\n";
          exit(1);
      }
  }

  template<typename T>
  void PreCompStorage<T>::get(T& nextval) {
      // read() sets failbit when fewer than sizeof(T) bytes are
      // available, so testing the stream state covers short reads
      // without a signed/unsigned comparison against gcount().
      if (!storage.read(reinterpret_cast<char *>(&nextval), sizeof(T))) {
          std::cerr << "Failed to read precomputed value from storage\n";
          exit(1);
      }
      ++count;
  }
  // Define SEND_LAMPORT_CLOCKS to transmit Lamport clocks with every
  // message.  Each message then carries an 8-byte header (a 32-bit
  // length followed by the 32-bit Lamport clock), which is a small
  // network cost.  The Lamport clock member of MPCIO is always defined
  // and always passed to the IO functions to keep the code simple; it is
  // just ignored when this macro is not defined.
  #define SEND_LAMPORT_CLOCKS

  using lamport_t = uint32_t;
  using atomic_lamport_t = std::atomic<lamport_t>;
  using opt_lamport_t = std::optional<lamport_t>;

  #ifdef SEND_LAMPORT_CLOCKS
  // One outgoing chunk: the payload plus its wire header.  The header is
  // the payload length (uint32_t) immediately followed by the Lamport
  // clock (lamport_t), both copied as raw bytes in host byte order.
  struct MessageWithHeader {
      std::string header;
      std::string message;

      // Takes ownership of msg and builds the matching header for it.
      MessageWithHeader(std::string &&msg, lamport_t lamport) :
              message(std::move(msg)) {
          const uint32_t payload_len = uint32_t(message.size());
          header.append(reinterpret_cast<const char *>(&payload_len),
              sizeof(payload_len));
          header.append(reinterpret_cast<const char *>(&lamport),
              sizeof(lamport));
      }
  };
  #endif
  76. // A class to wrap a socket to another MPC party. This wrapping allows
  77. // us to do some useful logging, and perform async_writes transparently
  78. // to the application.
  79. class MPCSingleIO {
  80. tcp::socket sock;
  81. size_t totread, totwritten;
  82. #ifdef RECORD_IOTRACE
  83. std::vector<ssize_t> iotrace;
  84. #endif
  85. // To avoid blocking if both we and our peer are trying to send
  86. // something very large, and neither side is receiving, we will send
  87. // with async_write. But this has a number of implications:
  88. // - The data to be sent has to be copied into this MPCSingleIO,
  89. // since asio::buffer pointers are not guaranteed to remain valid
  90. // after the end of the coroutine that created them
  91. // - We have to keep a queue of messages to be sent, in case
  92. // coroutines call send() before the previous message has finished
  93. // being sent
  94. // - This queue may be accessed from the async_write thread as well
  95. // as the work thread that uses this MPCSingleIO directly (there
  96. // should be only one of the latter), so we need some locking
  97. // This is where we accumulate data passed in queue()
  98. std::string dataqueue;
  99. // When send() is called, the above dataqueue is appended to this
  100. // messagequeue, and the dataqueue is reset. If messagequeue was
  101. // empty before this append, launch async_write to write the first
  102. // thing in the messagequeue. When async_write completes, it will
  103. // delete the first thing in the messagequeue, and see if there are
  104. // any more elements. If so, it will start another async_write.
  105. // The invariant is that there is an async_write currently running
  106. // iff messagequeue is nonempty.
  107. #ifdef SEND_LAMPORT_CLOCKS
  108. std::queue<MessageWithHeader> messagequeue;
  109. // If a single message is broken into chunks in order to get the
  110. // first part of it out on the wire while the rest of it is still
  111. // being computed, we want the Lamport clock of all the chunks to be
  112. // that of when the message is first created. This value will be
  113. // nullopt when there has been no queue() since the last explicit
  114. // send() (as opposed to the implicit send() called by queue()
  115. // itself if it wants to get a chunk on its way), and will be set to
  116. // the current lamport clock when that first queue() after each
  117. // explicit send() happens.
  118. opt_lamport_t message_lamport;
  119. #else
  120. std::queue<std::string> messagequeue;
  121. #endif
  122. #ifdef SEND_LAMPORT_CLOCKS
  123. // If Lamport clocks are being sent, then the data stream is divided
  124. // into chunks, each with a header containing the length of the
  125. // chunk and the Lamport clock. So when we read, we'll read a whole
  126. // chunk, and store it here. Then calls to recv() will read pieces
  127. // of this buffer until it has all been read, and then read the next
  128. // header and chunk.
  129. std::string recvdata;
  130. size_t recvdataremain;
  131. #endif
  132. // Never touch the above messagequeue without holding this lock (you
  133. // _can_ touch the strings it contains, though, if you looked one up
  134. // while holding the lock).
  135. boost::mutex messagequeuelock;
  136. // Asynchronously send the first message from the message queue.
  137. // * The messagequeuelock must be held when this is called! *
  138. // This method may be called from either thread (the work thread or
  139. // the async_write handler thread).
  140. void async_send_from_msgqueue() {
  141. #ifdef SEND_LAMPORT_CLOCKS
  142. std::vector<boost::asio::const_buffer> tosend;
  143. tosend.push_back(boost::asio::buffer(messagequeue.front().header));
  144. tosend.push_back(boost::asio::buffer(messagequeue.front().message));
  145. #endif
  146. boost::asio::async_write(sock,
  147. #ifdef SEND_LAMPORT_CLOCKS
  148. tosend,
  149. #else
  150. boost::asio::buffer(messagequeue.front()),
  151. #endif
  152. [&](boost::system::error_code ec, std::size_t amt){
  153. messagequeuelock.lock();
  154. messagequeue.pop();
  155. if (messagequeue.size() > 0) {
  156. async_send_from_msgqueue();
  157. }
  158. messagequeuelock.unlock();
  159. });
  160. }
  161. public:
  162. MPCSingleIO(tcp::socket &&sock) :
  163. sock(std::move(sock)), totread(0), totwritten(0) {}
  164. void queue(const void *data, size_t len, lamport_t lamport) {
  165. dataqueue.append((const char *)data, len);
  166. #ifdef SEND_LAMPORT_CLOCKS
  167. // If this is the first queue() since the last explicit send(),
  168. // which we'll know because message_lamport will be nullopt, set
  169. // message_lamport to the current Lamport clock. Note that the
  170. // boolean test tests whether message_lamport is nullopt, not
  171. // whether its value is zero.
  172. if (!message_lamport) {
  173. message_lamport = lamport;
  174. }
  175. #endif
  176. // If we already have some full packets worth of data, may as
  177. // well send it.
  178. if (dataqueue.size() > 28800) {
  179. send(true);
  180. }
  181. }
  182. void send(bool implicit_send = false) {
  183. size_t thissize = dataqueue.size();
  184. // Ignore spurious calls to send(), except for resetting
  185. // message_lamport if this was an explicit send().
  186. if (thissize == 0) {
  187. #ifdef SEND_LAMPORT_CLOCKS
  188. // If this was an explicit send(), reset the message_lamport so
  189. // that it gets updated at the next queue().
  190. if (!implicit_send) {
  191. message_lamport.reset();
  192. }
  193. #endif
  194. return;
  195. }
  196. #ifdef RECORD_IOTRACE
  197. iotrace.push_back(thissize);
  198. #endif
  199. messagequeuelock.lock();
  200. // Move the current message to send into the message queue (this
  201. // moves a pointer to the data, not copying the data itself)
  202. #ifdef SEND_LAMPORT_CLOCKS
  203. messagequeue.emplace(std::move(dataqueue),
  204. message_lamport.value());
  205. // If this was an explicit send(), reset the message_lamport so
  206. // that it gets updated at the next queue().
  207. if (!implicit_send) {
  208. message_lamport.reset();
  209. }
  210. #else
  211. messagequeue.emplace(std::move(dataqueue));
  212. #endif
  213. // If this is now the first thing in the message queue, launch
  214. // an async_write to write it
  215. if (messagequeue.size() == 1) {
  216. async_send_from_msgqueue();
  217. }
  218. messagequeuelock.unlock();
  219. }
  220. size_t recv(void *data, size_t len, lamport_t &lamport) {
  221. #ifdef SEND_LAMPORT_CLOCKS
  222. char *cdata = (char *)data;
  223. size_t res = 0;
  224. while (len > 0) {
  225. while (recvdataremain == 0) {
  226. // Read a new header
  227. char hdr[sizeof(uint32_t) + sizeof(lamport_t)];
  228. uint32_t datalen;
  229. lamport_t recv_lamport;
  230. boost::asio::read(sock, boost::asio::buffer(hdr, sizeof(hdr)));
  231. memmove(&datalen, hdr, sizeof(datalen));
  232. memmove(&recv_lamport, hdr+sizeof(datalen), sizeof(lamport_t));
  233. lamport_t new_lamport = recv_lamport + 1;
  234. if (lamport < new_lamport) {
  235. lamport = new_lamport;
  236. }
  237. if (datalen > 0) {
  238. recvdata.resize(datalen, '\0');
  239. boost::asio::read(sock, boost::asio::buffer(recvdata));
  240. recvdataremain = datalen;
  241. }
  242. }
  243. size_t amttoread = len;
  244. if (amttoread > recvdataremain) {
  245. amttoread = recvdataremain;
  246. }
  247. memmove(cdata, recvdata.data()+recvdata.size()-recvdataremain,
  248. amttoread);
  249. cdata += amttoread;
  250. len -= amttoread;
  251. recvdataremain -= amttoread;
  252. res += amttoread;
  253. }
  254. return res;
  255. #else
  256. size_t res = boost::asio::read(sock, boost::asio::buffer(data, len));
  257. #ifdef RECORD_IOTRACE
  258. iotrace.push_back(-(ssize_t(res)));
  259. #endif
  260. return res;
  261. #endif
  262. }
  263. #ifdef RECORD_IOTRACE
  264. void dumptrace(std::ostream &os, const char *label = NULL) {
  265. if (label) {
  266. os << label << " ";
  267. }
  268. os << "IO trace:";
  269. for (auto& s: iotrace) {
  270. os << " " << s;
  271. }
  272. os << "\n";
  273. }
  274. void resettrace() {
  275. iotrace.clear();
  276. }
  277. #endif
  278. };
  279. // A base class to represent all of a computation peer or server's IO,
  280. // either to other parties or to local storage (the computation and
  281. // server cases are separate subclasses below).
  282. struct MPCIO {
  283. int player;
  284. bool preprocessing;
  285. atomic_lamport_t lamport;
  286. MPCIO(int player, bool preprocessing) :
  287. player(player), preprocessing(preprocessing), lamport(0) {}
  288. };
  289. // A class to represent all of a computation peer's IO, either to other
  290. // parties or to local storage
  291. struct MPCPeerIO : public MPCIO {
  292. // We use a deque here instead of a vector because you can't have a
  293. // vector of a type without a copy constructor (tcp::socket is the
  294. // culprit), but you can have a deque of those for some reason.
  295. std::deque<MPCSingleIO> peerios;
  296. std::deque<MPCSingleIO> serverios;
  297. std::vector<PreCompStorage<MultTriple>> triples;
  298. std::vector<PreCompStorage<HalfTriple>> halftriples;
  299. MPCPeerIO(unsigned player, bool preprocessing,
  300. std::deque<tcp::socket> &peersocks,
  301. std::deque<tcp::socket> &serversocks) :
  302. MPCIO(player, preprocessing)
  303. {
  304. unsigned num_threads = unsigned(peersocks.size());
  305. for (unsigned i=0; i<num_threads; ++i) {
  306. triples.emplace_back(player, preprocessing, "triples", i);
  307. }
  308. for (unsigned i=0; i<num_threads; ++i) {
  309. halftriples.emplace_back(player, preprocessing, "halves", i);
  310. }
  311. for (auto &&sock : peersocks) {
  312. peerios.emplace_back(std::move(sock));
  313. }
  314. for (auto &&sock : serversocks) {
  315. serverios.emplace_back(std::move(sock));
  316. }
  317. }
  318. void dump_precomp_stats(std::ostream &os)
  319. {
  320. for (size_t i=0; i<triples.size(); ++i) {
  321. if (i > 0) {
  322. os << " ";
  323. }
  324. os << "T" << i << " t:" << triples[i].get_stats() <<
  325. " h:" << halftriples[i].get_stats();
  326. }
  327. os << "\n";
  328. }
  329. void reset_precomp_stats()
  330. {
  331. for (size_t i=0; i<triples.size(); ++i) {
  332. triples[i].reset_stats();
  333. halftriples[i].reset_stats();
  334. }
  335. }
  336. };
  337. // A class to represent all of the server party's IO, either to
  338. // computational parties or to local storage
  339. struct MPCServerIO : public MPCIO {
  340. std::deque<MPCSingleIO> p0ios;
  341. std::deque<MPCSingleIO> p1ios;
  342. MPCServerIO(bool preprocessing,
  343. std::deque<tcp::socket> &p0socks,
  344. std::deque<tcp::socket> &p1socks) :
  345. MPCIO(2, preprocessing)
  346. {
  347. for (auto &&sock : p0socks) {
  348. p0ios.emplace_back(std::move(sock));
  349. }
  350. for (auto &&sock : p1socks) {
  351. p1ios.emplace_back(std::move(sock));
  352. }
  353. }
  354. };
  355. // A handle to one thread's sockets and streams in a MPCIO
  356. class MPCTIO {
  357. int thread_num;
  358. lamport_t thread_lamport;
  359. MPCIO &mpcio;
  360. public:
  361. MPCTIO(MPCIO &mpcio, int thread_num):
  362. thread_num(thread_num), thread_lamport(mpcio.lamport),
  363. mpcio(mpcio) {}
  364. // Sync our per-thread lamport clock with the master one in the
  365. // mpcio. You only need to call this explicitly if your MPCTIO
  366. // outlives your thread (in which case call it after the join), or
  367. // if your threads do interthread communication amongst themselves
  368. // (in which case call it in the sending thread before the send, and
  369. // call it in the receiving thread after the receive).
  370. void sync_lamport() {
  371. // Update the mpcio Lamport time to be max of the thread Lamport
  372. // time and what we thought it was before. We use this
  373. // compare_exchange construction in order to atomically
  374. // do the comparison, computation, and replacement
  375. lamport_t old_lamport = mpcio.lamport;
  376. lamport_t new_lamport = thread_lamport;
  377. do {
  378. if (new_lamport < old_lamport) {
  379. new_lamport = old_lamport;
  380. }
  381. // The next line atomically checks if lamport still has
  382. // the value old_lamport; if so, it changes its value to
  383. // new_lamport and returns true (ending the loop). If
  384. // not, it sets old_lamport to the current value of
  385. // lamport, and returns false (continuing the loop so
  386. // that new_lamport can be recomputed based on this new
  387. // value).
  388. } while (!mpcio.lamport.compare_exchange_weak(
  389. old_lamport, new_lamport));
  390. thread_lamport = new_lamport;
  391. }
  392. // The normal case, where the MPCIO is created inside the thread,
  393. // and so destructed when the thread ends, is handles automatically
  394. // here.
  395. ~MPCTIO() {
  396. sync_lamport();
  397. }
  398. // Queue up data to the peer or to the server
  399. void queue_peer(const void *data, size_t len) {
  400. if (mpcio.player < 2) {
  401. MPCPeerIO &mpcpio = static_cast<MPCPeerIO&>(mpcio);
  402. mpcpio.peerios[thread_num].queue(data, len, thread_lamport);
  403. }
  404. }
  405. void queue_server(const void *data, size_t len) {
  406. if (mpcio.player < 2) {
  407. MPCPeerIO &mpcpio = static_cast<MPCPeerIO&>(mpcio);
  408. mpcpio.serverios[thread_num].queue(data, len, thread_lamport);
  409. }
  410. }
  411. // Receive data from the peer or to the server
  412. size_t recv_peer(void *data, size_t len) {
  413. if (mpcio.player < 2) {
  414. MPCPeerIO &mpcpio = static_cast<MPCPeerIO&>(mpcio);
  415. return mpcpio.peerios[thread_num].recv(data, len, thread_lamport);
  416. }
  417. return 0;
  418. }
  419. size_t recv_server(void *data, size_t len) {
  420. if (mpcio.player < 2) {
  421. MPCPeerIO &mpcpio = static_cast<MPCPeerIO&>(mpcio);
  422. return mpcpio.serverios[thread_num].recv(data, len, thread_lamport);
  423. }
  424. return 0;
  425. }
  426. // Queue up data to p0 or p1
  427. void queue_p0(const void *data, size_t len) {
  428. if (mpcio.player == 2) {
  429. MPCServerIO &mpcsrvio = static_cast<MPCServerIO&>(mpcio);
  430. mpcsrvio.p0ios[thread_num].queue(data, len, thread_lamport);
  431. }
  432. }
  433. void queue_p1(const void *data, size_t len) {
  434. if (mpcio.player == 2) {
  435. MPCServerIO &mpcsrvio = static_cast<MPCServerIO&>(mpcio);
  436. mpcsrvio.p1ios[thread_num].queue(data, len, thread_lamport);
  437. }
  438. }
  439. // Receive data from p0 or p1
  440. size_t recv_p0(void *data, size_t len) {
  441. if (mpcio.player == 2) {
  442. MPCServerIO &mpcsrvio = static_cast<MPCServerIO&>(mpcio);
  443. return mpcsrvio.p0ios[thread_num].recv(data, len, thread_lamport);
  444. }
  445. return 0;
  446. }
  447. size_t recv_p1(void *data, size_t len) {
  448. if (mpcio.player == 2) {
  449. MPCServerIO &mpcsrvio = static_cast<MPCServerIO&>(mpcio);
  450. return mpcsrvio.p1ios[thread_num].recv(data, len, thread_lamport);
  451. }
  452. return 0;
  453. }
  454. // Send all queued data for this thread
  455. void send() {
  456. if (mpcio.player < 2) {
  457. MPCPeerIO &mpcpio = static_cast<MPCPeerIO&>(mpcio);
  458. mpcpio.peerios[thread_num].send();
  459. mpcpio.serverios[thread_num].send();
  460. } else {
  461. MPCServerIO &mpcsrvio = static_cast<MPCServerIO&>(mpcio);
  462. mpcsrvio.p0ios[thread_num].send();
  463. mpcsrvio.p1ios[thread_num].send();
  464. }
  465. }
  466. // Functions to get precomputed values. If we're in the online
  467. // phase, get them from PreCompStorage. If we're in the
  468. // preprocessing phase, read them from the server.
  469. MultTriple triple() {
  470. MultTriple val;
  471. if (mpcio.player < 2) {
  472. MPCPeerIO &mpcpio = static_cast<MPCPeerIO&>(mpcio);
  473. if (mpcpio.preprocessing) {
  474. recv_server(&val, sizeof(val));
  475. } else {
  476. mpcpio.triples[thread_num].get(val);
  477. }
  478. } else if (mpcio.preprocessing) {
  479. // Create triples (X0,Y0,Z0),(X1,Y1,Z1) such that
  480. // (X0*Y1 + Y0*X1) = (Z0+Z1)
  481. value_t X0, Y0, Z0, X1, Y1, Z1;
  482. arc4random_buf(&X0, sizeof(X0));
  483. arc4random_buf(&Y0, sizeof(Y0));
  484. arc4random_buf(&Z0, sizeof(Z0));
  485. arc4random_buf(&X1, sizeof(X1));
  486. arc4random_buf(&Y1, sizeof(Y1));
  487. Z1 = X0 * Y1 + X1 * Y0 - Z0;
  488. MultTriple T0, T1;
  489. T0 = std::make_tuple(X0, Y0, Z0);
  490. T1 = std::make_tuple(X1, Y1, Z1);
  491. queue_p0(&T0, sizeof(T0));
  492. queue_p1(&T1, sizeof(T1));
  493. }
  494. return val;
  495. }
  496. HalfTriple halftriple() {
  497. HalfTriple val;
  498. if (mpcio.player < 2) {
  499. MPCPeerIO &mpcpio = static_cast<MPCPeerIO&>(mpcio);
  500. if (mpcpio.preprocessing) {
  501. recv_server(&val, sizeof(val));
  502. } else {
  503. mpcpio.halftriples[thread_num].get(val);
  504. }
  505. } else if (mpcio.preprocessing) {
  506. // Create half-triples (X0,Z0),(Y1,Z1) such that
  507. // X0*Y1 = Z0 + Z1
  508. value_t X0, Z0, Y1, Z1;
  509. arc4random_buf(&X0, sizeof(X0));
  510. arc4random_buf(&Z0, sizeof(Z0));
  511. arc4random_buf(&Y1, sizeof(Y1));
  512. Z1 = X0 * Y1 - Z0;
  513. HalfTriple H0, H1;
  514. H0 = std::make_tuple(X0, Z0);
  515. H1 = std::make_tuple(Y1, Z1);
  516. queue_p0(&H0, sizeof(H0));
  517. queue_p1(&H1, sizeof(H1));
  518. }
  519. return val;
  520. }
  521. // Accessors
  522. inline int player() { return mpcio.player; }
  523. inline bool preprocessing() { return mpcio.preprocessing; }
  524. inline bool is_server() { return mpcio.player == 2; }
  525. };
  526. // Set up the socket connections between the two computational parties
  527. // (P0 and P1) and the server party (P2). For each connection, the
  528. // lower-numbered party does the accept() and the higher-numbered party
  529. // does the connect().
  530. // Computational parties call this version with player=0 or 1
  531. void mpcio_setup_computational(unsigned player,
  532. boost::asio::io_context &io_context,
  533. const char *p0addr, // can be NULL when player=0
  534. int num_threads,
  535. std::deque<tcp::socket> &peersocks,
  536. std::deque<tcp::socket> &serversocks);
  537. // Server calls this version
  538. void mpcio_setup_server(boost::asio::io_context &io_context,
  539. const char *p0addr, const char *p1addr, int num_threads,
  540. std::deque<tcp::socket> &p0socks,
  541. std::deque<tcp::socket> &p1socks);
  542. #endif