Browse Source

Nodes can send each other data asynchronously

Ian Goldberg 1 year ago
parent
commit
1d77726a88
4 changed files with 257 additions and 8 deletions
  1. 129 3
      App/net.cpp
  2. 99 3
      App/net.hpp
  3. 26 2
      App/start.cpp
  4. 3 0
      App/teems.cpp

+ 129 - 3
App/net.cpp

@@ -3,11 +3,137 @@
 #include "config.hpp"
 #include "net.hpp"
 
+// Take ownership of an already-connected socket to a peer node.
+//
+// The in-flight byte counters start at zero so that the "previous
+// message has been completely sent" assertion in send_message_header
+// is well-defined (and true) for the very first message; without this
+// they would hold indeterminate values.
+NodeIO::NodeIO(tcp::socket &&socket) : sock(std::move(socket)),
+    msgsize_inflight(0), chunksize_inflight(0)
+{
+}
+
+// Hand out a MAXCHUNKSIZE-byte frame for the caller to fill with chunk
+// data.  A frame previously given back through return_frame is reused
+// if one is available; otherwise a new one is allocated.
+uint8_t *NodeIO::request_frame()
+{
+    {
+        // Hold the lock for the emptiness check as well as the pop:
+        // return_frame pushes onto this deque from a different thread,
+        // so looking at the deque without the lock would be a data
+        // race.
+        boost::lock_guard<boost::mutex> guard(frame_deque_lock);
+        if (!frames_available.empty()) {
+            // Copy the pointer to the frame out of the deque and
+            // remove it from the deque.  Note this is _not_ taking the
+            // address of the element *in* the deque (and then popping
+            // it, which would invalidate that pointer).
+            uint8_t *frame = frames_available.back();
+            frames_available.pop_back();
+            return frame;
+        }
+    }
+
+    // Allocate a new frame (outside the lock).  Note that this memory
+    // will (at this time) never get deallocated.  In theory, we could
+    // deallocate it in return_frame, but if a certain number of frames
+    // were allocated here, it means we had that much data in flight
+    // (queued but not accepted for sending by the OS), and we're
+    // likely to need that much again.  Subsequent messages will
+    // _reuse_ the allocated data, though, so the used memory won't
+    // grow forever, and will be limited to the amount of in-flight
+    // data needed.
+    return new uint8_t[MAXCHUNKSIZE];
+}
+
+// Give a frame obtained from request_frame back to the pool so that a
+// later request_frame call can reuse it.  A NULL frame is ignored.
+void NodeIO::return_frame(uint8_t *frame)
+{
+    if (!frame) return;
+
+    // We push the frame back on to the end of the deque so that it will
+    // be the next one used.  This may lead to better cache behaviour?
+    //
+    // The scoped guard releases the mutex even if push_back throws, so
+    // a failed allocation cannot leave the pool locked forever.
+    boost::lock_guard<boost::mutex> guard(frame_deque_lock);
+    frames_available.push_back(frame);
+}
+
+// Queue an asynchronous write of a 5-byte command header, optionally
+// followed by len bytes of data.
+//
+// header: the command; only its first 5 bytes in memory are sent.
+// data:   payload to send after the header, or NULL.  When the write
+//         completes, data is passed to return_frame, so it must be a
+//         frame obtained from request_frame (or NULL).
+// len:    number of payload bytes; nothing is appended if it is 0.
+//
+// NOTE(review): Boost.Asio documents that no other write may be
+// started on a stream until an earlier async_write on it has
+// completed.  Nothing here serializes back-to-back calls on the same
+// socket -- confirm how callers sequence their sends, or queue them.
+void NodeIO::send_header_data(uint64_t header, uint8_t *data, size_t len)
+{
+    uint64_t *headerp;
+    {
+        // Put the header into the deque so it's in memory at a stable
+        // address during the async write.  The scoped guard releases
+        // the mutex even if push_back throws.
+        boost::lock_guard<boost::mutex> guard(header_deque_lock);
+        headers_inflight.push_back(header);
+        headerp = &(headers_inflight.back());
+    }
+
+    std::vector<boost::asio::const_buffer> tosend;
+    tosend.push_back(boost::asio::buffer(headerp, 5));
+    if (data != NULL && len > 0) {
+        tosend.push_back(boost::asio::buffer(data, len));
+    }
+    boost::asio::async_write(sock, tosend,
+        [this, headerp, data](boost::system::error_code, std::size_t){
+            {
+                // When the write completes, pop the header from the
+                // deque (which should now be in the front)
+                boost::lock_guard<boost::mutex> guard(header_deque_lock);
+                assert(!headers_inflight.empty() &&
+                    &(headers_inflight.front()) == headerp);
+                headers_inflight.pop_front();
+            }
+            // And return the frame
+            return_frame(data);
+        });
+}
+
+// Send an EPOCH command: command byte 0x00 in the low byte of the
+// header, with the epoch number in the bytes above it.  No data
+// follows the header.
+void NodeIO::send_epoch(uint32_t epoch_num)
+{
+    const uint64_t epoch_header =
+        (static_cast<uint64_t>(epoch_num) << 8) | 0x00;
+    send_header_data(epoch_header, NULL, 0);
+}
+
+// Send a MESSAGE command (command byte 0x01) announcing that chunks
+// totalling tot_message_len bytes will follow, and restart the
+// per-message accounting of how many chunk bytes have been sent.
+void NodeIO::send_message_header(uint32_t tot_message_len)
+{
+    const uint64_t msg_header =
+        (static_cast<uint64_t>(tot_message_len) << 8) | 0x01;
+    send_header_data(msg_header, NULL, 0);
+
+    // A new message may only start once every byte of the previous
+    // one has been passed to send_chunk.
+    assert(msgsize_inflight == chunksize_inflight);
+    chunksize_inflight = 0;
+    msgsize_inflight = tot_message_len;
+}
+
+// Send one CHUNK command (command byte 0x02) carrying chunk_len bytes
+// from data, and count those bytes against the size declared by the
+// most recent send_message_header call.  data is given back to the
+// frame pool by send_header_data when the write completes.
+void NodeIO::send_chunk(uint8_t *data, uint32_t chunk_len)
+{
+    assert(MAXCHUNKSIZE >= chunk_len);
+    const uint64_t chunk_header =
+        (static_cast<uint64_t>(chunk_len) << 8) | 0x02;
+    send_header_data(chunk_header, data, chunk_len);
+
+    // The chunks of a message may not add up to more than was
+    // announced in its MESSAGE header.
+    chunksize_inflight += chunk_len;
+    assert(msgsize_inflight >= chunksize_inflight);
+}
+
+// Synchronously read one 5-byte command header from the socket into
+// the first 5 bytes of header.  Returns true on success, or false if
+// the read threw for any reason.
+bool NodeIO::recv_header(uint64_t &header)
+{
+    // Clear all 8 bytes first, since only 5 of them are read into
+    header = 0;
+    try {
+        boost::asio::read(sock, boost::asio::buffer(&header, 5));
+        return true;
+    } catch (...) {
+        return false;
+    }
+}
+
+// Synchronously read the payload of a CHUNK command whose header (as
+// obtained from recv_header) is passed in.
+//
+// On success, returns true with data pointing at this NodeIO's receive
+// frame and len set to the payload length.  On failure (the header
+// claims more than MAXCHUNKSIZE bytes, or the read threw), returns
+// false with data == NULL and len == 0.
+bool NodeIO::recv_chunk(uint64_t header, uint8_t *&data, size_t &len)
+{
+    data = NULL;
+    len = 0;
+    assert((header & 0xff) == 0x02);
+
+    // The length sits above the command byte in the header
+    const size_t payload_len = header >> 8;
+    if (payload_len > MAXCHUNKSIZE) {
+        return false;
+    }
+    try {
+        boost::asio::read(sock,
+            boost::asio::buffer(receive_frame, payload_len));
+    } catch (...) {
+        return false;
+    }
+    len = payload_len;
+    data = receive_frame;
+    return true;
+}
+
 NetIO::NetIO(boost::asio::io_context &io_context, const Config &config)
     : conf(config), myconf(config.nodes[config.my_node_num])
 {
     num_nodes = conf.nodes.size();
-    nodesockets.resize(num_nodes);
+    nodeios.resize(num_nodes);
     me = conf.my_node_num;
 
     // Node number n will accept connections from nodes 0, ..., n-1 and
@@ -38,7 +164,7 @@ NetIO::NetIO(boost::asio::io_context &io_context, const Config &config)
         if (node_num >= num_nodes) {
             std::cerr << "Received bad node number\n";
         } else {
-            nodesockets[node_num] = std::move(nodesock);
+            nodeios[node_num].emplace(std::move(nodesock));
 #ifdef VERBOSE_NET
             std::cerr << "Received connection from " <<
                 config.nodes[node_num].name << "\n";
@@ -66,7 +192,7 @@ NetIO::NetIO(boost::asio::io_context &io_context, const Config &config)
         unsigned short node_num = (unsigned short)me;
         boost::asio::write(nodesock,
             boost::asio::buffer(&node_num, sizeof(node_num)));
-        nodesockets[i] = std::move(nodesock);
+        nodeios[i].emplace(std::move(nodesock));
 #ifdef VERBOSE_NET
         std::cerr << "Connected to " << config.nodes[i].name << "\n";
 #endif

+ 99 - 3
App/net.hpp

@@ -2,22 +2,118 @@
 #define __NET_HPP__
 
 #include <vector>
+#include <deque>
 #include <optional>
 #include <boost/asio.hpp>
+#include <boost/thread.hpp>
 
 #include "config.hpp"
 
+#define MAXCHUNKSIZE (65536+16) // bytes per frame/chunk; presumably 64 KiB of payload plus the 16-byte GCM tag (TODO confirm)
+
+// The inter-node (untrusted node to untrusted node) communication
+// protocol is as follows.  Nodes are numbered 0 through num_nodes-1.
+// At startup time, each pair of nodes establishes a TCP connection by
+// having the lower-numbered node connect to the higher-numbered node,
+// and send a two-byte value of its (the sender's) node number.  Once
+// all the connections are established, commands consist of a 5-byte
+// header, followed optionally by some data.  The commands are listed
+// below.  If a socket closes, we interpret that to mean the experiment
+// is over, and the node shuts down (which will close its own sockets,
+// its peers will shut down, etc.).  [This isn't the best idea for a
+// robust long-lived deployment, of course.]
+//
+// The commands are:
+//
+// EPOCH: 0x00 + 4-byte epoch number (little-endian)
+//
+// This command is sent by the leader (typically node 0) to each other
+// node at the start of each epoch.
+//
+// MESSAGE: 0x01 + 4-byte total message length (little-endian)
+//
+// This command says that a number of CHUNKs comprising a single
+// enclave-to-enclave message will follow, whose total size will be the
+// given value.  Note that the data itself is sent following a CHUNK
+// header, not a MESSAGE header, even if it's small.
+//
+// CHUNK: 0x02 + 4-byte chunk length (little-endian)
+// + that many bytes of data
+//
+// This command transmits the enclave-to-enclave data.  The data in the
+// chunk will be (after the enclave-to-enclave handshake, anyway)
+// AES-GCM encrypted to a key known to the receiving enclave (but not
+// the receiving untrusted node).  The chunk number (starting from 0 and
+// not reset between messages) will be the IV, which is not transmitted.
+// The 16-byte GCM tag will be the last 16 bytes of the chunk (and
+// included in the length in the chunk header).  The sum of the chunk
+// lengths since the last MESSAGE command may not exceed the length in
+// that MESSAGE command.
+
+// Data for chunks are stored in frames.  The frames are pre-allocated
+// to be MAXCHUNKSIZE bytes each, and reused as much as possible by the
+// NodeIO class.  A node will request a frame from the NodeIO, which
+// will return a pointer.  The node will pass that pointer to the
+// enclave, which will write data into it, and also return to the node
+// how much data it wrote.  The node will async_write the chunk header
+// and the chunk data.  The async write completion handler will return
+// the frame to the NodeIO when the write completes.
+//
+// Headers are stored as the low 5 bytes of a uint64_t.  Note that means
+// for headers containing sizes, the value of this uint64_t will be (for
+// example for the CHUNK header) (chunk_len << 8) + 0x02.
+
 using boost::asio::ip::tcp;
 
+// The connection to one peer node: the socket, a pool of reusable
+// frames for outgoing chunk data, and the headers of writes that have
+// been queued but have not yet completed.
+class NodeIO {
+    tcp::socket sock;
+    // Headers of queued writes, oldest first; each one stays here
+    // until its write completes
+    std::deque<uint64_t> headers_inflight;
+    // Frames that have been returned and can be handed out again
+    std::deque<uint8_t *> frames_available;
+    // Frames and headers are each used by one thread and returned by
+    // another, so each deque has its own mutex
+    boost::mutex frame_deque_lock;
+    boost::mutex header_deque_lock;
+
+    // The size announced in the most recent MESSAGE header
+    uint32_t msgsize_inflight;
+    // How many bytes of chunks have been sent for that message so far
+    uint32_t chunksize_inflight;
+
+    // The single frame that received chunk data is read into
+    uint8_t receive_frame[MAXCHUNKSIZE];
+
+    // Queue an async write of a header plus (optionally) len bytes of
+    // data; data is returned to the frame pool on completion
+    void send_header_data(uint64_t header, uint8_t *data, size_t len);
+
+public:
+    NodeIO(tcp::socket &&socket);
+
+    // Obtain a MAXCHUNKSIZE-byte frame to fill, and give one back
+    uint8_t *request_frame();
+    void return_frame(uint8_t *frame);
+
+    // Send the EPOCH, MESSAGE and CHUNK commands respectively.  The
+    // data passed to send_chunk should come from request_frame.
+    void send_epoch(uint32_t epoch_num);
+    void send_message_header(uint32_t tot_message_len);
+    void send_chunk(uint8_t *data, uint32_t chunk_len);
+
+    // Both receive functions return true on success and false on
+    // failure.
+    bool recv_header(uint64_t &header);
+    // recv_chunk takes the header obtained from recv_header and reads
+    // the chunk data into the one frame reserved for receiving, so
+    // finish with the contents (typically by passing them to the
+    // enclave) before calling it again.
+    bool recv_chunk(uint64_t header, uint8_t *&data, size_t &len);
+};
+
 class NetIO {
     const Config &conf;
     const NodeConfig &myconf;
-    size_t num_nodes;
-    size_t me;
-    std::vector<std::optional<tcp::socket>> nodesockets;
+    // One slot per node number, sized to num_nodes by the constructor.
+    // A slot is filled in when the connection to that node is
+    // established; presumably the slot for this node itself is never
+    // filled (TODO confirm).
+    std::deque<std::optional<NodeIO>> nodeios;
 
 public:
     NetIO(boost::asio::io_context &io_context, const Config &config);
+
+    // The total number of nodes, and this node's own number
+    size_t num_nodes;
+    size_t me;
+    // The connection to the given node.  Throws std::out_of_range for
+    // a node number that is out of range (rather than indexing past
+    // the end of the deque), and std::bad_optional_access if there is
+    // no connection to that node.
+    NodeIO &node(size_t node_num) { return nodeios.at(node_num).value(); }
 };
 
 #endif

+ 26 - 2
App/start.cpp

@@ -1,9 +1,33 @@
+#include <stdlib.h>
+
 #include "start.hpp"
 
 // Once all the networking is set up, start doing whatever we were asked
 // to do on the command line
 void start(NetIO &netio, int argc, char **argv)
 {
-    // Nothing yet
-}
+    // Fixed seed, so every run produces the same sequence of sizes
+    srand48(1);
+    // Send a bunch of data to all peers
+    for (size_t node_num = 0; node_num < netio.num_nodes; ++node_num) {
+        if (node_num == netio.me) continue;
+        NodeIO &node = netio.node(node_num);
+        uint32_t msgsize = lrand48() % 10000000;
+        // node_num is a size_t, so it is printed with %zu
+        printf("Msgsize to %zu: %u\n", node_num, msgsize);
+        node.send_message_header(msgsize);
 
+        // Split the message into chunks of random size (1 up to
+        // MAXCHUNKSIZE-1 bytes, capped at what is left of the
+        // message), each filled with a different byte value
+        uint8_t c = 0;
+        while (msgsize > 0) {
+            uint8_t* frame = node.request_frame();
+            uint32_t chunk_size = (lrand48() % (MAXCHUNKSIZE-1)) + 1;
+            if (chunk_size > msgsize) {
+                chunk_size = msgsize;
+            }
+            memset(frame, ++c, chunk_size);
+            node.send_chunk(frame, chunk_size);
+            msgsize -= chunk_size;
+        }
+    }
+
+    // NOTE(review): presumably this pause is to leave time for the
+    // queued asynchronous writes to complete -- confirm
+    printf("Sleeping\n");
+    sleep(10);
+}

+ 3 - 0
App/teems.cpp

@@ -141,6 +141,9 @@ static void usage(const char *argv0)
 
 int main(int argc, char **argv)
 {
+    // Unbuffer stdout
+    setbuf(stdout, NULL);
+
     if (initialize_enclave() < 0) {
         return -1;
     }