Browse Source

Trace and plot node-to-node messages

Ian Goldberg 1 year ago
parent
commit
9471a0b20e
3 changed files with 202 additions and 0 deletions
  1. 42 0
      App/net.cpp
  2. 7 0
      Makefile
  3. 153 0
      plot_traces

+ 42 - 0
App/net.cpp

@@ -115,6 +115,18 @@ void NodeIO::send_message_header(uint32_t tot_message_len)
     assert(chunksize_inflight == msgsize_inflight);
     msgsize_inflight = tot_message_len;
     chunksize_inflight = 0;
+#ifdef TRACE_SOCKIO
+    struct timeval now;
+    gettimeofday(&now, NULL);
+    printf("%lu.%06lu: RTE queueing %u bytes to %s\n", now.tv_sec,
+        now.tv_usec, msgsize_inflight,
+        g_netio->config().nodes[node_num].name.c_str());
+    if (msgsize_inflight == 0) {
+        printf("%lu.%06lu: RTE queued %u bytes to %s\n", now.tv_sec,
+            now.tv_usec, msgsize_inflight,
+            g_netio->config().nodes[node_num].name.c_str());
+    }
+#endif
 }
 
 bool NodeIO::send_chunk(uint8_t *data, uint32_t chunk_len)
@@ -124,6 +136,15 @@ bool NodeIO::send_chunk(uint8_t *data, uint32_t chunk_len)
     send_header_data(header, data, chunk_len);
     chunksize_inflight += chunk_len;
     assert(chunksize_inflight <= msgsize_inflight);
+#ifdef TRACE_SOCKIO
+    if (msgsize_inflight == chunksize_inflight) {
+        struct timeval now;
+        gettimeofday(&now, NULL);
+        printf("%lu.%06lu: RTE queued %u bytes to %s\n", now.tv_sec,
+            now.tv_usec, msgsize_inflight,
+            g_netio->config().nodes[node_num].name.c_str());
+    }
+#endif
     return (chunksize_inflight < msgsize_inflight);
 }
 
@@ -147,6 +168,18 @@ void NodeIO::recv_commands(
                 assert(recv_msgsize_inflight == recv_chunksize_inflight);
                 recv_msgsize_inflight = uint32_t(receive_header >> 8);
                 recv_chunksize_inflight = 0;
+#ifdef TRACE_SOCKIO
+                struct timeval now;
+                gettimeofday(&now, NULL);
+                printf("%lu.%06lu: RTE receiving %u bytes from %s\n", now.tv_sec,
+                    now.tv_usec, recv_msgsize_inflight,
+                    g_netio->config().nodes[node_num].name.c_str());
+                if (recv_msgsize_inflight == 0) {
+                    printf("%lu.%06lu: RTE received %u bytes from %s\n", now.tv_sec,
+                        now.tv_usec, recv_msgsize_inflight,
+                        g_netio->config().nodes[node_num].name.c_str());
+                }
+#endif
                 if (ecall_message(node_num, recv_msgsize_inflight)) {
                     recv_commands(error_cb, epoch_cb);
                 } else {
@@ -165,6 +198,15 @@ void NodeIO::recv_commands(
                             error_cb(ecc);
                             return;
                         }
+#ifdef TRACE_SOCKIO
+                    if (recv_msgsize_inflight == recv_chunksize_inflight) {
+                        struct timeval now;
+                        gettimeofday(&now, NULL);
+                        printf("%lu.%06lu: RTE received %u bytes from %s\n", now.tv_sec,
+                            now.tv_usec, recv_msgsize_inflight,
+                            g_netio->config().nodes[node_num].name.c_str());
+                    }
+#endif
                         if (ecall_chunk(node_num, receive_frame,
                                 this_chunk_size)) {
                             recv_commands(error_cb, epoch_cb);

+ 7 - 0
Makefile

@@ -31,6 +31,9 @@
 #
 #
 
+# Turn this on to output log lines when messages are sent and received
+TRACE_SOCKIO ?= 0
+
 ######## SGX SDK Settings ########
 
 SGX_SDK ?= /opt/intel/sgxsdk
@@ -68,6 +71,10 @@ else
         SGX_COMMON_FLAGS += -O3
 endif
 
+ifeq ($(TRACE_SOCKIO), 1)
+	SGX_COMMON_FLAGS += -DTRACE_SOCKIO
+endif
+
 SGX_COMMON_FLAGS += -Wall -Wextra -Winit-self -Wpointer-arith -Wreturn-type \
                     -Waddress -Wsequence-point -Wformat-security \
                     -Wmissing-include-dirs -Wfloat-equal -Wundef -Wshadow \

+ 153 - 0
plot_traces

@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+
+# Plot the network traces output when TRACE_SOCKIO is set to 1
+# ("make TRACE_SOCKIO=1" to build in this configuration)
+
+# Args: log_directory
+
+# Outputs trace.tex (and the files pdflatex builds, including the target
+# trace.pdf) into the log directory
+
+import glob
+import os
+import re
+import sys
+
+if len(sys.argv) != 2:
+    print(f"Usage: {sys.argv[0]} log_directory", file=sys.stderr)
+    sys.exit(1)
+
+# This will throw an exception if the directory does not exist
+os.chdir(sys.argv[1])
+
+nodelogs = glob.glob("s*.log")
+nodelogs.sort(key=lambda filename : int(filename[1:-4]))
+
+# Pass 1: For each sender and receiver, make a list of each message
+# queued from that sender to receiver, noting its queue start time,
+# queue end time, size, and type
+
+queued_messages = {}
+min_ts = None
+
+for nodelog in nodelogs:
+    node = nodelog[:-4]
+    with open(nodelog) as logf:
+        queueing = {}
+        for logline in logf:
+            logline = logline.rstrip()
+            if "RTE" in logline:
+                if "queueing" in logline:
+                    matches = re.match(
+                        r'(\d+\.\d+): RTE queueing (\d+) bytes to (\S+)',
+                        logline)
+                    [ts, size, recv] = matches.groups()
+                    assert(recv not in queueing)
+                    tsf = float(ts)
+                    queueing[recv] = \
+                        { 'queue_start': tsf, 'size': size }
+                    if min_ts is None or min_ts > tsf:
+                        min_ts = tsf
+                if "queued" in logline:
+                    matches = re.match(
+                        r'(\d+\.\d+): RTE queued (\d+) bytes to (\S+)',
+                        logline)
+                    [ts, size, recv] = matches.groups()
+                    assert(recv in queueing)
+                    assert(queueing[recv]['size'] == size)
+                    if (node, recv) not in queued_messages:
+                        queued_messages[(node, recv)] = []
+                    msg = {
+                        'queue_start': queueing[recv]['queue_start'],
+                        'queue_end': float(ts),
+                        'size': size,
+                        'type': 'RTE',
+                    }
+                    queued_messages[(node, recv)].append(msg)
+                    del queueing[recv]
+
+# Pass 2: For each sender and receiver, note the receive start time and
+# receive end time for each message in the queued_messages list
+
+messages = {}
+
+max_ts = None
+
+for nodelog in nodelogs:
+    node = nodelog[:-4]
+    with open(nodelog) as logf:
+        receiving = {}
+        for logline in logf:
+            logline = logline.rstrip()
+            if "RTE" in logline:
+                if "receiving" in logline:
+                    matches = re.match(
+                        r'(\d+\.\d+): RTE receiving (\d+) bytes from (\S+)',
+                        logline)
+                    [ts, size, snd] = matches.groups()
+                    assert(snd not in receiving)
+                    receiving[snd] = \
+                        { 'recv_start': float(ts), 'size': size }
+                if "received" in logline:
+                    matches = re.match(
+                        r'(\d+\.\d+): RTE received (\d+) bytes from (\S+)',
+                        logline)
+                    [ts, size, snd] = matches.groups()
+                    assert(snd in receiving)
+                    assert(receiving[snd]['size'] == size)
+                    assert(queued_messages[(snd, node)][0]['size'] == size)
+                    tsf = float(ts)
+                    if (snd, node) not in messages:
+                        messages[(snd, node)] = []
+                    msg = queued_messages[(snd, node)].pop(0)
+                    msg['recv_start'] = receiving[snd]['recv_start']
+                    msg['recv_end'] = tsf
+                    if max_ts is None or max_ts < tsf:
+                        max_ts = tsf
+                    messages[(snd, node)].append(msg)
+                    del receiving[snd]
+
+# Write a latex file that draws the messages
+
+# Timescale (cm per second)
+timescale = 2
+
+# Nodescale (cm between nodes)
+nodescale = 1
+
+with open("trace.tex", "w") as tf:
+    print(r'''\documentclass{article}
+\usepackage[paperwidth=%fcm,paperheight=%fcm,margin=1cm]{geometry}
+\usepackage{tikz}
+\setlength\parindent{0pt}
+\pagestyle{empty}
+\begin{document}
+\begin{tikzpicture}''' % (((max_ts-min_ts)*timescale)+2.5,
+        (len(nodelogs)+1)*nodescale+2.5), file=tf)
+
+    nodenum = 0
+    nodepos = {}
+    for nodelog in nodelogs:
+        node = nodelog[:-4]
+        nodenum += 1
+        nodepos[node] = -nodenum * nodescale
+        print(r'''\node [anchor=east] at (0,%f) { %s };
+\draw[thick] (0,%f) -- ++(%f,0);''' %
+            (nodepos[node], node, nodepos[node],
+            (max_ts-min_ts)*timescale), file=tf)
+
+    for (snd,recv) in messages:
+        for msg in messages[(snd,recv)]:
+            print(msg)
+            print(r'''\fill [fill=%s,fill opacity=.2] (%f,%f) -- (%f,%f) -- (%f,%f) -- (%f,%f) -- cycle;''' %
+                ('black',
+                (msg['queue_start']-min_ts)*timescale, nodepos[snd],
+                (msg['queue_end']-min_ts)*timescale, nodepos[snd],
+                (msg['recv_end']-min_ts)*timescale, nodepos[recv],
+                (msg['recv_start']-min_ts)*timescale, nodepos[recv]),
+                file=tf)
+
+    print(r'''\end{tikzpicture}
+\end{document}''', file=tf)
+
+os.system("pdflatex trace")