Browse Source

Add analysis and plotting scripts

Ian Goldberg 4 years ago
parent
commit
e3e184a106
5 changed files with 795 additions and 0 deletions
  1. 18 0
      analysis/README
  2. 191 0
      analysis/analytical.py
  3. 233 0
      analysis/bytecounts.py
  4. 253 0
      analysis/parselogs.py
  5. 100 0
      analysis/plotdats.py

+ 18 - 0
analysis/README

@@ -0,0 +1,18 @@
+The bytecounts.py script produces the formulas that are coded into the
+analytical.py script.
+
+The analytical.py script produces the formulas that are coded into the
+plotdats.py script.
+
+You shouldn't have to touch either of those, unless the simulator itself
+changes.
+
+If you're just plotting the output of simulator logfiles, just do:
+
+$ ./parselogs.py ../path/to/*.log
+
+(the above will generate 5 .dat files)
+
+$ ./plotdats.py
+
+(the above will generate a bunch of .pdf graphs)

+ 191 - 0
analysis/analytical.py

@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+
+# Compute analytical formulas for the bytes used per epoch by the
+# various modes
+
+import sympy
+
+A, R_B, R_N, R, logR, C_B, C_N, C, gamma, circ, P_Delta, \
+DirAuthConsensusMsg, DirAuthENDIVEDiffMsg, DirAuthGetConsensusMsg, \
+DirAuthGetENDIVEDiffMsg, DirAuthUploadDescMsg, DirAuthENDIVEMsg, \
+DirAuthGetENDIVEMsg, DirAuthUploadDescMsg, RelayConsensusMsg, \
+RelayDescMsg, RelayGetConsensusMsg, RelayGetDescMsg, \
+SinglePassCreateCircuitMsgNotLast, SinglePassCreateCircuitMsgLast, \
+SinglePassCreatedCircuitCellLast, SinglePassCreatedCircuitCellMiddle, \
+SinglePassCreatedCircuitCellFirst, TelescopingCreateCircuitMsg, \
+TelescopingCreatedCircuitCell, TelescopingExtendCircuitCell, \
+TelescopingExtendedCircuitCell, VanillaCreateCircuitMsg, \
+VanillaCreatedCircuitCell, VanillaExtendCircuitCell, \
+VanillaExtendedCircuitCell, DirAuthGetConsensusDiffMsg, \
+DirAuthConsensusDiffMsg, RelayGetConsensusDiffMsg, \
+RelayConsensusDiffMsg \
+= sympy.symbols("""
+A, R_B, R_N, R, logR, C_B, C_N, C, gamma, circ, P_Delta,
+DirAuthConsensusMsg, DirAuthENDIVEDiffMsg, DirAuthGetConsensusMsg,
+DirAuthGetENDIVEDiffMsg, DirAuthUploadDescMsg, DirAuthENDIVEMsg,
+DirAuthGetENDIVEMsg, DirAuthUploadDescMsg, RelayConsensusMsg,
+RelayDescMsg, RelayGetConsensusMsg, RelayGetDescMsg,
+SinglePassCreateCircuitMsgNotLast, SinglePassCreateCircuitMsgLast,
+SinglePassCreatedCircuitCellLast, SinglePassCreatedCircuitCellMiddle,
+SinglePassCreatedCircuitCellFirst, TelescopingCreateCircuitMsg,
+TelescopingCreatedCircuitCell, TelescopingExtendCircuitCell,
+TelescopingExtendedCircuitCell, VanillaCreateCircuitMsg,
+VanillaCreatedCircuitCell, VanillaExtendCircuitCell,
+VanillaExtendedCircuitCell, DirAuthGetConsensusDiffMsg,
+DirAuthConsensusDiffMsg, RelayGetConsensusDiffMsg,
+RelayConsensusDiffMsg
+""")
+
+# Network-wide parameter substitutions, applied to every formula before
+# the per-mode message sizes.  After all of them are applied, the only
+# free symbols left from this list are R and (via the message sizes) logR.
+# The list is handed to sympy's subs(), which applies the pairs in the
+# order given, so a symbol must be replaced only after every earlier
+# entry that introduces it (e.g. R_N -> R - R_B precedes R_B -> 0.010*R,
+# and circ -> gamma*C precedes both gamma and C).
+# NOTE(review): A = 9 presumably matches the 9 dirauths used by
+# bytecounts.py; the other constants are not derived in this file.
+globalsubs = [
+    (A , 9),
+    (R_N , R - R_B),
+    (R_B , 0.010 * R),
+    (C_N , C - C_B),
+    (C_B , 0.16 * C),
+    (circ , gamma * C),
+    (gamma , 8.9),
+    (C , 2500000*R/6500),
+    (P_Delta, 0.019),
+]
+
+# The actual sizes in bytes of each message type were logged by
+# uncommenting this line in network.py:
+    # logging.info("%s size %d", type(self).__name__, sz)
+
+# Each of the following lists maps message-type symbols to their sizes in
+# bytes for one mode; a size may be a constant or an expression in R or
+# logR.  The lists are applied with subs() after globalsubs.  Where one
+# entry is defined in terms of another symbol (e.g. DirAuthENDIVEDiffMsg
+# in terms of DirAuthENDIVEMsg), it is listed before the entry that
+# replaces that other symbol.
+
+# Single-pass circuits, "merkle" variant: created cells grow with logR
+# and the ENDIVE diff is P_Delta times the full ENDIVE.
+singlepass_merkle_subs = [
+    (DirAuthConsensusMsg, 877),
+    (DirAuthGetConsensusMsg, 41),
+    (DirAuthGetENDIVEMsg, 38),
+    (DirAuthGetENDIVEDiffMsg, 42),
+    (DirAuthENDIVEDiffMsg, (P_Delta * DirAuthENDIVEMsg).subs(globalsubs)),
+    (DirAuthENDIVEMsg, 274 * R),
+    (DirAuthUploadDescMsg, 425),
+    (RelayConsensusMsg, 873),
+    (RelayDescMsg, 415),
+    (RelayGetConsensusMsg, 37),
+    (RelayGetDescMsg, 32),
+    (SinglePassCreateCircuitMsgLast, 187),
+    (SinglePassCreateCircuitMsgNotLast, 239),
+    (SinglePassCreatedCircuitCellFirst, 1426+82*logR),
+    (SinglePassCreatedCircuitCellMiddle, 903+41*logR),
+    (SinglePassCreatedCircuitCellLast, 190),
+]
+
+# Single-pass circuits, "threshsig" variant: created cells are constant
+# size and the ENDIVE diff is as large as the full ENDIVE.
+singlepass_threshsig_subs = [
+    (DirAuthConsensusMsg, 789),
+    (DirAuthGetConsensusMsg, 41),
+    (DirAuthGetENDIVEMsg, 38),
+    (DirAuthGetENDIVEDiffMsg, 42),
+    (DirAuthENDIVEDiffMsg, DirAuthENDIVEMsg),
+    (DirAuthENDIVEMsg, 348*R),
+    (DirAuthUploadDescMsg, 425),
+    (RelayConsensusMsg, 784),
+    (RelayDescMsg, 415),
+    (RelayGetConsensusMsg, 37),
+    (RelayGetDescMsg, 32),
+    (SinglePassCreateCircuitMsgLast, 187),
+    (SinglePassCreateCircuitMsgNotLast, 239),
+    (SinglePassCreatedCircuitCellFirst, 1554),
+    (SinglePassCreatedCircuitCellMiddle, 969),
+    (SinglePassCreatedCircuitCellLast, 190),
+]
+
+# Telescoping circuits, "merkle" variant: the extended cell grows with
+# logR and the ENDIVE diff is P_Delta times the full ENDIVE.
+telescoping_merkle_subs = [
+    (DirAuthConsensusMsg, 877),
+    (DirAuthGetConsensusMsg, 41),
+    (DirAuthGetENDIVEMsg, 38),
+    (DirAuthGetENDIVEDiffMsg, 42),
+    (DirAuthENDIVEDiffMsg, (P_Delta * DirAuthENDIVEMsg).subs(globalsubs)),
+    (DirAuthENDIVEMsg, 234 * R),
+    (DirAuthUploadDescMsg, 372),
+    (RelayConsensusMsg, 873),
+    (RelayGetConsensusMsg, 37),
+    (RelayGetDescMsg, 32),
+    (RelayDescMsg, 362),
+    (TelescopingCreateCircuitMsg, 120),
+    (TelescopingCreatedCircuitCell, 179),
+    (TelescopingExtendCircuitCell, 122),
+    (TelescopingExtendedCircuitCell, 493+41*logR),
+]
+
+# Telescoping circuits, "threshsig" variant: the extended cell is constant
+# size and the ENDIVE diff is as large as the full ENDIVE.
+telescoping_threshsig_subs = [
+    (DirAuthConsensusMsg, 789),
+    (DirAuthGetConsensusMsg, 41),
+    (DirAuthGetENDIVEMsg, 38),
+    (DirAuthGetENDIVEDiffMsg, 42),
+    (DirAuthENDIVEDiffMsg, DirAuthENDIVEMsg),
+    (DirAuthENDIVEMsg, 307*R),
+    (DirAuthUploadDescMsg, 372),
+    (RelayConsensusMsg, 788),
+    (RelayGetConsensusMsg, 37),
+    (RelayGetDescMsg, 32),
+    (RelayDescMsg, 362),
+    (TelescopingCreateCircuitMsg, 120),
+    (TelescopingCreatedCircuitCell, 179),
+    (TelescopingExtendCircuitCell, 122),
+    (TelescopingExtendedCircuitCell, 556),
+]
+
+# Vanilla mode: the consensus size is linear in R, the dirauth consensus
+# is the same size as the relay consensus, and consensus diffs are
+# P_Delta times the full consensus.
+vanilla_subs = [
+    (DirAuthConsensusDiffMsg, (P_Delta * DirAuthConsensusMsg).subs(globalsubs)),
+    (DirAuthConsensusMsg, RelayConsensusMsg),
+    (DirAuthGetConsensusDiffMsg, 45),
+    (DirAuthGetConsensusMsg, 41),
+    (DirAuthUploadDescMsg, 372),
+    (RelayConsensusDiffMsg, (P_Delta * RelayConsensusMsg).subs(globalsubs)),
+    (RelayConsensusMsg, 219*R),
+    (RelayGetConsensusDiffMsg, 41),
+    (RelayGetConsensusMsg, 37),
+    (VanillaCreateCircuitMsg, 116),
+    (VanillaCreatedCircuitCell, 175),
+    (VanillaExtendCircuitCell, 157),
+    (VanillaExtendedCircuitCell, 176),
+]
+
+# The formulas were output by bytecounts.py
+
+# Each *_totrelay formula is the total number of bytes (sent plus
+# received) over all relays, and each *_totclient formula the same over
+# all clients, written as a sum of per-entity costs:
+#   R_N / R_B  - one term per non-bootstrapping / bootstrapping relay
+#   C_N / C_B  - one term per non-bootstrapping / bootstrapping client
+#                (the Walking Onions modes use a single C term for both)
+#   circ       - one term per circuit built
+
+# Single-pass Walking Onions, all relays
+singlepass_totrelay = \
+  R_N * ( DirAuthConsensusMsg + DirAuthENDIVEDiffMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEDiffMsg + A*DirAuthUploadDescMsg ) \
++ R_B * ( DirAuthConsensusMsg + DirAuthENDIVEMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEMsg + A*DirAuthUploadDescMsg ) \
++ C   * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
++ circ * ( 3*SinglePassCreateCircuitMsgNotLast + 2*SinglePassCreateCircuitMsgLast + 2*SinglePassCreatedCircuitCellLast + 2*SinglePassCreatedCircuitCellMiddle + SinglePassCreatedCircuitCellFirst + 20 )
+
+# Single-pass Walking Onions, all clients
+singlepass_totclient = \
+  C * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
++ circ * ( SinglePassCreateCircuitMsgNotLast + SinglePassCreatedCircuitCellFirst + 4 )
+
+# Telescoping Walking Onions, all relays
+telescoping_totrelay = \
+  R_N * ( DirAuthConsensusMsg + DirAuthENDIVEDiffMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEDiffMsg + A*DirAuthUploadDescMsg ) \
++ R_B * ( DirAuthConsensusMsg + DirAuthENDIVEMsg + DirAuthGetConsensusMsg + DirAuthGetENDIVEMsg + A*DirAuthUploadDescMsg ) \
++ C * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
++ circ * ( 5*TelescopingCreateCircuitMsg + 5*TelescopingCreatedCircuitCell + 4*TelescopingExtendCircuitCell + 4*TelescopingExtendedCircuitCell + 52 )
+
+# Telescoping Walking Onions, all clients
+telescoping_totclient = \
+  C * ( RelayConsensusMsg + RelayDescMsg + RelayGetConsensusMsg + RelayGetDescMsg ) \
++ circ * ( TelescopingCreateCircuitMsg + TelescopingCreatedCircuitCell + 2*TelescopingExtendCircuitCell + 2*TelescopingExtendedCircuitCell + 20 )
+
+# Vanilla mode, all relays
+vanilla_totrelay = \
+  R_N * ( DirAuthConsensusDiffMsg + DirAuthGetConsensusDiffMsg + A*DirAuthUploadDescMsg ) \
++ R_B * ( DirAuthConsensusMsg + DirAuthGetConsensusMsg + A*DirAuthUploadDescMsg ) \
++ C_N * ( RelayConsensusDiffMsg + RelayGetConsensusDiffMsg ) \
++ C_B * ( RelayConsensusMsg + RelayGetConsensusMsg ) \
++ circ * ( 5*VanillaCreateCircuitMsg + 5*VanillaCreatedCircuitCell + 4*VanillaExtendCircuitCell + 4*VanillaExtendedCircuitCell + 52 )
+
+# Vanilla mode, all clients
+vanilla_totclient = \
+  C_N * ( RelayConsensusDiffMsg + RelayGetConsensusDiffMsg ) \
++ C_B * ( RelayConsensusMsg + RelayGetConsensusMsg ) \
++ circ * ( VanillaCreateCircuitMsg + VanillaCreatedCircuitCell + 2*VanillaExtendCircuitCell + 2*VanillaExtendedCircuitCell + 20 )
+
+# Copy the output into plotdats.py, replacing 'R' by 'x' and 'logR' by
+# 'ceil(log(x)/log(2))'
+
+print('singlepass_merkle_relay =', (singlepass_totrelay/C).subs(globalsubs).subs(singlepass_merkle_subs).simplify())
+print('singlepass_merkle_client =', (singlepass_totclient/C).subs(globalsubs).subs(singlepass_merkle_subs).simplify())
+print('singlepass_threshsig_relay =', (singlepass_totrelay/C).subs(globalsubs).subs(singlepass_threshsig_subs).simplify())
+print('singlepass_threshsig_client =', (singlepass_totclient/C).subs(globalsubs).subs(singlepass_threshsig_subs).simplify())
+print('telescoping_merkle_relay =', (telescoping_totrelay/C).subs(globalsubs).subs(telescoping_merkle_subs).simplify())
+print('telescoping_merkle_client =', (telescoping_totclient/C).subs(globalsubs).subs(telescoping_merkle_subs).simplify())
+print('telescoping_threshsig_relay =', (telescoping_totrelay/C).subs(globalsubs).subs(telescoping_threshsig_subs).simplify())
+print('telescoping_threshsig_client =', (telescoping_totclient/C).subs(globalsubs).subs(telescoping_threshsig_subs).simplify())
+print('vanilla_relay =', (vanilla_totrelay/C).subs(globalsubs).subs(vanilla_subs).simplify())
+print('vanilla_client =', (vanilla_totclient/C).subs(globalsubs).subs(vanilla_subs).simplify())

+ 233 - 0
analysis/bytecounts.py

@@ -0,0 +1,233 @@
+#!/usr/bin/env python3
+
+import random # For simulation, not cryptography!
+import math
+import sys
+import os
+import logging
+import resource
+import sympy
+
+sys.path.append("..")
+
+import network
+import dirauth
+import relay
+import client
+
+class BandwidthMeasurer:
+    def __init__(self, numdirauths, numrelays, numclients):
+        # Start some dirauths
+        self.dirauthaddrs = []
+        self.dirauths = []
+        for i in range(numdirauths):
+            dira = dirauth.DirAuth(i, numdirauths)
+            self.dirauths.append(dira)
+            self.dirauthaddrs.append(dira.netaddr)
+
+        # Start some relays
+        self.relays = []
+        for i in range(numrelays):
+            self.startrelay()
+
+        # The fallback relays are a hardcoded list of a small fraction
+        # of the relays, used by clients for bootstrapping
+        numfallbackrelays = 1
+        fallbackrelays = self.relays[0:1]
+        for r in fallbackrelays:
+            r.set_is_fallbackrelay()
+        network.thenetwork.setfallbackrelays(fallbackrelays)
+
+        # Tick the epoch to build the first consensus
+        network.thenetwork.nextepoch()
+
+        # Start some clients
+        self.clients = []
+        for i in range(numclients):
+            self.startclient()
+
+        # Throw away all the performance statistics to this point
+        for d in self.dirauths: d.perfstats.reset()
+        for r in self.relays: r.perfstats.reset()
+        # The clients' stats are already at 0, but they have the
+        # "bootstrapping" flag set, which we want to keep, so we
+        # won't reset them.
+
+        self.allcircs = []
+
+        # Tick the epoch to bootstrap the clients
+        network.thenetwork.nextepoch()
+
+    def startrelay(self):
+        # Relay bandwidths (at least the ones fast enough to get used)
+        # in the live Tor network (as of Dec 2019) are well approximated
+        # by (200000-(200000-25000)/3*log10(x)) where x is a
+        # uniform integer in [1,2500]
+        x = random.randint(1,2500)
+        bw = int(200000-(200000-25000)/3*math.log10(x))
+        self.relays.append(relay.Relay(self.dirauthaddrs, bw, 0))
+
+    def stoprelay(self):
+        self.relays[1].terminate()
+        del self.relays[1]
+
+    def startclient(self):
+        self.clients.append(client.Client(self.dirauthaddrs))
+
+    def stopclient(self):
+        self.clients[0].terminate()
+        del self.clients[0]
+
+    def buildcircuit(self):
+        bwm.allcircs.append(bwm.clients[0].channelmgr.new_circuit())
+
+    def getstats(self):
+
+        # gather stats
+        totsent = 0
+        totrecv = 0
+        totbytes = 0
+        dirasent = 0
+        dirarecv = 0
+        dirabytes = 0
+        relaysent = 0
+        relayrecv = 0
+        relaybytes = 0
+        clisent = 0
+        clirecv = 0
+        clibytes = 0
+        for d in self.dirauths:
+            logging.debug("%s", d.perfstats)
+            dirasent += d.perfstats.bytes_sent
+            dirarecv += d.perfstats.bytes_received
+            dirabytes += d.perfstats.bytes_sent + d.perfstats.bytes_received
+        totsent += dirasent
+        totrecv += dirarecv
+        totbytes += dirabytes
+        for r in self.relays:
+            logging.debug("%s", r.perfstats)
+            relaysent += r.perfstats.bytes_sent
+            relayrecv += r.perfstats.bytes_received
+            relaybytes += r.perfstats.bytes_sent + r.perfstats.bytes_received
+        totsent += relaysent
+        totrecv += relayrecv
+        totbytes += relaybytes
+        for c in self.clients:
+            logging.debug("%s", c.perfstats)
+            clisent += c.perfstats.bytes_sent
+            clirecv += c.perfstats.bytes_received
+            clibytes += c.perfstats.bytes_sent + c.perfstats.bytes_received
+        totsent += clisent
+        totrecv += clirecv
+        totbytes += clibytes
+        logging.info("DirAuths sent=%s recv=%s bytes=%s" % \
+                (dirasent, dirarecv, dirabytes))
+        logging.info("Relays sent=%s recv=%s bytes=%s" % \
+                (relaysent, relayrecv, relaybytes))
+        logging.info("Client sent=%s recv=%s bytes=%s" % \
+                (clisent, clirecv, clibytes))
+        logging.info("Total sent=%s recv=%s bytes=%s" % \
+                (totsent, totrecv, totbytes))
+
+        # Reset bootstrap flag
+        for d in self.dirauths: d.perfstats.is_bootstrapping = False
+        for r in self.relays: r.perfstats.is_bootstrapping = False
+        for c in self.clients: c.perfstats.is_bootstrapping = False
+
+        return (dirabytes, relaybytes, clibytes)
+
+    def endepoch(self):
+
+        # Close circuits
+        for c in self.allcircs:
+            c.close()
+        self.allcircs = []
+
+        # Reset stats
+        for d in self.dirauths: d.perfstats.reset()
+        for r in self.relays: r.perfstats.reset()
+        for c in self.clients: c.perfstats.reset()
+
+        network.thenetwork.nextepoch()
+
+
+# Run a fixed sequence of epochs, each differing from a baseline in
+# exactly one respect (one bootstrapping relay, one bootstrapping client,
+# one steady-state client, one circuit, two circuits), and print the
+# differences in byte totals as the coefficients of a formula for the
+# total relay bytes and the total client bytes.
+if __name__ == '__main__':
+    # Args: womode snipauthmode numrelays randseed
+    if len(sys.argv) != 5:
+        sys.stderr.write("Usage: womode snipauthmode numrelays randseed\n")
+        sys.exit(1)
+
+    womode = network.WOMode[sys.argv[1].upper()]
+    snipauthmode = network.SNIPAuthMode[sys.argv[2].upper()]
+    numrelays = int(sys.argv[3])
+    randseed = int(sys.argv[4])
+
+    # Use symbolic byte counter mode
+    network.symbolic_byte_counters = True
+
+    # Seed the PRNG.  On Ubuntu 18.04, this in fact makes future calls
+    # to (non-cryptographic) random numbers deterministic.  On Ubuntu
+    # 16.04, it does not.
+    random.seed(randseed)
+
+    loglevel = logging.INFO
+    # Uncomment to see all the debug messages
+    # loglevel = logging.DEBUG
+
+    logging.basicConfig(level=loglevel,
+            format="%(asctime)s:%(levelname)s:%(message)s")
+
+    logging.info("Starting simulation")
+
+    # Set the Walking Onions style to use
+    network.thenetwork.set_wo_style(womode, snipauthmode)
+
+    # 9 dirauths, numrelays relays, no clients to begin with
+    bwm = BandwidthMeasurer(9, numrelays, 0)
+    # stats maps (R_N, R_B, C_N, C_B, circs) to the
+    # (dirabytes, relaybytes, clibytes) tuple returned by getstats()
+    stats = dict()
+
+    # Baseline: only non-bootstrapping relays
+    logging.info("R_N = %d, R_B = 0, C_N = 0, C_B = 0, circs = 0", numrelays)
+    stats[(numrelays, 0, 0, 0, 0)] = bwm.getstats()
+    # Bootstrap one relay
+    bwm.startrelay()
+    bwm.endepoch()
+    logging.info("R_N = %d, R_B = 1, C_N = 0, C_B = 0, circs = 0", numrelays)
+    stats[(numrelays, 1, 0, 0, 0)] = bwm.getstats()
+    # Bootstrap one client
+    # (stopping one relay first brings the relay count back to numrelays)
+    bwm.stoprelay()
+    bwm.startclient()
+    bwm.endepoch()
+    logging.info("R_N = %d, R_B = 0, C_N = 0, C_B = 1, circs = 0", numrelays)
+    stats[(numrelays, 0, 0, 1, 0)] = bwm.getstats()
+    # No changes, so the client is now not bootstrapping
+    bwm.endepoch()
+    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 0", numrelays)
+    stats[(numrelays, 0, 1, 0, 0)] = bwm.getstats()
+    # No more bootstrapping, but build one circuit
+    bwm.buildcircuit()
+    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 1", numrelays)
+    stats[(numrelays, 0, 1, 0, 1)] = bwm.getstats()
+    bwm.endepoch()
+    # No more bootstrapping, but build two circuits
+    bwm.buildcircuit()
+    bwm.buildcircuit()
+    logging.info("R_N = %d, R_B = 0, C_N = 1, C_B = 0, circs = 2", numrelays)
+    stats[(numrelays, 0, 1, 0, 2)] = bwm.getstats()
+    bwm.endepoch()
+
+    # Index [1] of each stats tuple is the relay byte total.  Each
+    # coefficient is the difference between a run with one extra entity
+    # and the matching run without it.  The "check" line is the cost of
+    # the second circuit, which presumably should equal the circ line.
+    print("\n")
+    print('Total relay bytes:')
+    print('  R_N * (', stats[(numrelays, 0, 0, 0, 0)][1]/numrelays, ')')
+    print('+ R_B * (', stats[(numrelays, 1, 0, 0, 0)][1] - stats[(numrelays, 0, 0, 0, 0)][1], ')')
+    print('+ C_N * (', stats[(numrelays, 0, 1, 0, 0)][1] - stats[(numrelays, 0, 0, 0, 0)][1], ')')
+    print('+ C_B * (', stats[(numrelays, 0, 0, 1, 0)][1] - stats[(numrelays, 0, 0, 0, 0)][1], ')')
+    print('+ circ * (', stats[(numrelays, 0, 1, 0, 1)][1] - stats[(numrelays, 0, 1, 0, 0)][1], ')')
+    print('  check   ', stats[(numrelays, 0, 1, 0, 2)][1] - stats[(numrelays, 0, 1, 0, 1)][1])
+
+    # Same breakdown for index [2], the client byte total
+    print("\n")
+    print('Total client bytes:')
+    print('  R_N * (', stats[(numrelays, 0, 0, 0, 0)][2]/numrelays, ')')
+    print('+ R_B * (', stats[(numrelays, 1, 0, 0, 0)][2] - stats[(numrelays, 0, 0, 0, 0)][2], ')')
+    print('+ C_N * (', stats[(numrelays, 0, 1, 0, 0)][2] - stats[(numrelays, 0, 0, 0, 0)][2], ')')
+    print('+ C_B * (', stats[(numrelays, 0, 0, 1, 0)][2] - stats[(numrelays, 0, 0, 0, 0)][2], ')')
+    print('+ circ * (', stats[(numrelays, 0, 1, 0, 1)][2] - stats[(numrelays, 0, 1, 0, 0)][2], ')')
+    print('  check   ', stats[(numrelays, 0, 1, 0, 2)][2] - stats[(numrelays, 0, 1, 0, 1)][2])

+ 253 - 0
analysis/parselogs.py

@@ -0,0 +1,253 @@
+#!/usr/bin/env python3
+
+# Parse the log files output by the simulator to get the overall average
+# and stddev of total bytes sent+received per epoch for each of:
+# - dirauths
+# - total relays
+# - bootstrapping fallback relays per bw
+# - non-bootstrapping fallback relays per bw
+# - bootstrapping non-fallback relays per bw
+# - non-bootstrapping non-fallback relays per bw
+# - total clients
+# - bootstrapping clients
+# - non-bootstrapping clients
+# - steady-state dirauths (skipping the first epoch in which all clients
+#       are bootstrapping)
+# - steady-state relays (as above)
+# - steady-state clients (as above)
+# - steady-state total relay traffic per client (as above)
+
+# Pass the names of the log files as command-line arguments, or the log
+# files themselves on stdin.
+# The output will be five files named:
+# - vanilla_none.dat
+# - telescoping_merkle.dat
+# - telescoping_threshsig.dat
+# - singlepass_merkle.dat
+# - singlepass_threshsig.dat
+
+# Each file will contain one line for every network scale seen in the
+# logs.  The line will consist of 27 fields:
+# - the network scale (float)
+# - the mean and stddev (two floats) for each of the above 13 classes,
+#   in the above order; for the steady states, the stats are computed
+#   over the per-epoch averages, while for the others, the stats are
+#   computed on a per-entity basis
+
+import math
+import re
+import fileinput
+
+modes = [ "vanilla_none", "telescoping_merkle", "telescoping_threshsig",
+          "singlepass_merkle", "singlepass_threshsig" ]
+
+class StatAccum:
+    """Accumulate (mean, stddev, N) triples to compute an overall (mean,
+    stddev, N)."""
+    def __init__(self):
+        self.sumX = 0
+        self.sumXsq = 0
+        self.totN = 0
+
+    def accum(self, mean, stddev, N):
+        if N == 0:
+            # Nothing to change
+            return
+
+        if N == 1:
+            # stddev will be None
+            this_sumX = mean
+            this_sumXsq = mean*mean
+
+        else:
+            this_sumX = mean * N
+            variance = stddev * stddev
+            this_sumXsq = variance * (N-1) + this_sumX * this_sumX / N
+
+        self.sumX += this_sumX
+        self.sumXsq += this_sumXsq
+        self.totN += N
+
+    def stats(self):
+        if self.totN == 0:
+            return (None, None, 0)
+
+        if self.totN == 1:
+            return (self.sumX, None, 1)
+
+        mean = self.sumX / self.totN
+        variance = (self.sumXsq - self.sumX * self.sumX / self.totN) \
+                    / (self.totN - 1)
+        stddev = math.sqrt(variance)
+        return (mean, stddev, self.totN)
+
+    def __str__(self):
+        mean, stddev, N = self.stats()
+        if mean is None:
+            mean = 0
+        if stddev is None:
+            stddev = 0
+        return "%f %f" % (mean, stddev)
+
+class StatBundle:
+    """A bundle of 13 StatAccums, corresponding to the 13 entity classes
+    listed above."""
+
+    def __init__(self):
+        self.stats = [StatAccum() for i in range(13)]
+
+    def dirauth(self, mean, stddev, N):
+        self.stats[0].accum(mean, stddev, N)
+
+    def relay_all(self, mean, stddev, N):
+        self.stats[1].accum(mean, stddev, N)
+
+    def relay_fb_boot(self, mean, stddev, N):
+        self.stats[2].accum(mean, stddev, N)
+
+    def relay_fb_nboot(self, mean, stddev, N):
+        self.stats[3].accum(mean, stddev, N)
+
+    def relay_nfb_boot(self, mean, stddev, N):
+        self.stats[4].accum(mean, stddev, N)
+
+    def relay_nfb_nboot(self, mean, stddev, N):
+        self.stats[5].accum(mean, stddev, N)
+
+    def client_all(self, mean, stddev, N):
+        self.stats[6].accum(mean, stddev, N)
+
+    def client_boot(self, mean, stddev, N):
+        self.stats[7].accum(mean, stddev, N)
+
+    def client_nboot(self, mean, stddev, N):
+        self.stats[8].accum(mean, stddev, N)
+
+    def steady_dirauth(self, mean, stddev, N):
+        self.stats[9].accum(mean, stddev, N)
+
+    def steady_relay(self, mean, stddev, N):
+        self.stats[10].accum(mean, stddev, N)
+
+    def steady_client(self, mean, stddev, N):
+        self.stats[11].accum(mean, stddev, N)
+
+    def steady_relay_perclient(self, mean, stddev, N):
+        self.stats[12].accum(mean, stddev, N)
+
+    def __str__(self):
+        return '%s %s %s %s %s %s %s %s %s %s %s %s %s' % (
+            self.stats[0], self.stats[1], self.stats[2], self.stats[3],
+            self.stats[4], self.stats[5], self.stats[6], self.stats[7],
+            self.stats[8], self.stats[9], self.stats[10],
+            self.stats[11], self.stats[12])
+
+class LogParser:
+    """A class to parse the logfiles output by simulator.py."""
+
+    def __init__(self):
+        # self.stats is a dict indexed by mode name (like
+        # "singlepass_merkle") whose value is a dict indexed
+        # by network scale whose value is a StatBundle
+        self.stats = dict()
+
+        self.curbundle = None
+        self.fbbootstrapping = None
+        self.steadystate = False
+
+        self.startre = re.compile('Starting simulation .*?\/([A-Z]+)_([A-Z]+)_([\d\.]+)_')
+        self.statperbwre = re.compile('(Relays\(N?F?B\)).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*bytesperbw=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
+        self.statre = re.compile('(Dirauths|Relays|Clients(\(N?B\))?).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
+
+    def parse_line(self, line):
+        m = self.startre.search(line)
+        if m:
+            mode = m.group(1).lower() + "_" + m.group(2).lower()
+            scale = m.group(3)
+            if mode not in self.stats:
+                self.stats[mode] = dict()
+            if scale not in self.stats[mode]:
+                self.stats[mode][scale] = StatBundle()
+            self.curbundle = self.stats[mode][scale]
+            self.fbbootstrapping = True
+            self.steadystate = False
+            return
+
+        m = self.statperbwre.search(line)
+        if m:
+            enttype, means, stddevs, meanperbws, stddevperbws, Ns = \
+                    m.group(1,2,4,5,7,8)
+        else:
+            m = self.statre.search(line)
+            if m:
+                enttype, means, stddevs, Ns = \
+                        m.group(1,3,5,6)
+                meanperbws, stddevperbws = None, None
+            else:
+                return
+
+        mean = float(means)
+        if stddevs:
+            stddev = float(stddevs)
+        else:
+            stddev = None
+        if meanperbws:
+            meanperbw = float(meanperbws)
+        else:
+            meanperbw = None
+        if stddevperbws:
+            stddevperbw = float(stddevperbws)
+        else:
+            stddevperbw = None
+        N = int(Ns)
+        # print('%s %s %s %s %s %s' % (enttype, mean, stddev, meanperbw, stddevperbw, N))
+        if enttype == 'Dirauths':
+            self.curbundle.dirauth(mean, stddev, N)
+            if self.steadystate:
+                self.curbundle.steady_dirauth(mean, None, 1)
+        elif enttype == 'Relays':
+            self.curbundle.relay_all(mean, stddev, N)
+            if self.steadystate:
+                self.curbundle.steady_relay(mean, None, 1)
+                self.totrelaybytes = mean * N
+        elif enttype == 'Relays(FB)':
+            if self.fbbootstrapping:
+                self.curbundle.relay_fb_boot(meanperbw, stddevperbw, N)
+                self.fbbootstrapping = False
+            else:
+                self.curbundle.relay_fb_nboot(meanperbw, stddevperbw, N)
+        elif enttype == 'Relays(B)':
+            self.curbundle.relay_nfb_boot(meanperbw, stddevperbw, N)
+        elif enttype == 'Relays(NB)':
+            self.curbundle.relay_nfb_nboot(meanperbw, stddevperbw, N)
+        elif enttype == 'Clients':
+            self.curbundle.client_all(mean, stddev, N)
+            if self.steadystate:
+                self.curbundle.steady_client(mean, None, 1)
+                self.curbundle.steady_relay_perclient(self.totrelaybytes / N, None, 1)
+        elif enttype == 'Clients(B)':
+            self.curbundle.client_boot(mean, stddev, N)
+        elif enttype == 'Clients(NB)':
+            self.curbundle.client_nboot(mean, stddev, N)
+            # Now we've reached steady state
+            self.steadystate = True
+        else:
+            raise ValueError('Unknown entity type "%s"' % enttype)
+
+    def write_output(self):
+        for mode in self.stats.keys():
+            with open("%s.dat" % mode, "w") as datout:
+                for scale in sorted(self.stats[mode].keys()):
+                    datout.write("%s %s\n" % \
+                            (6500*float(scale), self.stats[mode][scale]))
+                datout.close()
+
+
+if __name__ == '__main__':
+
+    logparser = LogParser()
+
+    for line in fileinput.input():
+        logparser.parse_line(line)
+
+    logparser.write_output()

+ 100 - 0
analysis/plotdats.py

@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+
+import subprocess
+
+# Plot the dat files generated by parselogs.py using gnuplot
+
+if __name__ == '__main__':
+
+    # The analytical functions come from analytical.py
+    # Replace 'R' in the output of that program by 'x' and replace
+    # 'logR' by 'ceil(log(x)/log(2))'.
+
+    relay_perclient_analyticals = {
+        'singlepass_merkle': '0.020524244*x + 1459.6*ceil(log(x)/log(2)) + 43404.140896',
+        'singlepass_threshsig': '0.9048*x + 45628.912096',
+        'telescoping_merkle': '0.017528004*x + 1459.6*ceil(log(x)/log(2)) + 36977.500696',
+        'telescoping_threshsig': '0.7982*x + 39135.071896',
+        'vanilla_none': '38.551644414*x + 25316.281696',
+    }
+
+    client_analyticals = {
+        'singlepass_merkle': '729.8*ceil(log(x)/log(2)) + 16211.1',
+        'singlepass_threshsig': '17261.3000000000',
+        'telescoping_merkle': '729.8*ceil(log(x)/log(2)) + 15090.1',
+        'telescoping_threshsig': '16126.5000000000',
+        'vanilla_none': '38.53524*x + 8735.66',
+    }
+
+    plots = [
+        ('dirauth', 'Directory authorities total bytes each', 2, False, None),
+        ('relay', 'Relay total bytes each', 4, False, None),
+        ('relay_bf', 'Bootstrapping fallback relays bytes per bw', 6, True, None),
+        ('relay_f', 'Non-bootstrapping fallback relays bytes per bw', 8, True, None),
+        ('relay_b', 'Bootstrapping normal relays bytes per bw', 10, True, None),
+        ('relay_n', 'Non-bootstrapping normal relays bytes per bw', 12, True, None),
+        ('client', 'Client total bytes each', 14, False, None),
+        ('client_b', 'Bootstrapping client total bytes', 16, True, None),
+        ('client_n', 'Non-bootstrapping client total bytes', 18, True, None),
+        ('dirauth_ss', 'Directory authority total bytes each', 20, True, None),
+        ('relay_ss', 'Relay total bytes each', 22, True, None),
+        ('client_ss', 'Client total bytes each', 24, True, client_analyticals),
+        ('relay_perclient_ss', 'Relay total bytes per client', 26, True, relay_perclient_analyticals),
+        ('client_ss_wide', 'Client total bytes each', 24, True, client_analyticals),
+        ('relay_perclient_ss_wide', 'Relay total bytes per client', 26, True, relay_perclient_analyticals),
+    ]
+
+    dats = [
+        ('vanilla_none', 'Vanilla', 1),
+        ('singlepass_merkle', 'Sing(M)', 2),
+        ('telescoping_merkle', 'Tele(M)', 3),
+        ('singlepass_threshsig', 'Sing(T)', 4),
+        ('telescoping_threshsig', 'Tele(T)', 5),
+    ]
+
+    for filename, title, col, errbars, analyticals in plots:
+        if analyticals is None:
+            analyticals = dict()
+        if filename[-5:] == '_wide':
+            ranges = "set xrange [300:300000]\nset logscale xy\nset yrange [10000:]"
+        else:
+            ranges = "set xrange [0:1750]\nset yrange [0:]"
+        gpcode = """set terminal pdf
+set output '%s.pdf'
+set title '%s'
+%s
+set key out
+set arrow from 6500, graph 0 to 6500, graph 1 nohead lc 0 lw 2
+set xlabel "Number of relays"
+set style line 1 lw 2 lc 1 pt 1
+set style line 2 lw 2 lc 2 pt 1
+set style line 3 lw 2 lc 3 pt 1
+set style line 4 lw 2 lc 4 pt 1
+set style line 5 lw 2 lc 5 pt 1
+set style line 10 lw 2 lc 0 dt 2
+set style line 11 lw 2 lc 1 dt 2
+set style line 12 lw 2 lc 2 dt 2
+set style line 13 lw 2 lc 3 dt 2
+set style line 14 lw 2 lc 4 dt 2
+set style line 15 lw 2 lc 5 dt 2
+plot """ % (filename, title, ranges)
+        firstplot = True
+        for datname, title, style in dats:
+            if firstplot is False:
+                gpcode += ", "
+            else:
+                firstplot = False
+            gpcode += "'%s.dat' using 1:%d with lines ls %d title '%s'" % \
+                (datname, col, style, title)
+            if errbars:
+                gpcode += ", '%s.dat' using 1:%d:%d with errorbars ls %d notitle" % \
+                    (datname, col, col+1, style)
+            if datname in analyticals:
+                gpcode += ", %s ls %d notitle" % \
+                    (analyticals[datname], style+10)
+        if analyticals:
+            gpcode += ", -100 ls 10 title 'Analytical'"
+
+        gp = subprocess.Popen(['gnuplot', '-'], stdin=subprocess.PIPE) 
+        gp.communicate(gpcode.encode('ascii'))
+