#!/usr/bin/env python3

# Parse the log files output by the simulator to get the overall average
# and stddev of total bytes sent+received per epoch for each of:
#  - dirauths
#  - total relays
#  - bootstrapping fallback relays per bw
#  - non-bootstrapping fallback relays per bw
#  - bootstrapping non-fallback relays per bw
#  - non-bootstrapping non-fallback relays per bw
#  - total clients
#  - bootstrapping clients
#  - non-bootstrapping clients
#  - steady-state dirauths (skipping the first epoch in which all clients
#    are bootstrapping)
#  - steady-state relays (as above)
#  - steady-state clients (as above)
#  - steady-state total relay traffic per client (as above)
#
# Pass the names of the log files as command-line arguments, or the log
# files themselves on stdin.
#
# The output will be five files named:
#  - vanilla_none.dat
#  - telescoping_merkle.dat
#  - telescoping_threshsig.dat
#  - singlepass_merkle.dat
#  - singlepass_threshsig.dat
#
# Each file will contain one line for every network scale seen in the
# logs.  The line will consist of 27 fields:
#  - the network scale multiplied by 6500 (float)
#  - the mean and stddev (two floats) for each of the above 13 classes,
#    in the above order; for the steady states, the stats are computed
#    over the per-epoch averages, while for the others, the stats are
#    computed on a per-entity basis

import math
import re
import fileinput

modes = [
    "vanilla_none",
    "telescoping_merkle",
    "telescoping_threshsig",
    "singlepass_merkle",
    "singlepass_threshsig",
]

# The first field of every output line is the scale parsed from the log
# multiplied by this constant.
# NOTE(review): presumably the entity count of the network at scale 1.0
# -- confirm against the simulator.
SCALE_MULTIPLIER = 6500


class StatAccum:
    """Accumulate (mean, stddev, N) triples to compute an overall
    (mean, stddev, N)."""

    def __init__(self):
        self.sumX = 0
        self.sumXsq = 0
        self.totN = 0

    def accum(self, mean, stddev, N):
        """Fold one (mean, stddev, N) sample summary into the totals.

        A summary with N == 0 is ignored.  For N == 1 the stddev is not
        used (it is expected to be None).  For larger N the sum of
        squares is reconstructed from the sample variance.
        """
        if N == 0:
            # Nothing to change
            return
        if N == 1:
            # stddev will be None
            this_sumX = mean
            this_sumXsq = mean * mean
        else:
            this_sumX = mean * N
            variance = stddev * stddev
            this_sumXsq = variance * (N - 1) + this_sumX * this_sumX / N
        self.sumX += this_sumX
        self.sumXsq += this_sumXsq
        self.totN += N

    def stats(self):
        """Return the overall (mean, stddev, N).

        The result is (None, None, 0) when nothing has been accumulated
        and (value, None, 1) when exactly one entity has been seen.
        """
        if self.totN == 0:
            return (None, None, 0)
        if self.totN == 1:
            return (self.sumX, None, 1)
        mean = self.sumX / self.totN
        variance = (self.sumXsq - self.sumX * self.sumX / self.totN) \
            / (self.totN - 1)
        # Cancellation in the subtraction above can leave a tiny negative
        # value when all the samples are (nearly) equal; math.sqrt would
        # raise ValueError on that, so clamp at zero.
        stddev = math.sqrt(max(variance, 0.0))
        return (mean, stddev, self.totN)

    def __str__(self):
        """Format as "mean stddev", printing 0 for any missing value."""
        mean, stddev, _ = self.stats()
        if mean is None:
            mean = 0
        if stddev is None:
            stddev = 0
        return "%f %f" % (mean, stddev)


class StatBundle:
    """A bundle of 13 StatAccums, corresponding to the 13 entity classes
    listed above."""

    def __init__(self):
        self.stats = [StatAccum() for _ in range(13)]

    def dirauth(self, mean, stddev, N):
        self.stats[0].accum(mean, stddev, N)

    def relay_all(self, mean, stddev, N):
        self.stats[1].accum(mean, stddev, N)

    def relay_fb_boot(self, mean, stddev, N):
        self.stats[2].accum(mean, stddev, N)

    def relay_fb_nboot(self, mean, stddev, N):
        self.stats[3].accum(mean, stddev, N)

    def relay_nfb_boot(self, mean, stddev, N):
        self.stats[4].accum(mean, stddev, N)

    def relay_nfb_nboot(self, mean, stddev, N):
        self.stats[5].accum(mean, stddev, N)

    def client_all(self, mean, stddev, N):
        self.stats[6].accum(mean, stddev, N)

    def client_boot(self, mean, stddev, N):
        self.stats[7].accum(mean, stddev, N)

    def client_nboot(self, mean, stddev, N):
        self.stats[8].accum(mean, stddev, N)

    def steady_dirauth(self, mean, stddev, N):
        self.stats[9].accum(mean, stddev, N)

    def steady_relay(self, mean, stddev, N):
        self.stats[10].accum(mean, stddev, N)

    def steady_client(self, mean, stddev, N):
        self.stats[11].accum(mean, stddev, N)

    def steady_relay_perclient(self, mean, stddev, N):
        self.stats[12].accum(mean, stddev, N)

    def __str__(self):
        """Format all 13 accumulators, in order, separated by spaces."""
        return ' '.join(str(accum) for accum in self.stats)


class LogParser:
    """A class to parse the logfiles output by simulator.py."""

    def __init__(self):
        # self.stats is a dict indexed by mode name (like
        # "singlepass_merkle") whose value is a dict indexed
        # by network scale whose value is a StatBundle
        self.stats = dict()
        self.curbundle = None
        self.fbbootstrapping = None
        self.steadystate = False
        # mean * N from the most recent 'Relays' line of the current
        # simulation, or None if no such line has been seen yet
        self.totrelaybytes = None
        # Raw strings keep the regex escapes (\/, \d, \., \() from being
        # read as (invalid) string escapes; the patterns are unchanged.
        self.startre = re.compile(
            r'Starting simulation .*?\/([A-Z]+)_([A-Z]+)_([\d\.]+)_')
        self.statperbwre = re.compile(
            r'(Relays\(N?F?B\)).*bytes=([\d\.]+)( .pm ([\d\.]+))?'
            r'.*bytesperbw=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
        self.statre = re.compile(
            r'(Dirauths|Relays|Clients(\(N?B\))?).*bytes=([\d\.]+)'
            r'( .pm ([\d\.]+))?.*N=(\d+)')
        self.mibre = re.compile(r'MiB used')

    @staticmethod
    def _to_float(text):
        """Convert an optional regex capture to a float (None if absent)."""
        return float(text) if text else None

    def parse_line(self, line):
        """Parse one log line and update the accumulated statistics.

        A "Starting simulation" line selects (creating it if needed) the
        StatBundle for that mode and scale.  A "MiB used" line marks the
        start of steady state.  Statistics lines are accumulated into
        the current bundle; any other line is ignored.

        Raises ValueError if a statistics line names an entity type that
        is not handled below.
        """
        m = self.startre.search(line)
        if m:
            mode = m.group(1).lower() + "_" + m.group(2).lower()
            scale = m.group(3)
            bundles = self.stats.setdefault(mode, dict())
            if scale not in bundles:
                bundles[scale] = StatBundle()
            self.curbundle = bundles[scale]
            self.fbbootstrapping = True
            self.steadystate = False
            # Do not carry relay totals over from a previous simulation
            self.totrelaybytes = None
            return

        m = self.statperbwre.search(line)
        if m:
            enttype, means, stddevs, meanperbws, stddevperbws, Ns = \
                m.group(1, 2, 4, 5, 7, 8)
        else:
            m = self.statre.search(line)
            if m:
                enttype, means, stddevs, Ns = m.group(1, 3, 5, 6)
                meanperbws, stddevperbws = None, None
            else:
                if self.mibre.search(line):
                    # We've reached steady state
                    self.steadystate = True
                return

        mean = float(means)
        stddev = self._to_float(stddevs)
        meanperbw = self._to_float(meanperbws)
        stddevperbw = self._to_float(stddevperbws)
        N = int(Ns)

        bundle = self.curbundle
        if enttype == 'Dirauths':
            bundle.dirauth(mean, stddev, N)
            if self.steadystate:
                bundle.steady_dirauth(mean, None, 1)
        elif enttype == 'Relays':
            bundle.relay_all(mean, stddev, N)
            if self.steadystate:
                bundle.steady_relay(mean, None, 1)
            self.totrelaybytes = mean * N
        elif enttype == 'Relays(FB)':
            # Only the first Relays(FB) line after a simulation starts is
            # counted as bootstrapping
            if self.fbbootstrapping:
                bundle.relay_fb_boot(meanperbw, stddevperbw, N)
                self.fbbootstrapping = False
            else:
                bundle.relay_fb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(B)':
            bundle.relay_nfb_boot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(NB)':
            bundle.relay_nfb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Clients':
            bundle.client_all(mean, stddev, N)
            if self.steadystate:
                bundle.steady_client(mean, None, 1)
                # The per-client figure is undefined with no clients or
                # with no relay total yet; skip it rather than crash.
                if N > 0 and self.totrelaybytes is not None:
                    bundle.steady_relay_perclient(
                        self.totrelaybytes / N, None, 1)
        elif enttype == 'Clients(B)':
            bundle.client_boot(mean, stddev, N)
        elif enttype == 'Clients(NB)':
            bundle.client_nboot(mean, stddev, N)
        else:
            raise ValueError('Unknown entity type "%s"' % enttype)

    def _output_lines(self, mode):
        """Yield the output lines for one mode in increasing scale order."""
        bundles = self.stats[mode]
        # Scales are stored as the strings found in the log; sort them by
        # numeric value so that e.g. "10" comes after "9".
        for scale in sorted(bundles, key=float):
            yield "%s %s\n" % (SCALE_MULTIPLIER * float(scale),
                               bundles[scale])

    def write_output(self):
        """Write one "<mode>.dat" file per mode seen in the logs, in the
        current directory."""
        for mode in self.stats:
            with open("%s.dat" % mode, "w") as datout:
                datout.writelines(self._output_lines(mode))


if __name__ == '__main__':
    logparser = LogParser()
    for line in fileinput.input():
        logparser.parse_line(line)
    logparser.write_output()