#!/usr/bin/env python3
# Parse the log files output by the simulator to get the overall average
# and stddev of total bytes sent+received per epoch for each of:
# - dirauths
# - total relays
# - bootstrapping fallback relays per bw
# - non-bootstrapping fallback relays per bw
# - bootstrapping non-fallback relays per bw
# - non-bootstrapping non-fallback relays per bw
# - total clients
# - bootstrapping clients
# - non-bootstrapping clients
# - steady-state dirauths (skipping the first epoch, in which all clients
#   are bootstrapping)
# - steady-state relays (as above)
# - steady-state clients (as above)
# - steady-state total relay traffic per client (as above)
# Pass the names of the log files as command-line arguments, or the log
# files themselves on stdin.
# The output will be five files named:
# - vanilla_none.dat
# - telescoping_merkle.dat
# - telescoping_threshsig.dat
# - singlepass_merkle.dat
# - singlepass_threshsig.dat
# Each file will contain one line for every network scale seen in the
# logs.  The line will consist of 27 fields:
# - the network scale (float)
# - the mean and stddev (two floats) for each of the above 13 classes,
#   in the above order; for the steady states, the stats are computed
#   over the per-epoch averages, while for the others, the stats are
#   computed on a per-entity basis
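#
# Example invocations (the script and log filenames here are hypothetical;
# use whatever names your runs actually produce):
#   ./parselogs.py sim_run1.log sim_run2.log
#   cat sim_*.log | ./parselogs.py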

import math
import re
import fileinput

modes = [ "vanilla_none", "telescoping_merkle", "telescoping_threshsig",
    "singlepass_merkle", "singlepass_threshsig" ]

class StatAccum:
    """Accumulate (mean, stddev, N) triples to compute an overall (mean,
    stddev, N)."""

    def __init__(self):
        self.sumX = 0
        self.sumXsq = 0
        self.totN = 0

    def accum(self, mean, stddev, N):
        if N == 0:
            # Nothing to change
            return
        if N == 1:
            # stddev will be None
            this_sumX = mean
            this_sumXsq = mean*mean
        else:
            # Recover the sum and sum of squares from the summary stats:
            # sum(x) = mean*N and sum(x^2) = stddev^2*(N-1) + (sum(x))^2/N
            this_sumX = mean * N
            variance = stddev * stddev
            this_sumXsq = variance * (N-1) + this_sumX * this_sumX / N
        self.sumX += this_sumX
        self.sumXsq += this_sumXsq
        self.totN += N

    def stats(self):
        if self.totN == 0:
            return (None, None, 0)
        if self.totN == 1:
            return (self.sumX, None, 1)
        mean = self.sumX / self.totN
        variance = (self.sumXsq - self.sumX * self.sumX / self.totN) \
            / (self.totN - 1)
        stddev = math.sqrt(variance)
        return (mean, stddev, self.totN)

    def __str__(self):
        mean, stddev, N = self.stats()
        if mean is None:
            mean = 0
        if stddev is None:
            stddev = 0
        return "%f %f" % (mean, stddev)

class StatBundle:
    """A bundle of 13 StatAccums, corresponding to the 13 entity classes
    listed above."""

    def __init__(self):
        self.stats = [StatAccum() for i in range(13)]

    def dirauth(self, mean, stddev, N):
        self.stats[0].accum(mean, stddev, N)
    def relay_all(self, mean, stddev, N):
        self.stats[1].accum(mean, stddev, N)
    def relay_fb_boot(self, mean, stddev, N):
        self.stats[2].accum(mean, stddev, N)
    def relay_fb_nboot(self, mean, stddev, N):
        self.stats[3].accum(mean, stddev, N)
    def relay_nfb_boot(self, mean, stddev, N):
        self.stats[4].accum(mean, stddev, N)
    def relay_nfb_nboot(self, mean, stddev, N):
        self.stats[5].accum(mean, stddev, N)
    def client_all(self, mean, stddev, N):
        self.stats[6].accum(mean, stddev, N)
    def client_boot(self, mean, stddev, N):
        self.stats[7].accum(mean, stddev, N)
    def client_nboot(self, mean, stddev, N):
        self.stats[8].accum(mean, stddev, N)
    def steady_dirauth(self, mean, stddev, N):
        self.stats[9].accum(mean, stddev, N)
    def steady_relay(self, mean, stddev, N):
        self.stats[10].accum(mean, stddev, N)
    def steady_client(self, mean, stddev, N):
        self.stats[11].accum(mean, stddev, N)
    def steady_relay_perclient(self, mean, stddev, N):
        self.stats[12].accum(mean, stddev, N)

    def __str__(self):
        return ' '.join(str(s) for s in self.stats)
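
# Illustrative use of StatBundle (made-up numbers): each per-epoch summary
# from the logs is fed to the accumulator for its entity class, and printing
# the bundle emits the 26 "mean stddev" fields that follow the network scale
# on each output line.
#
#   >>> bundle = StatBundle()
#   >>> bundle.dirauth(1000.0, 50.0, 9)   # dirauth bytes: mean, stddev, N
#   >>> str(bundle).split()[:2]
#   ['1000.000000', '50.000000']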

class LogParser:
    """A class to parse the logfiles output by simulator.py."""

    def __init__(self):
        # self.stats is a dict indexed by mode name (like
        # "singlepass_merkle") whose value is a dict indexed
        # by network scale whose value is a StatBundle
        self.stats = dict()
        self.curbundle = None
        self.fbbootstrapping = None
        self.steadystate = False
        # Total relay bytes for the current epoch; set when the "Relays"
        # summary line is parsed and used for the per-client steady-state stat
        self.totrelaybytes = None
        self.startre = re.compile(
            r'Starting simulation .*?/([A-Z]+)_([A-Z]+)_([\d\.]+)_')
        self.statperbwre = re.compile(
            r'(Relays\(N?F?B\)).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*bytesperbw=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
        self.statre = re.compile(
            r'(Dirauths|Relays|Clients(\(N?B\))?).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
        self.mibre = re.compile('MiB used')
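
    # The regexes above are written to match simulator output lines of
    # roughly the following shapes (reconstructed from the patterns
    # themselves; the exact formatting of the real logs may differ):
    #
    #   Starting simulation .../TELESCOPING_MERKLE_0.05_...
    #   Relays(FB) ... bytes=12345.6 \pm 78.9 ... bytesperbw=1.23 \pm 0.04 ... N=12
    #   Dirauths ... bytes=23456.7 \pm 89.0 ... N=9
    #   ... MiB used ...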

    def parse_line(self, line):
        m = self.startre.search(line)
        if m:
            mode = m.group(1).lower() + "_" + m.group(2).lower()
            scale = m.group(3)
            if mode not in self.stats:
                self.stats[mode] = dict()
            if scale not in self.stats[mode]:
                self.stats[mode][scale] = StatBundle()
            self.curbundle = self.stats[mode][scale]
            self.fbbootstrapping = True
            self.steadystate = False
            return
        m = self.statperbwre.search(line)
        if m:
            enttype, means, stddevs, meanperbws, stddevperbws, Ns = \
                m.group(1,2,4,5,7,8)
        else:
            m = self.statre.search(line)
            if m:
                enttype, means, stddevs, Ns = \
                    m.group(1,3,5,6)
                meanperbws, stddevperbws = None, None
            else:
                m = self.mibre.search(line)
                if m:
                    # We've reached steady state
                    self.steadystate = True
                return
        mean = float(means)
        if stddevs:
            stddev = float(stddevs)
        else:
            stddev = None
        if meanperbws:
            meanperbw = float(meanperbws)
        else:
            meanperbw = None
        if stddevperbws:
            stddevperbw = float(stddevperbws)
        else:
            stddevperbw = None
        N = int(Ns)
        # print('%s %s %s %s %s %s' % (enttype, mean, stddev, meanperbw, stddevperbw, N))
        if enttype == 'Dirauths':
            self.curbundle.dirauth(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_dirauth(mean, None, 1)
        elif enttype == 'Relays':
            self.curbundle.relay_all(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_relay(mean, None, 1)
                self.totrelaybytes = mean * N
        elif enttype == 'Relays(FB)':
            if self.fbbootstrapping:
                self.curbundle.relay_fb_boot(meanperbw, stddevperbw, N)
                self.fbbootstrapping = False
            else:
                self.curbundle.relay_fb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(B)':
            self.curbundle.relay_nfb_boot(meanperbw, stddevperbw, N)
        elif enttype == 'Relays(NB)':
            self.curbundle.relay_nfb_nboot(meanperbw, stddevperbw, N)
        elif enttype == 'Clients':
            self.curbundle.client_all(mean, stddev, N)
            if self.steadystate:
                self.curbundle.steady_client(mean, None, 1)
                self.curbundle.steady_relay_perclient(self.totrelaybytes / N, None, 1)
        elif enttype == 'Clients(B)':
            self.curbundle.client_boot(mean, stddev, N)
        elif enttype == 'Clients(NB)':
            self.curbundle.client_nboot(mean, stddev, N)
        else:
            raise ValueError('Unknown entity type "%s"' % enttype)

    def write_output(self):
        for mode in self.stats.keys():
            with open("%s.dat" % mode, "w") as datout:
                for scale in sorted(self.stats[mode].keys()):
                    # The scale is multiplied by 6500, presumably the number
                    # of relays at network scale 1.0
                    datout.write("%s %s\n" % \
                        (6500*float(scale), self.stats[mode][scale]))


if __name__ == '__main__':
    logparser = LogParser()
    for line in fileinput.input():
        logparser.parse_line(line)
    logparser.write_output()
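
# Minimal smoke test (synthetic lines built to match the regexes above; the
# real simulator output may be formatted differently), e.g. in an
# interactive session:
#
#   >>> lp = LogParser()
#   >>> lp.parse_line('Starting simulation logs/VANILLA_NONE_0.05_1.log')
#   >>> lp.parse_line(r'Dirauths: bytes=1000.0 \pm 10.0 N=9')
#   >>> str(lp.stats['vanilla_none']['0.05']).split()[:2]
#   ['1000.000000', '10.000000']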