parselogs.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. #!/usr/bin/env python3
  2. # Parse the log files output by the simulator to get the overall average
  3. # and stddev of total bytes sent+received per epoch for each of:
  4. # - dirauths
  5. # - total relays
  6. # - bootstrapping fallback relays per bw
  7. # - non-bootstrapping fallback relays per bw
  8. # - bootstrapping non-fallback relays per bw
  9. # - non-bootstrapping non-fallback relays per bw
  10. # - total clients
  11. # - bootstrapping clients
  12. # - non-bootstrapping clients
  13. # - steady-state dirauths (skipping the first epoch in which all clients
  14. # are bootstrapping)
  15. # - steady-state relays (as above)
  16. # - steady-state clients (as above)
  17. # - steady-state total relay traffic per client (as above)
  18. # Pass the names of the log files as command-line arguments, or the log
  19. # files themselves on stdin.
  20. # The output will be five files named:
  21. # - vanilla_none.dat
  22. # - telescoping_merkle.dat
  23. # - telescoping_threshsig.dat
  24. # - singlepass_merkle.dat
  25. # - singlepass_threshsig.dat
  26. # Each file will contain one line for every network scale seen in the
  27. # logs. The line will consist of 27 fields:
# - the network scale multiplied by 6500 (float)
  29. # - the mean and stddev (two floats) for each of the above 13 classes,
  30. # in the above order; for the steady states, the stats are computed
  31. # over the per-epoch averages, while for the others, the stats are
  32. # computed on a per-entity basis
  33. import math
  34. import re
  35. import fileinput
  36. modes = [ "vanilla_none", "telescoping_merkle", "telescoping_threshsig",
  37. "singlepass_merkle", "singlepass_threshsig" ]
  38. class StatAccum:
  39. """Accumulate (mean, stddev, N) triples to compute an overall (mean,
  40. stddev, N)."""
  41. def __init__(self):
  42. self.sumX = 0
  43. self.sumXsq = 0
  44. self.totN = 0
  45. def accum(self, mean, stddev, N):
  46. if N == 0:
  47. # Nothing to change
  48. return
  49. if N == 1:
  50. # stddev will be None
  51. this_sumX = mean
  52. this_sumXsq = mean*mean
  53. else:
  54. this_sumX = mean * N
  55. variance = stddev * stddev
  56. this_sumXsq = variance * (N-1) + this_sumX * this_sumX / N
  57. self.sumX += this_sumX
  58. self.sumXsq += this_sumXsq
  59. self.totN += N
  60. def stats(self):
  61. if self.totN == 0:
  62. return (None, None, 0)
  63. if self.totN == 1:
  64. return (self.sumX, None, 1)
  65. mean = self.sumX / self.totN
  66. variance = (self.sumXsq - self.sumX * self.sumX / self.totN) \
  67. / (self.totN - 1)
  68. stddev = math.sqrt(variance)
  69. return (mean, stddev, self.totN)
  70. def __str__(self):
  71. mean, stddev, N = self.stats()
  72. if mean is None:
  73. mean = 0
  74. if stddev is None:
  75. stddev = 0
  76. return "%f %f" % (mean, stddev)
  77. class StatBundle:
  78. """A bundle of 13 StatAccums, corresponding to the 13 entity classes
  79. listed above."""
  80. def __init__(self):
  81. self.stats = [StatAccum() for i in range(13)]
  82. def dirauth(self, mean, stddev, N):
  83. self.stats[0].accum(mean, stddev, N)
  84. def relay_all(self, mean, stddev, N):
  85. self.stats[1].accum(mean, stddev, N)
  86. def relay_fb_boot(self, mean, stddev, N):
  87. self.stats[2].accum(mean, stddev, N)
  88. def relay_fb_nboot(self, mean, stddev, N):
  89. self.stats[3].accum(mean, stddev, N)
  90. def relay_nfb_boot(self, mean, stddev, N):
  91. self.stats[4].accum(mean, stddev, N)
  92. def relay_nfb_nboot(self, mean, stddev, N):
  93. self.stats[5].accum(mean, stddev, N)
  94. def client_all(self, mean, stddev, N):
  95. self.stats[6].accum(mean, stddev, N)
  96. def client_boot(self, mean, stddev, N):
  97. self.stats[7].accum(mean, stddev, N)
  98. def client_nboot(self, mean, stddev, N):
  99. self.stats[8].accum(mean, stddev, N)
  100. def steady_dirauth(self, mean, stddev, N):
  101. self.stats[9].accum(mean, stddev, N)
  102. def steady_relay(self, mean, stddev, N):
  103. self.stats[10].accum(mean, stddev, N)
  104. def steady_client(self, mean, stddev, N):
  105. self.stats[11].accum(mean, stddev, N)
  106. def steady_relay_perclient(self, mean, stddev, N):
  107. self.stats[12].accum(mean, stddev, N)
  108. def __str__(self):
  109. return '%s %s %s %s %s %s %s %s %s %s %s %s %s' % (
  110. self.stats[0], self.stats[1], self.stats[2], self.stats[3],
  111. self.stats[4], self.stats[5], self.stats[6], self.stats[7],
  112. self.stats[8], self.stats[9], self.stats[10],
  113. self.stats[11], self.stats[12])
  114. class LogParser:
  115. """A class to parse the logfiles output by simulator.py."""
  116. def __init__(self):
  117. # self.stats is a dict indexed by mode name (like
  118. # "singlepass_merkle") whose value is a dict indexed
  119. # by network scale whose value is a StatBundle
  120. self.stats = dict()
  121. self.curbundle = None
  122. self.fbbootstrapping = None
  123. self.steadystate = False
  124. self.startre = re.compile('Starting simulation .*?\/([A-Z]+)_([A-Z]+)_([\d\.]+)_')
  125. self.statperbwre = re.compile('(Relays\(N?F?B\)).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*bytesperbw=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
  126. self.statre = re.compile('(Dirauths|Relays|Clients(\(N?B\))?).*bytes=([\d\.]+)( .pm ([\d\.]+))?.*N=(\d+)')
  127. self.mibre = re.compile('MiB used')
  128. def parse_line(self, line):
  129. m = self.startre.search(line)
  130. if m:
  131. mode = m.group(1).lower() + "_" + m.group(2).lower()
  132. scale = m.group(3)
  133. if mode not in self.stats:
  134. self.stats[mode] = dict()
  135. if scale not in self.stats[mode]:
  136. self.stats[mode][scale] = StatBundle()
  137. self.curbundle = self.stats[mode][scale]
  138. self.fbbootstrapping = True
  139. self.steadystate = False
  140. return
  141. m = self.statperbwre.search(line)
  142. if m:
  143. enttype, means, stddevs, meanperbws, stddevperbws, Ns = \
  144. m.group(1,2,4,5,7,8)
  145. else:
  146. m = self.statre.search(line)
  147. if m:
  148. enttype, means, stddevs, Ns = \
  149. m.group(1,3,5,6)
  150. meanperbws, stddevperbws = None, None
  151. else:
  152. m = self.mibre.search(line)
  153. if m:
  154. # We've reached steady state
  155. self.steadystate = True
  156. return
  157. mean = float(means)
  158. if stddevs:
  159. stddev = float(stddevs)
  160. else:
  161. stddev = None
  162. if meanperbws:
  163. meanperbw = float(meanperbws)
  164. else:
  165. meanperbw = None
  166. if stddevperbws:
  167. stddevperbw = float(stddevperbws)
  168. else:
  169. stddevperbw = None
  170. N = int(Ns)
  171. # print('%s %s %s %s %s %s' % (enttype, mean, stddev, meanperbw, stddevperbw, N))
  172. if enttype == 'Dirauths':
  173. self.curbundle.dirauth(mean, stddev, N)
  174. if self.steadystate:
  175. self.curbundle.steady_dirauth(mean, None, 1)
  176. elif enttype == 'Relays':
  177. self.curbundle.relay_all(mean, stddev, N)
  178. if self.steadystate:
  179. self.curbundle.steady_relay(mean, None, 1)
  180. self.totrelaybytes = mean * N
  181. elif enttype == 'Relays(FB)':
  182. if self.fbbootstrapping:
  183. self.curbundle.relay_fb_boot(meanperbw, stddevperbw, N)
  184. self.fbbootstrapping = False
  185. else:
  186. self.curbundle.relay_fb_nboot(meanperbw, stddevperbw, N)
  187. elif enttype == 'Relays(B)':
  188. self.curbundle.relay_nfb_boot(meanperbw, stddevperbw, N)
  189. elif enttype == 'Relays(NB)':
  190. self.curbundle.relay_nfb_nboot(meanperbw, stddevperbw, N)
  191. elif enttype == 'Clients':
  192. self.curbundle.client_all(mean, stddev, N)
  193. if self.steadystate:
  194. self.curbundle.steady_client(mean, None, 1)
  195. self.curbundle.steady_relay_perclient(self.totrelaybytes / N, None, 1)
  196. elif enttype == 'Clients(B)':
  197. self.curbundle.client_boot(mean, stddev, N)
  198. elif enttype == 'Clients(NB)':
  199. self.curbundle.client_nboot(mean, stddev, N)
  200. else:
  201. raise ValueError('Unknown entity type "%s"' % enttype)
  202. def write_output(self):
  203. for mode in self.stats.keys():
  204. with open("%s.dat" % mode, "w") as datout:
  205. for scale in sorted(self.stats[mode].keys()):
  206. datout.write("%s %s\n" % \
  207. (6500*float(scale), self.stats[mode][scale]))
  208. datout.close()
  209. if __name__ == '__main__':
  210. logparser = LogParser()
  211. for line in fileinput.input():
  212. logparser.parse_line(line)
  213. logparser.write_output()