logs_to_csv.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. #!/usr/bin/python3
  2. import os
  3. import sys
  4. import math
  5. import numpy as np
  6. import re
'''
Produce the output CSV file from LOGS_FOLDER, which contains one log
folder per experiment. Each experiment folder is named N_M_T_B and holds
the log files s1.log ... sM.log.
'''
  11. # The statistics to report in the output csv file. Each will be output
  12. # as mean,stddev,max
  13. stats_keys = ['epoch', 'wn', 'bytes']
  14. def get_epoch_stats(f, expected_real):
  15. epoch_stats = {}
  16. looking_for_epoch_start = True
  17. epoch_start = None
  18. num_real = 0
  19. waksman_precompute_start = None
  20. while True:
  21. line = f.readline()
  22. # If we get to the end of file, indicate that by returning 'EOF'
  23. if line == "":
  24. return 'EOF'
  25. # Look for the start of the next epoch
  26. if looking_for_epoch_start:
  27. if matches := re.match(r'(\d+\.\d+): Epoch \d+ start', line):
  28. [ts] = matches.groups()
  29. epoch_start = float(ts)
  30. looking_for_epoch_start = False
  31. next
  32. # If we see the start of a new experiment, then the current
  33. # epoch is interrupted, or we were scanning the trailing bit of
  34. # the previous experiment
  35. if re.match(r'Loaded sealed private key', line):
  36. return None
  37. # If we see the end of the epoch, check to see if the number of
  38. # real messages we received was as expected (that is, if we've
  39. # reached steady state, as opposed to just starting up)
  40. if re.match(r'(\d+\.\d+): Sleeping for ', line):
  41. if num_real >= expected_real:
  42. return epoch_stats
  43. else:
  44. return None
  45. # If we see a report of the number of real and padding messages
  46. # received, record the number of real messages
  47. if matches := re.match(r'(\d+) real, \d+ padding', line):
  48. [rm] = matches.groups()
  49. num_real = int(rm)
  50. # If we see the end of an epoch, record the epoch time
  51. if matches := re.match(r'(\d+\.\d+): Epoch \d+ complete', line):
  52. [ts] = matches.groups()
  53. epoch_stats['epoch'] = float(ts)-epoch_start
  54. # If we see Waksman network precompute start/stop times,
  55. # record those
  56. if matches := re.match(r'(\d+\.\d+): (Begin|End) Waksman networks precompute', line):
  57. [ts, be] = matches.groups()
  58. if be == 'Begin':
  59. waksman_precompute_start = float(ts)
  60. elif be == 'End':
  61. epoch_stats['wn'] = float(ts)-waksman_precompute_start
  62. # If we see the number of bytes sent, record that
  63. if matches := re.match(r'bytes_sent = (\d+)', line):
  64. [bs] = matches.groups()
  65. epoch_stats['bytes'] = int(bs)
  66. def stats_string(vals):
  67. """Return a string of the form ',mean,stddev,max' for the given
  68. input values"""
  69. if vals is None or len(vals) == 0:
  70. return ",0,0,0"
  71. if len(vals) == 1:
  72. return f",{vals[0]},0,{vals[0]}"
  73. mean = np.mean(vals)
  74. stddev = np.std(vals)
  75. mx = np.max(vals)
  76. return f",{mean:.3f},{stddev:.3f},{mx:.3f}"
  77. def parse_output_logs(LOGS_FOLDER, experiment_name, generate_csv = False, op_file = None):
  78. params = experiment_name.split('_')
  79. print (params)
  80. n = int(params[0])
  81. M = int(params[1])
  82. t = int(params[2])
  83. b = int(params[3].strip('/'))
  84. expected_real = math.floor(n/M)
  85. stats = {}
  86. for m in range(1,M+1):
  87. logfilename = os.path.join(LOGS_FOLDER, experiment_name, 's'+str(m)+'.log')
  88. f = open(logfilename,'r')
  89. print(logfilename)
  90. line_cnt = 0
  91. while True:
  92. epoch_stats = get_epoch_stats(f, expected_real)
  93. if epoch_stats == 'EOF':
  94. break
  95. if epoch_stats is not None:
  96. for key in epoch_stats:
  97. if key not in stats:
  98. stats[key] = []
  99. stats[key].append(epoch_stats[key])
  100. epochs = int(len(stats['epoch'])/M)
  101. print("Num epochs = %d" % epochs);
  102. op_line = f"{n},{M},{t},{b},{epochs}"
  103. for key in stats_keys:
  104. op_line += stats_string(stats[key] if key in stats else None)
  105. op_line += "\n"
  106. op_file.write(op_line)
  107. if __name__ == "__main__":
  108. if(len(sys.argv)!=3):
  109. print("Incorrect usage!\n")
  110. print("./logs_to_csv.py expects 2 parameters.")
  111. print("Usage: ./logs_to_csv.py <Path to logs folder> <output csv file name>")
  112. exit()
  113. LOGS_FOLDER = sys.argv[1]
  114. OUTPUT_FILE = sys.argv[2]
  115. op_file = open(OUTPUT_FILE, 'w')
  116. op_header = "N,M,T,B,E"
  117. for key in stats_keys:
  118. op_header += f",{key}_mean,{key}_stddev,{key}_max"
  119. op_header += "\n"
  120. op_file.write(op_header)
  121. for exp_name in os.listdir(LOGS_FOLDER):
  122. parse_output_logs(LOGS_FOLDER, exp_name, True, op_file)
  123. op_file.close()