#!/usr/bin/python3
import os
import sys
import math
import numpy as np
import re

'''
Produce the output csv given the LOGS_FOLDER with multiple experiment
log folders within it.
'''

# The statistics to report in the output csv file. Each will be output
# as mean,stddev,max
stats_keys = ['epoch', 'wn', 'bytes']
def get_epoch_stats(f, expected_real):
    """Scan the open log file f for the next complete steady-state epoch.

    Reads lines from f until one epoch has been fully observed.

    Returns:
        dict: stats for the epoch (possible keys: 'epoch' = epoch wall
            time, 'wn' = Waksman-network precompute time, 'bytes' =
            bytes sent), when the epoch completed and at least
            expected_real real messages were received (steady state);
        None: when the epoch was interrupted by a new experiment, or it
            had fewer than expected_real real messages (still starting up);
        'EOF': when the end of the file is reached.
    """
    epoch_stats = {}
    looking_for_epoch_start = True
    epoch_start = None
    num_real = 0
    waksman_precompute_start = None
    while True:
        line = f.readline()
        # If we get to the end of file, indicate that by returning 'EOF'
        if line == "":
            return 'EOF'
        # Look for the start of the next epoch
        if looking_for_epoch_start:
            if matches := re.match(r'(\d+\.\d+): Epoch \d+ start', line):
                [ts] = matches.groups()
                epoch_start = float(ts)
                looking_for_epoch_start = False
                # BUGFIX: the original had the bare expression `next`
                # here, which is a no-op in Python 3; `continue` is what
                # was intended (skip matching this line further).
                continue
        # If we see the start of a new experiment, then the current
        # epoch is interrupted, or we were scanning the trailing bit of
        # the previous experiment
        if re.match(r'Loaded sealed private key', line):
            return None
        # If we see the end of the epoch, check to see if the number of
        # real messages we received was as expected (that is, if we've
        # reached steady state, as opposed to just starting up)
        if re.match(r'(\d+\.\d+): Sleeping for ', line):
            if num_real >= expected_real:
                return epoch_stats
            else:
                return None
        # If we see a report of the number of real and padding messages
        # received, record the number of real messages
        if matches := re.match(r'(\d+) real, \d+ padding', line):
            [rm] = matches.groups()
            num_real = int(rm)
        # If we see the end of an epoch, record the epoch time (guard
        # against a malformed log with a 'complete' line but no 'start')
        if matches := re.match(r'(\d+\.\d+): Epoch \d+ complete', line):
            [ts] = matches.groups()
            if epoch_start is not None:
                epoch_stats['epoch'] = float(ts)-epoch_start
        # If we see Waksman network precompute start/stop times,
        # record those
        if matches := re.match(r'(\d+\.\d+): (Begin|End) Waksman networks precompute', line):
            [ts, be] = matches.groups()
            if be == 'Begin':
                waksman_precompute_start = float(ts)
            elif be == 'End' and waksman_precompute_start is not None:
                epoch_stats['wn'] = float(ts)-waksman_precompute_start
        # If we see the number of bytes sent, record that
        if matches := re.match(r'bytes_sent = (\d+)', line):
            [bs] = matches.groups()
            epoch_stats['bytes'] = int(bs)
def stats_string(vals):
    """Return a string of the form ',mean,stddev,max' for the given
    input values.

    An empty or missing sample yields ',0,0,0'; a single sample is
    echoed verbatim with a zero stddev; two or more samples are reported
    to three decimal places.
    """
    if vals is None or len(vals) == 0:
        return ",0,0,0"
    if len(vals) == 1:
        return f",{vals[0]},0,{vals[0]}"
    mean = np.mean(vals)
    stddev = np.std(vals)
    mx = np.max(vals)
    return f",{mean:.3f},{stddev:.3f},{mx:.3f}"
def parse_output_logs(LOGS_FOLDER, experiment_name, generate_csv = False, op_file = None):
    """Parse the per-server logs of one experiment and append a CSV line
    to op_file.

    experiment_name is the experiment's folder name, encoding the
    parameters as 'n_M_t_b' (number of messages, number of servers,
    threads, and b); server m's log is expected at
    LOGS_FOLDER/experiment_name/s<m>.log.  generate_csv is accepted for
    backward compatibility but unused.
    """
    params = experiment_name.split('_')
    print(params)
    n = int(params[0])
    M = int(params[1])
    t = int(params[2])
    b = int(params[3].strip('/'))
    # At steady state, each of the M servers should see about n/M real
    # messages per epoch
    expected_real = math.floor(n/M)
    stats = {}
    for m in range(1, M+1):
        logfilename = os.path.join(LOGS_FOLDER, experiment_name, 's'+str(m)+'.log')
        print(logfilename)
        # BUGFIX: the original opened each log without ever closing it,
        # leaking one file handle per server
        with open(logfilename, 'r') as f:
            while True:
                epoch_stats = get_epoch_stats(f, expected_real)
                if epoch_stats == 'EOF':
                    break
                if epoch_stats is not None:
                    for key in epoch_stats:
                        stats.setdefault(key, []).append(epoch_stats[key])
    # Number of steady-state epochs per server.  BUGFIX: use .get so an
    # experiment with no completed epoch yields 0 instead of a KeyError.
    epochs = len(stats.get('epoch', [])) // M
    print("Num epochs = %d" % epochs)
    op_line = f"{n},{M},{t},{b},{epochs}"
    for key in stats_keys:
        op_line += stats_string(stats.get(key))
    op_line += "\n"
    op_file.write(op_line)
if __name__ == "__main__":
    # Usage: ./logs_to_csv.py <Path to logs folder> <output csv file name>
    if len(sys.argv) != 3:
        print("Incorrect usage!\n")
        print("./logs_to_csv.py expects 2 parameters.")
        print("Usage: ./logs_to_csv.py <Path to logs folder> <output csv file name>")
        # BUGFIX: exit with a nonzero status on a usage error (the
        # original bare exit() reported success)
        sys.exit(1)
    LOGS_FOLDER = sys.argv[1]
    OUTPUT_FILE = sys.argv[2]
    # Write the CSV header, then one line per experiment folder
    with open(OUTPUT_FILE, 'w') as op_file:
        op_header = "N,M,T,B,E"
        for key in stats_keys:
            op_header += f",{key}_mean,{key}_stddev,{key}_max"
        op_header += "\n"
        op_file.write(op_header)
        for exp_name in os.listdir(LOGS_FOLDER):
            parse_output_logs(LOGS_FOLDER, exp_name, True, op_file)