sengler
/
relay-throughput-testing


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
							import sys
import os
import time
#
import numpy as np
import matplotlib.pylab as plt
#
import data_helpers
import log_system_usage
#
def padded_window(timestamps, start_time, end_time, padding):
	"""
	Return the (begin, end) slice bounds covering [start_time, end_time] plus padding.

	'begin' is the index of the first timestamp strictly greater than 'start_time',
	moved back by 'padding' samples; 'end' is the index of the first timestamp
	strictly greater than 'end_time', moved forward by 'padding' samples. Both
	bounds are clamped to [0, len(timestamps)] so that:
	  - a negative 'begin' can't wrap around and select from the end of the array
	  - a missing "first timestamp after 'end_time'" selects up to the end of the
	    array (a plain np.argmax() would return 0 in that case)
	"""
	timestamps = np.asarray(timestamps)
	#
	def first_after(instant):
		later = np.flatnonzero(timestamps > instant)
		return int(later[0]) if later.size else len(timestamps)
	#
	begin = max(first_after(start_time) - padding, 0)
	end = min(first_after(end_time) + padding, len(timestamps))
	return (begin, end)
#
def title_to_filename(title):
	"""
	Turn a plot title into a filename: lowercase, spaces become '-', and the
	characters '/', '(', ')' and ',' are removed.
	"""
	return title.lower().translate(str.maketrans(' ', '-', '/(),'))
#
if __name__ == '__main__':
	# import the submodule explicitly rather than relying on pylab having loaded it
	import matplotlib.ticker
	#
	if len(sys.argv) < 5:
		sys.exit('usage: {} TITLE RELAY_THROUGHPUT CLIENT_INFO SERVER_RESULTS [HOST_CPU_LOG REMOTE_CPU_LOG]'.format(sys.argv[0]))
	#
	initial_time = time.time()
	#
	title = sys.argv[1]
	(_, throughput_data) = data_helpers.read_relay_throughput(sys.argv[2])
	client_data = data_helpers.read_client_info(sys.argv[3])
	server_data = data_helpers.read_server_results(sys.argv[4])
	#
	# the system usage logs are optional: skip them if they weren't given or don't exist
	plot_cpu_usage = (len(sys.argv) >= 7)
	if plot_cpu_usage:
		try:
			host_system_usage = log_system_usage.load_cpu_stats(sys.argv[5])
			remote_system_usage = log_system_usage.load_cpu_stats(sys.argv[6])
		except FileNotFoundError:
			plot_cpu_usage = False
		#
	#
	if not plot_cpu_usage:
		print('The system usage logs weren\'t found, so skipping them...')
	#
	print('Loaded Files: {}'.format(time.time()-initial_time))
	#
	norm_throughput = data_helpers.normalize_relay_throughput(throughput_data)
	start_time = client_data['start_time']
	last_byte_times = np.array([x['time_of_last_byte'] for x in server_data])
	end_time = np.max(last_byte_times)
	#
	# the process-state overlay is disabled; flip this to draw it when the logs contain process info
	enable_process_plot = False
	plot_processes = False
	#
	if plot_cpu_usage:
		host_system_usage['timestamps'] = np.array(host_system_usage['timestamps'])
		# NOTE: the host per-cpu usage is computed but not currently plotted
		host_cpu_usage = {int(cpu): np.array(log_system_usage.calculate_cpu_usage_continuous(host_system_usage['stats']['cpus'][cpu])) for cpu in host_system_usage['stats']['cpus']}
		remote_system_usage['timestamps'] = np.array(remote_system_usage['timestamps'])
		remote_cpu_usage = {int(cpu): np.array(log_system_usage.calculate_cpu_usage_continuous(remote_system_usage['stats']['cpus'][cpu])) for cpu in remote_system_usage['stats']['cpus']}
		#
		plot_processes = (enable_process_plot and 'processes' in remote_system_usage and 'processes' in host_system_usage)
		#
		if plot_processes:
			target_tor_proc_states = [[y['state'] for y in x if 'target/torrc' in y['args']] for x in remote_system_usage['processes']]
			host_all_proc_states = [[y['state'] for y in x] for x in host_system_usage['processes']]
			any_host_proc_states_D = ['D' in x for x in host_all_proc_states]
			#
			if any(len(x) > 1 for x in target_tor_proc_states):
				raise ValueError('Expected at most one target tor process per sample')
			#
			print(set([y['args'] for x in host_system_usage['processes'] for y in x if y['state'] == 'D']))
			#
			# one state per sample, or None if the target process wasn't running
			target_tor_proc_states = [x[0] if x else None for x in target_tor_proc_states]
		#
	#
	(throughput_start_index, throughput_end_index) = padded_window(norm_throughput['timestamps'], start_time, end_time, 5)
	if plot_cpu_usage:
		(host_cpu_start_index, host_cpu_end_index) = padded_window(host_system_usage['timestamps'], start_time, end_time, 20)
		(remote_cpu_start_index, remote_cpu_end_index) = padded_window(remote_system_usage['timestamps'], start_time, end_time, 20)
	#
	print('Processed Data: {}'.format(time.time()-initial_time))
	#
	fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True, figsize=(20,13))
	fig.suptitle('{}\n\n{}'.format(title, os.path.basename(sys.argv[2])))
	#
	# ax1: relay throughput (even columns, odd columns, and all columns) and completed streams
	throughput_window = slice(throughput_start_index, throughput_end_index)
	throughput_times = norm_throughput['timestamps'][throughput_window]-start_time
	throughput_threads = norm_throughput['threads'][throughput_window]
	#
	ax1_colors = plt.get_cmap('tab20').colors[0:2]
	for x in range(2):
		ax1.step(throughput_times, 0.5*np.sum(throughput_threads[:,x::2], axis=1)/2**20,
			where='post', color=ax1_colors[x])
	#
	ax1.step(throughput_times, 0.5*np.sum(throughput_threads, axis=1)/2**20,
		where='post', color='grey')
	ax1.set_ylabel('Throughput (MiB/s)', color=ax1_colors[0])
	#
	ax1_twin = ax1.twinx()
	ax1_twin_color = plt.get_cmap('tab20').colors[4]
	# NOTE(review): the count starts at 0 at the first completion time -- confirm this is intended
	ax1_twin.plot(np.sort(last_byte_times)-start_time, np.arange(len(last_byte_times)), color=ax1_twin_color)
	ax1_twin.set_ylabel('Number of completed streams', color=ax1_twin_color)
	#
	print('Finished plotting ax1: {}'.format(time.time()-initial_time))
	#
	# ax2: cumulative bytes of each transfer, with transfers sharing a color selector drawn in the same color
	colormap = plt.get_cmap('tab20').colors
	assigned_colors = []
	#
	for transfer in server_data:
		color_selector = transfer['custom_data']['circuit'][1][-1]
		if color_selector not in assigned_colors:
			assigned_colors.append(color_selector)
		#
		color_index = assigned_colors.index(color_selector)
		ax2.step(transfer['deltas']['timestamps']-start_time, np.cumsum(transfer['deltas']['bytes']),
			color=colormap[color_index%len(colormap)], where='post')
	#
	ax2.set_ylabel('Bytes')
	ax2_twin = ax2.twinx()
	ax2_twin.set_ylim([x/(2**20) for x in ax2.get_ylim()])
	ax2_twin.set_ylabel('MiB')
	#
	print('Finished plotting ax2: {}'.format(time.time()-initial_time))
	#
	# ax3: per-core cpu usage of the remote system (and optionally process states)
	if plot_cpu_usage:
		remote_window = slice(remote_cpu_start_index, remote_cpu_end_index)
		remote_times = remote_system_usage['timestamps'][remote_window]-start_time
		for cpu in remote_cpu_usage:
			ax3.step(remote_times, 100*remote_cpu_usage[cpu][remote_window],
				label='CPU {}'.format(cpu))
		#
		if plot_processes:
			host_window = slice(host_cpu_start_index, host_cpu_end_index)
			# None (the target process wasn't running) is always the last value
			vals = sorted(set(target_tor_proc_states) - {None}) + [None]
			print(vals)
			ax3.step(remote_times,
				[10*vals.index(x)+120 for x in target_tor_proc_states[remote_window]])
			ax3.step(host_system_usage['timestamps'][host_window]-start_time,
				[int(x)*20+160 for x in any_host_proc_states_D[host_window]])
		#
	#
	ax3.grid()
	ax3.xaxis.set_major_formatter(matplotlib.ticker.FormatStrFormatter('%.3f'))
	ax3.set_xlabel('Time (s)')
	ax3.set_ylabel('Per-Core CPU Usage (%)')
	if ax3.get_legend_handles_labels()[0]:
		# only draw a legend if there are labelled lines
		ax3.legend()
	#
	print('Finished plotting ax3: {}'.format(time.time()-initial_time))
	#
	fig.tight_layout()
	plt.subplots_adjust(top=0.92)
	output_filename = title_to_filename(title)
	# uncomment to also save the figure
	#plt.savefig(os.path.join('/tmp', output_filename))
	# show() takes no positional arguments in current matplotlib ('block' is keyword-only)
	plt.show()
#