123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597 |
- import sys
- import os
- import argparse
- import logging
- from datetime import datetime
- from random import randint
- from random import seed as stdseed
- from numpy.random import seed as numpyseed
- from multiprocessing import cpu_count
- from platform import platform, uname
- from tornettools.util import which, make_directories
- from tornettools._version import __version__
- import re
- import os
- import logging
- from itertools import cycle
- import matplotlib.pyplot as pyplot
- from matplotlib.ticker import FuncFormatter
- from matplotlib.backends.backend_pdf import PdfPages
- from tornettools.util import load_json_data, find_matching_files_in_dir
- from tornettools.plot_common import (DEFAULT_COLORS, DEFAULT_LINESTYLES, draw_cdf, draw_cdf_ci,
- draw_line, draw_line_ci, quantile, set_plot_options)
- from tornettools.plot_tgen import plot_tgen
- from tornettools.plot_oniontrace import plot_oniontrace
- HELP_MAIN = """
- Use 'tornettools <subcommand> --help' for more info
- """
- DESC_MAIN = """
- tornettools is a utility to guide you through the Tor network
- experimentation process using Shadow. tornettools must be run with a
- subcommand to specify a mode of operation.
- For more information, see https://github.com/shadow/tornettools.
- """
- HELP_STAGE = """
- Process Tor metrics data for staging network generation
- """
- DESC_STAGE = """
- Process Tor network consensuses, relay descriptors, and user files
- from Tor metrics to stage TorNet network generation.
- This command should be used before running generate. This command
- produces staging files that will be required for the generate
- command to succeed.
- """
- HELP_GENERATE = """
- Generate TorNet network configurations
- """
- DESC_GENERATE = """
- Loads the TorNet staging files produced with the stage command
- and uses them to generate a valid TorNet network configuration.
- This command should be used after running stage.
- """
- HELP_SIMULATE = """
- Run a TorNet simulation in Shadow
- """
- DESC_SIMULATE = """
- Runs a Tor simulation using Shadow and the TorNet network
- configurations files generated with the generate command.
- This command should be used after running generate.
- """
- HELP_PARSE = """
- Parse useful data from simulation log files
- """
- DESC_PARSE = """
- Parses log files created by simulations run with the simulate
- command; extracts and stores various useful performance metrics.
- This command should be used after running simulate.
- """
- HELP_PLOT = """
- Plot previously parsed data to visualize results
- """
- DESC_PLOT = """
- Visualizes various performance metrics that were extracted and
- stored with the parse command by producing graphical plots.
- This command should be used after running parse.
- """
- HELP_ARCHIVE = """
- Cleanup and compress Shadow simulation data
- """
- DESC_ARCHIVE = """
- Prepares a Shadow simulation directory for archival by compressing
- simulation output log files and data directories.
- This command can be used any time after running simulate, but
- ideally after parsing and plotting is also completed.
- """
- def __setup_logging_helper(logfilename=None):
- my_handlers = []
- stdout_handler = logging.StreamHandler(sys.stdout)
- my_handlers.append(stdout_handler)
- if logfilename != None:
- make_directories(logfilename)
- file_handler = logging.FileHandler(filename=logfilename)
- my_handlers.append(file_handler)
- logging.basicConfig(
- level=logging.INFO,
- format='%(asctime)s %(created)f [tornettools] [%(levelname)s] %(message)s',
- datefmt='%Y-%m-%d %H:%M:%S',
- handlers=my_handlers,
- )
- msg = "Logging system initialized! Logging events to stdout"
- if logfilename != None:
- msg += " and to '{}'".format(logfilename)
- logging.info(msg)
- def __setup_logging(args):
- if args.quiet <= 1:
- logfilename = None
- if args.quiet == 0 and hasattr(args, 'prefix'):
- # log to a file too
- prefixstr = str(args.prefix)
- funcstr = str(args.command) if args.command is not None else "none"
- datestr = datetime.now().strftime("%Y-%m-%d.%H.%M.%S")
- logfilename = "{}/tornettools.{}.{}.log".format(prefixstr, funcstr, datestr)
- __setup_logging_helper(logfilename)
- else:
- pass # no logging
- def run(args):
- logging.info("Plotting simulation results now")
- set_plot_options()
- logging.info("Plotting mgen comparisons")
- __plot_mnet(args)
- logging.info(f"Done plotting! PDF files are saved to {args.prefix}")
- def __pattern_for_basename(circuittype, basename):
- s = basename + r'\.' + circuittype + r'\.json'
- if circuittype == 'exit':
- # Data files without a circuittype contain exit circuits (from legacy
- # tornettools runs).
- s = basename + r'(\.' + circuittype + r')?\.json'
- else:
- s = basename + r'\.' + circuittype + r'\.json'
- return re.compile(s)
- def __plot_mnet(args):
- args.pdfpages = PdfPages(f"{args.prefix}/tornet.plot.pages.pdf")
- net_scale = __get_simulated_network_scale(args)
- logging.info("Loading mgen rtt_all data")
- dbs = __load_tornet_datasets(args, "rtt_all.mgen.json")
- logging.info("Plotting mgen rtt_all")
- __plot_mgen_rtt_all(args, dbs, net_scale)
- logging.info("Loading mgen rtt_timeout data")
- dbs = __load_tornet_datasets(args, "rtt_timeout.mgen.json")
- logging.info("Plotting mgen rtt_timeout")
- __plot_mgen_rtt_timeout(args, dbs, net_scale)
- logging.info("Loading mgen timeout_by_send data")
- dbs = __load_tornet_datasets(args, "timeout_by_send.mgen.json")
- logging.info("Plotting mgen rtt_by_send")
- __plot_mgen_timeout_by_send(args, dbs, net_scale)
- logging.info("Loading mgen timeout_by_receive data")
- dbs = __load_tornet_datasets(args, "timeout_by_receive.mgen.json")
- logging.info("Plotting mgen rtt_by_receive")
- __plot_mgen_timeout_by_receive(args, dbs, net_scale)
- logging.info("Loading mgen rtt_counts data")
- dbs = __load_tornet_datasets(args, "counts.mgen.json")
- logging.info("Plotting mgen rtt_counts")
- __plot_mgen_count(args, dbs, net_scale)
- args.pdfpages.close()
- def __plot_mgen_rtt_all(args, rtt_dbs, net_scale):
- # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
- for rtt_db in rtt_dbs:
- rtt_db['data'] = rtt_db['dataset']
- __plot_cdf_figure(args, rtt_dbs, 'rtt_all.mgen', yscale='taillog',
- xscale='log',
- xlabel="Time (s)")
- def __plot_mgen_rtt_timeout(args, rtt_dbs, net_scale):
- # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
- for rtt_db in rtt_dbs:
- rtt_db['data'] = rtt_db['dataset']
- __plot_cdf_figure(args, rtt_dbs, 'rtt_timeout.mgen', yscale='taillog',
- xlabel="Time (s)")
- def __plot_mgen_timeout_by_send(args, rtt_dbs, net_scale):
- # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
- for rtt_db in rtt_dbs:
- rtt_db['data'] = rtt_db['dataset']
- __plot_cdf_figure(args, rtt_dbs, 'timeout_by_send.mgen', yscale='taillog',
- xscale='log',
- xlabel="Fraction of (user, group)'s expected receipts")
- def __plot_mgen_timeout_by_receive(args, rtt_dbs, net_scale):
- # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
- for rtt_db in rtt_dbs:
- rtt_db['data'] = rtt_db['dataset']
- __plot_cdf_figure(args, rtt_dbs, 'timeout_by_receive.mgen', yscale='taillog',
- xscale='log',
- xlabel="Fraction of (user, group)'s receipts")
- def __plot_mgen_count(args, count_dbs, net_scale):
- # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
- for count_db in count_dbs:
- count_db['data'] = count_db['dataset']
- __plot_cdf_figure(args, count_dbs, 'count.mgen',
- xlabel="Messages sent per user")
- def __plot_cdf_figure(args, dbs, filename, xscale=None, yscale=None, xlabel=None, ylabel="CDF"):
- color_cycle = cycle(DEFAULT_COLORS)
- linestyle_cycle = cycle(DEFAULT_LINESTYLES)
- pyplot.figure()
- lines, labels = [], []
- for db in dbs:
- if 'data' not in db or len(db['data']) < 1:
- continue
- elif len(db['data']) == 1:
- (plot_func, d) = draw_cdf, db['data'][0]
- else:
- (plot_func, d) = draw_cdf_ci, db['data']
- if len(d) < 1:
- continue
- line = plot_func(pyplot, d,
- yscale=yscale,
- label=db['label'],
- color=db['color'] or next(color_cycle),
- linestyle=next(linestyle_cycle))
- lines.append(line)
- labels.append(db['label'])
- if xscale is not None:
- pyplot.xscale(xscale)
- if xlabel is not None:
- xlabel += __get_scale_suffix(xscale)
- if yscale is not None:
- pyplot.yscale(yscale)
- if ylabel is not None:
- ylabel += __get_scale_suffix(yscale)
- if xlabel is not None:
- pyplot.xlabel(xlabel, fontsize=14)
- if ylabel is not None:
- pyplot.ylabel(ylabel, fontsize=14)
- m = 0.025
- pyplot.margins(m)
- # the plot will exit the visible space at the 99th percentile,
- # so make sure the x-axis is centered correctly
- # (this is usually only a problem if using the 'taillog' yscale)
- x_visible_max = None
- for db in dbs:
- if len(db['data']) >= 1 and len(db['data'][0]) >= 1:
- q = quantile(db['data'][0], 0.99)
- x_visible_max = q if x_visible_max is None else max(x_visible_max, q)
- if x_visible_max is not None:
- pyplot.xlim(xmin=max(0, -m * x_visible_max), xmax=(m + 1) * x_visible_max)
- __plot_finish(args, lines, labels, filename)
- def __plot_finish(args, lines, labels, filename):
- pyplot.tick_params(axis='y', which='major', labelsize=12)
- pyplot.tick_params(axis='x', which='major', labelsize=14)
- pyplot.tick_params(axis='both', which='minor', labelsize=8)
- pyplot.grid(True, axis='both', which='minor', color='0.1', linestyle=':', linewidth='0.5')
- pyplot.grid(True, axis='both', which='major', color='0.1', linestyle=':', linewidth='1.0')
- pyplot.legend(lines, labels, loc='lower right', fontsize=14)
- pyplot.tight_layout(pad=0.3)
- pyplot.savefig(f"{args.prefix}/{filename}.{'png' if args.plot_pngs else 'pdf'}")
- args.pdfpages.savefig()
- def __get_scale_suffix(scale):
- if scale == 'taillog':
- return " (tail log scale)"
- elif scale == 'log':
- return " (log scale)"
- else:
- return ""
- def __time_format_func(x, pos):
- hours = int(x // 3600)
- minutes = int((x % 3600) // 60)
- seconds = int(x % 60)
- return "{:d}:{:02d}:{:02d}".format(hours, minutes, seconds)
- def __load_tornet_datasets(args, filepattern):
- tornet_dbs = []
- print(args.labels)
- label_cycle = cycle(args.labels) if args.labels is not None else None
- color_cycle = cycle(args.colors) if args.colors is not None else None
- if args.tornet_collection_path is not None:
- for collection_dir in args.tornet_collection_path:
- tornet_db = {
- 'dataset': [load_json_data(p) for p in find_matching_files_in_dir(collection_dir, filepattern)],
- 'label': next(label_cycle) if label_cycle is not None else os.path.basename(collection_dir),
- 'color': next(color_cycle) if color_cycle is not None else None,
- }
- tornet_dbs.append(tornet_db)
- return tornet_dbs
- def __load_torperf_datasets(torperf_argset):
- torperf_dbs = []
- if torperf_argset is not None:
- for torperf_args in torperf_argset:
- torperf_db = {
- 'dataset': load_json_data(torperf_args[0]) if torperf_args[0] is not None else None,
- 'label': torperf_args[1] if torperf_args[1] is not None else "Public Tor",
- 'color': torperf_args[2],
- }
- torperf_dbs.append(torperf_db)
- return torperf_dbs
- def __get_simulated_network_scale(args):
- sim_info = __load_tornet_datasets(args, "simulation_info.json")
- net_scale = 0.0
- for db in sim_info:
- for i, d in enumerate(db['dataset']):
- if 'net_scale' in d:
- if net_scale == 0.0:
- net_scale = float(d['net_scale'])
- logging.info(f"Found simulated network scale {net_scale}")
- else:
- if float(d['net_scale']) != net_scale:
- logging.warning("Some of your tornet data is from networks of different scale")
- logging.critical(f"Found network scales {net_scale} and {float(d['net_scale'])} and they don't match")
- return net_scale
- def __compute_torperf_error_rates(daily_counts):
- err_rates = []
- for day in daily_counts:
- total = int(daily_counts[day]['requests'])
- if total <= 0:
- continue
- timeouts = int(daily_counts[day]['timeouts'])
- failures = int(daily_counts[day]['failures'])
- err_rates.append((timeouts + failures) / float(total) * 100.0)
- return err_rates
- def main():
- my_formatter_class = CustomHelpFormatter
- # construct the options
- main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)
- main_parser.add_argument('-v', '--version',
- help="""Prints the version of the toolkit and exits.""",
- action="store_true", dest="do_version",
- default=False)
- main_parser.add_argument('-q', '--quiet',
- help="""Do not write log messages to file. Use twice to also not write to stdout.""",
- action="count", dest="quiet",
- default=0)
- main_parser.add_argument('-s', '--seed',
- help="""Initialize tornettools' PRNGs with a seed to allow for
- deterministic behavior. This does not affect the seed for the Shadow
- simulation.""",
- action="store", type=int, dest="seed", metavar="N",
- default=None)
- sub_parser = main_parser.add_subparsers(help=HELP_MAIN, dest='command')
- plot_parser = sub_parser.add_parser('plot',
- description=DESC_PLOT,
- help=HELP_PLOT,
- formatter_class=my_formatter_class)
- plot_parser.set_defaults(func=run, formatter_class=my_formatter_class)
- plot_parser.add_argument('tornet_collection_path',
- help="""Path to a directory containing one or more subdirectories of parsed
- tornet results from the 'parse' command. Confidence intervals are drawn
- when this path contains plot data from multiple simulations.""",
- action='store',
- type=__type_str_dir_path_in,
- nargs='+')
- plot_parser.add_argument('-t', '--tor_metrics_path',
- help="""Path to a tor_metrics.json file that was created by the 'stage' command,
- which we be compared against the tornet collections. The label and color
- to use in the graphs that we create are optional.""",
- action=PathStringArgsAction,
- nargs='+',
- metavar="PATH [LABEL [COLOR]]")
- plot_parser.add_argument('--prefix',
- help="""A directory PATH prefix where the graphs generated by this script
- will be written.""",
- action="store",
- type=__type_str_dir_path_out,
- dest="prefix",
- default=os.getcwd(),
- metavar="PATH")
- plot_parser.add_argument('-l', '--labels',
- help="""Labels for the tornet collections to be used in the graph legends.""",
- action='store',
- type=str,
- dest="labels",
- nargs='+',
- metavar='LABEL')
- plot_parser.add_argument('-c', '--colors',
- help="""Colors for the tornet collections to be used in the graph plots.""",
- action='store',
- type=str,
- dest="colors",
- nargs='+',
- metavar='COLOR')
- plot_parser.add_argument('-a', '--all',
- help="""Also generate individual tgentools and oniontracetools plots for each simulation.""",
- action="store_true",
- dest="plot_all",
- default=False)
- plot_parser.add_argument('--pngs',
- help="""Save individual plot images in png instead of pdf format.""",
- action="store_true",
- dest="plot_pngs",
- default=False)
- # get args and call the command handler for the chosen mode
- args = main_parser.parse_args()
- if not hasattr(args, "prefix") and hasattr(args, "tornet_config_path"):
- args.prefix = args.tornet_config_path
- if hasattr(args, "nprocesses"):
- args.nprocesses = args.nprocesses if args.nprocesses > 0 else cpu_count()
- # check if it's just a version check and we should short circuit
- if args.do_version:
- __setup_logging(args)
- logging.info("tornettools version {}".format(__version__))
- return
- # if it's anything other than version, we need a subcommand
- if args.command == None:
- main_parser.print_usage()
- return
- # now we know we can start
- __setup_logging(args)
- # seed the pseudo-random generators
- # if we don't have a seed, choose one and make sure we log it for reproducibility
- if args.seed == None:
- args.seed = randint(0, 2**31)
- stdseed(args.seed)
- numpyseed(args.seed)
- logging.info("Seeded standard and numpy PRNGs with seed={}".format(args.seed))
- logging.info("The argument namespace is: {}".format(str(args)))
- logging.info("The platform is: {}".format(str(platform())))
- logging.info("System info: {}".format(str(uname())))
- # now run the configured mode
- rv = run(args)
- if rv == 0 or rv == None:
- return 0
- elif isinstance(rv, int):
- return rv
- else:
- logging.warning(f"Unknown return value: {rv}")
- return 1
- def __type_nonnegative_integer(value):
- i = int(value)
- if i < 0:
- raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)
- return i
- def __type_nonnegative_float(value):
- i = float(value)
- if i < 0.0:
- raise argparse.ArgumentTypeError("'%s' is an invalid non-negative flat value" % value)
- return i
- def __type_fractional_float(value):
- i = float(value)
- if i <= 0.0 or i > 1.0:
- raise argparse.ArgumentTypeError("'%s' is an invalid fractional float value" % value)
- return i
- def __type_str_file_path_out(value):
- s = str(value)
- if s == "-":
- return s
- p = os.path.abspath(os.path.expanduser(s))
- make_directories(p)
- return p
- def __type_str_dir_path_out(value):
- s = str(value)
- p = os.path.abspath(os.path.expanduser(s))
- make_directories(p)
- return p
- def __type_str_file_path_in(value):
- s = str(value)
- if s == "-":
- return s
- p = os.path.abspath(os.path.expanduser(s))
- if not os.path.exists(p):
- raise argparse.ArgumentTypeError(f"Path does not exist: {p}")
- elif not os.path.isfile(p):
- raise argparse.ArgumentTypeError(f"Path is not a file: {p}")
- return p
- def __type_str_dir_path_in(value):
- s = str(value)
- p = os.path.abspath(os.path.expanduser(s))
- if not os.path.exists(p):
- raise argparse.ArgumentTypeError(f"Path does not exist: {p}")
- elif not os.path.isdir(p):
- raise argparse.ArgumentTypeError(f"Path is not a directory: {p}")
- return p
- def type_str_file_path_in(p):
- return __type_str_file_path_in(p)
- # adds the 'RawDescriptionHelpFormatter' to the ArgsDefault one
- class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
- def _fill_text(self, text, width, indent):
- return ''.join([indent + line for line in text.splitlines(True)])
- # a custom action for passing in experimental data directories when plotting
- class PathStringArgsAction(argparse.Action):
- def __call__(self, parser, namespace, values, option_string=None):
- if len(values) == 0:
- raise argparse.ArgumentError(self, "A path is required.")
- elif len(values) > 3:
- raise argparse.ArgumentError(self, "Must specify 3 or fewer strings.")
- # get the values
- path = values[0]
- label = values[1] if len(values) > 1 else None
- color = values[2] if len(values) > 2 else None
- # extract and validate the path
- path = type_str_file_path_in(path)
- # remove the default
- if "_didremovedefault" not in namespace:
- setattr(namespace, self.dest, [])
- setattr(namespace, "_didremovedefault", True)
- # append our new arg set
- dest = getattr(namespace, self.dest)
- dest.append([path, label, color])
- if __name__ == '__main__':
- sys.exit(main())
|