# j3tracey/mnettools
							import sys
import os
import argparse
import logging

from datetime import datetime
from random import randint
from random import seed as stdseed
from numpy.random import seed as numpyseed
from multiprocessing import cpu_count
from platform import platform, uname

from tornettools.util import which, make_directories
from tornettools._version import __version__

import re
import os
import logging

from itertools import cycle
import matplotlib.pyplot as pyplot
from matplotlib.ticker import FuncFormatter
from matplotlib.backends.backend_pdf import PdfPages

from tornettools.util import load_json_data, find_matching_files_in_dir

from tornettools.plot_common import (DEFAULT_COLORS, DEFAULT_LINESTYLES, draw_cdf, draw_cdf_ci,
                                     draw_line, draw_line_ci, quantile, set_plot_options)
from tornettools.plot_tgen import plot_tgen
from tornettools.plot_oniontrace import plot_oniontrace


# Help and description texts for the command line interface.
# HELP_* strings are the short summaries and DESC_* strings are the long
# descriptions. main() in this file only references the MAIN and PLOT texts;
# the STAGE, GENERATE, SIMULATE, PARSE and ARCHIVE texts are defined here but
# not used by any parser built in this file.

# top-level parser
HELP_MAIN = """
Use 'tornettools <subcommand> --help' for more info
"""
DESC_MAIN = """
tornettools is a utility to guide you through the Tor network
experimentation process using Shadow. tornettools must be run with a
subcommand to specify a mode of operation.

For more information, see https://github.com/shadow/tornettools.
"""

# 'stage' subcommand
HELP_STAGE = """
Process Tor metrics data for staging network generation
"""
DESC_STAGE = """
Process Tor network consensuses, relay descriptors, and user files
from Tor metrics to stage TorNet network generation.

This command should be used before running generate. This command
produces staging files that will be required for the generate
command to succeed.
"""

# 'generate' subcommand
HELP_GENERATE = """
Generate TorNet network configurations
"""
DESC_GENERATE = """
Loads the TorNet staging files produced with the stage command
and uses them to generate a valid TorNet network configuration.

This command should be used after running stage.
"""

# 'simulate' subcommand
HELP_SIMULATE = """
Run a TorNet simulation in Shadow
"""
DESC_SIMULATE = """
Runs a Tor simulation using Shadow and the TorNet network
configurations files generated with the generate command.

This command should be used after running generate.
"""

# 'parse' subcommand
HELP_PARSE = """
Parse useful data from simulation log files
"""
DESC_PARSE = """
Parses log files created by simulations run with the simulate
command; extracts and stores various useful performance metrics.

This command should be used after running simulate.
"""

# 'plot' subcommand (the only subcommand registered by main() in this file)
HELP_PLOT = """
Plot previously parsed data to visualize results
"""
DESC_PLOT = """
Visualizes various performance metrics that were extracted and
stored with the parse command by producing graphical plots.

This command should be used after running parse.
"""

# 'archive' subcommand
HELP_ARCHIVE = """
Cleanup and compress Shadow simulation data
"""
DESC_ARCHIVE = """
Prepares a Shadow simulation directory for archival by compressing
simulation output log files and data directories.

This command can be used any time after running simulate, but
ideally after parsing and plotting is also completed.
"""

def __setup_logging_helper(logfilename=None):
    """Configure the root logger to write to stdout and, optionally, a file.

    Args:
        logfilename: Path of a log file to write in addition to stdout, or
            None to log to stdout only.
    """
    my_handlers = [logging.StreamHandler(sys.stdout)]

    if logfilename is not None:
        # make_directories is handed the log file path itself; presumably it
        # creates the missing parent directories (see tornettools.util)
        make_directories(logfilename)
        my_handlers.append(logging.FileHandler(filename=logfilename))

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(created)f [tornettools] [%(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        handlers=my_handlers,
    )

    msg = "Logging system initialized! Logging events to stdout"
    if logfilename is not None:
        msg += " and to '{}'".format(logfilename)
    logging.info(msg)

def __setup_logging(args):
    """Set up logging according to how many times -q/--quiet was given.

    quiet == 0: log to stdout, and also to a timestamped file under
    args.prefix when the namespace has a 'prefix' attribute.
    quiet == 1: log to stdout only.
    quiet >= 2: logging is left unconfigured.
    """
    if args.quiet > 1:
        # no logging
        return

    log_path = None
    if args.quiet == 0 and hasattr(args, 'prefix'):
        # log to a file too
        prefix_part = str(args.prefix)
        command_part = "none" if args.command is None else str(args.command)
        timestamp = datetime.now().strftime("%Y-%m-%d.%H.%M.%S")
        log_path = "{}/tornettools.{}.{}.log".format(prefix_part, command_part, timestamp)

    __setup_logging_helper(log_path)

def run(args):
    """Handler for the 'plot' subcommand: draw all mgen comparison plots.

    Args:
        args: Parsed argument namespace. Only `prefix` is read directly here
            (for the final log message); everything else is consumed by
            __plot_mnet and the functions it calls.
    """
    logging.info("Plotting simulation results now")
    # apply the shared plot settings from tornettools.plot_common
    set_plot_options()

    logging.info("Plotting mgen comparisons")
    __plot_mnet(args)

    logging.info(f"Done plotting! PDF files are saved to {args.prefix}")

def __pattern_for_basename(circuittype, basename):
    """Compile a regex matching the data file name for a circuit type.

    The pattern is ``<basename>\\.<circuittype>\\.json``. For the 'exit'
    circuit type the ``.exit`` component is optional.

    Note that `basename` and `circuittype` are inserted into the pattern
    unescaped, so any regex metacharacters they contain are interpreted as
    pattern syntax.

    Args:
        circuittype: Circuit type component of the file name (e.g. 'exit').
        basename: Leading component of the file name.

    Returns:
        The compiled regular expression.
    """
    if circuittype == 'exit':
        # Data files without a circuittype contain exit circuits (from legacy
        # tornettools runs).
        s = basename + r'(\.' + circuittype + r')?\.json'
    else:
        s = basename + r'\.' + circuittype + r'\.json'
    return re.compile(s)

def __plot_mnet(args):
    """Load each parsed mgen dataset and render its plot.

    A combined multi-page PDF is opened at ``<prefix>/tornet.plot.pages.pdf``
    and stored on ``args.pdfpages`` so that __plot_finish can append every
    figure to it. The PDF is closed when plotting finishes, including when
    loading or plotting raises.

    Args:
        args: Parsed argument namespace; `prefix` is read here and the
            namespace is passed on to the loaders and plotters.
    """
    # (name used in log messages, data file pattern, plotting function),
    # listed in the order the pages are written
    stages = (
        ("rtt_all", "rtt_all.mgen.json", __plot_mgen_rtt_all),
        ("rtt_timeout", "rtt_timeout.mgen.json", __plot_mgen_rtt_timeout),
        ("timeout_by_send", "timeout_by_send.mgen.json", __plot_mgen_timeout_by_send),
        ("timeout_by_receive", "timeout_by_receive.mgen.json", __plot_mgen_timeout_by_receive),
        ("counts", "counts.mgen.json", __plot_mgen_count),
    )

    args.pdfpages = PdfPages(f"{args.prefix}/tornet.plot.pages.pdf")
    try:
        net_scale = __get_simulated_network_scale(args)

        for name, filepattern, plot_func in stages:
            logging.info(f"Loading mgen {name} data")
            dbs = __load_tornet_datasets(args, filepattern)
            logging.info(f"Plotting mgen {name}")
            plot_func(args, dbs, net_scale)
    finally:
        # always finalize the PDF so the file handle is not leaked on errors
        args.pdfpages.close()


def __plot_mgen_rtt_all(args, rtt_dbs, net_scale):
    """Plot the CDF of the 'rtt_all' mgen data as 'rtt_all.mgen'.

    Uses a log x-axis and a 'taillog' y-axis. `net_scale` is accepted but
    not used.
    """
    # __plot_cdf_figure reads its samples from the 'data' key
    for entry in rtt_dbs:
        entry['data'] = entry['dataset']

    figure_options = {
        'yscale': 'taillog',
        'xscale': 'log',
        'xlabel': "Time (s)",
    }
    __plot_cdf_figure(args, rtt_dbs, 'rtt_all.mgen', **figure_options)

def __plot_mgen_rtt_timeout(args, rtt_dbs, net_scale):
    """Plot the CDF of the 'rtt_timeout' mgen data as 'rtt_timeout.mgen'.

    Uses a 'taillog' y-axis and leaves the x-axis scale at its default.
    `net_scale` is accepted but not used.
    """
    # __plot_cdf_figure reads its samples from the 'data' key
    for entry in rtt_dbs:
        entry['data'] = entry['dataset']

    figure_options = {
        'yscale': 'taillog',
        'xlabel': "Time (s)",
    }
    __plot_cdf_figure(args, rtt_dbs, 'rtt_timeout.mgen', **figure_options)


def __plot_mgen_timeout_by_send(args, rtt_dbs, net_scale):
    """Plot the CDF of the 'timeout_by_send' mgen data as 'timeout_by_send.mgen'.

    Uses a log x-axis and a 'taillog' y-axis. `net_scale` is accepted but
    not used.
    """
    # __plot_cdf_figure reads its samples from the 'data' key
    for entry in rtt_dbs:
        entry['data'] = entry['dataset']

    figure_options = {
        'yscale': 'taillog',
        'xscale': 'log',
        'xlabel': "Fraction of (user, group)'s expected receipts",
    }
    __plot_cdf_figure(args, rtt_dbs, 'timeout_by_send.mgen', **figure_options)

def __plot_mgen_timeout_by_receive(args, rtt_dbs, net_scale):
    """Plot the CDF of the 'timeout_by_receive' mgen data as 'timeout_by_receive.mgen'.

    Uses a log x-axis and a 'taillog' y-axis. `net_scale` is accepted but
    not used.
    """
    # __plot_cdf_figure reads its samples from the 'data' key
    for entry in rtt_dbs:
        entry['data'] = entry['dataset']

    figure_options = {
        'yscale': 'taillog',
        'xscale': 'log',
        'xlabel': "Fraction of (user, group)'s receipts",
    }
    __plot_cdf_figure(args, rtt_dbs, 'timeout_by_receive.mgen', **figure_options)


def __plot_mgen_count(args, count_dbs, net_scale):
    """Plot the CDF of the message-count mgen data as 'count.mgen'.

    Both axes keep their default scale. `net_scale` is accepted but not used.
    """
    # __plot_cdf_figure reads its samples from the 'data' key
    for entry in count_dbs:
        entry['data'] = entry['dataset']

    figure_options = {'xlabel': "Messages sent per user"}
    __plot_cdf_figure(args, count_dbs, 'count.mgen', **figure_options)

def __plot_cdf_figure(args, dbs, filename, xscale=None, yscale=None, xlabel=None, ylabel="CDF"):
    """Draw one CDF figure containing a line per database and save it.

    For each entry of `dbs`, the list stored under its 'data' key is plotted:
    a single dataset is drawn with draw_cdf, several datasets are drawn with
    draw_cdf_ci. Entries with a missing or empty 'data' list are skipped.

    Args:
        args: Parsed argument namespace, passed through to __plot_finish.
        dbs: List of dicts with 'label' and 'color' keys and, optionally, a
            'data' key holding a list of datasets.
        filename: Output file name without directory or extension.
        xscale: Optional x-axis scale name handed to pyplot.xscale.
        yscale: Optional y-axis scale name handed to the draw function and
            to pyplot.yscale.
        xlabel: Optional x-axis label; a scale suffix is appended when
            `xscale` is set.
        ylabel: Optional y-axis label; a scale suffix is appended when
            `yscale` is set.
    """
    color_cycle = cycle(DEFAULT_COLORS)
    linestyle_cycle = cycle(DEFAULT_LINESTYLES)

    pyplot.figure()
    lines, labels = [], []

    for db in dbs:
        data = db.get('data')
        if data is None or len(data) < 1:
            continue
        elif len(data) == 1:
            (plot_func, d) = draw_cdf, data[0]
        else:
            (plot_func, d) = draw_cdf_ci, data

        if len(d) < 1:
            continue

        # an explicit per-db color wins; otherwise take the next default color
        line = plot_func(pyplot, d,
                         yscale=yscale,
                         label=db['label'],
                         color=db['color'] or next(color_cycle),
                         linestyle=next(linestyle_cycle))

        lines.append(line)
        labels.append(db['label'])

    if xscale is not None:
        pyplot.xscale(xscale)
        if xlabel is not None:
            xlabel += __get_scale_suffix(xscale)
    if yscale is not None:
        pyplot.yscale(yscale)
        if ylabel is not None:
            ylabel += __get_scale_suffix(yscale)
    if xlabel is not None:
        pyplot.xlabel(xlabel, fontsize=14)
    if ylabel is not None:
        pyplot.ylabel(ylabel, fontsize=14)

    m = 0.025
    pyplot.margins(m)

    # the plot will exit the visible space at the 99th percentile,
    # so make sure the x-axis is centered correctly
    # (this is usually only a problem if using the 'taillog' yscale)
    x_visible_max = None
    for db in dbs:
        # same missing-'data' guard as the drawing loop above, so an entry
        # that was skipped there cannot raise KeyError here
        data = db.get('data')
        if data is not None and len(data) >= 1 and len(data[0]) >= 1:
            # only the first dataset of each db is used for the limit
            q = quantile(data[0], 0.99)
            x_visible_max = q if x_visible_max is None else max(x_visible_max, q)
    if x_visible_max is not None:
        pyplot.xlim(xmin=max(0, -m * x_visible_max), xmax=(m + 1) * x_visible_max)

    __plot_finish(args, lines, labels, filename)

def __plot_finish(args, lines, labels, filename):
    """Style the current pyplot figure, add the legend and save it.

    The figure is written to ``<args.prefix>/<filename>.png`` when
    args.plot_pngs is set, otherwise to ``<args.prefix>/<filename>.pdf``, and
    is also appended to args.pdfpages.
    """
    tick_settings = (
        ('y', 'major', 12),
        ('x', 'major', 14),
        ('both', 'minor', 8),
    )
    for tick_axis, tick_kind, label_size in tick_settings:
        pyplot.tick_params(axis=tick_axis, which=tick_kind, labelsize=label_size)

    # minor grid first, then the heavier major grid
    for grid_kind, grid_width in (('minor', '0.5'), ('major', '1.0')):
        pyplot.grid(True, axis='both', which=grid_kind, color='0.1', linestyle=':', linewidth=grid_width)

    pyplot.legend(lines, labels, loc='lower right', fontsize=14)
    pyplot.tight_layout(pad=0.3)

    extension = 'png' if args.plot_pngs else 'pdf'
    pyplot.savefig(f"{args.prefix}/{filename}.{extension}")
    args.pdfpages.savefig()

def __get_scale_suffix(scale):
    """Return the axis-label suffix for a scale name ('' for unknown scales)."""
    suffix_by_scale = {
        'taillog': " (tail log scale)",
        'log': " (log scale)",
    }
    return suffix_by_scale.get(scale, "")

def __time_format_func(x, pos):
    """Format a number of seconds as H:MM:SS.

    The (x, pos) signature matches what a matplotlib FuncFormatter passes;
    `pos` is not used.
    """
    components = (x // 3600, (x % 3600) // 60, x % 60)
    hours, minutes, seconds = (int(part) for part in components)
    return f"{hours:d}:{minutes:02d}:{seconds:02d}"

def __load_tornet_datasets(args, filepattern):
    """Load the JSON data files matching `filepattern` from every collection.

    Args:
        args: Parsed argument namespace. Reads `tornet_collection_path`
            (iterable of directories, or None), `labels` and `colors`
            (lists, or None).
        filepattern: File pattern handed to find_matching_files_in_dir.

    Returns:
        One dict per collection directory with the keys 'dataset' (list of
        loaded JSON objects), 'label' (next user label, or the directory's
        base name when no labels were given) and 'color' (next user color, or
        None). Labels and colors are reused cyclically when there are fewer
        of them than collections. An empty list is returned when
        `tornet_collection_path` is None.
    """
    tornet_dbs = []

    logging.debug("Labels for the tornet collections: %s", args.labels)
    # an empty list is treated like "not given": cycling over it could never
    # yield a value
    label_cycle = cycle(args.labels) if args.labels else None
    color_cycle = cycle(args.colors) if args.colors else None

    if args.tornet_collection_path is not None:
        for collection_dir in args.tornet_collection_path:
            data_paths = find_matching_files_in_dir(collection_dir, filepattern)
            tornet_db = {
                'dataset': [load_json_data(p) for p in data_paths],
                'label': next(label_cycle) if label_cycle is not None else os.path.basename(collection_dir),
                'color': next(color_cycle) if color_cycle is not None else None,
            }
            tornet_dbs.append(tornet_db)

    return tornet_dbs

def __load_torperf_datasets(torperf_argset):
    """Turn [path, label, color] argument sets into torperf database dicts.

    Each returned dict has the loaded JSON under 'dataset' (None when the
    path is None), the 'label' (defaulting to "Public Tor") and the 'color'.
    Returns an empty list when `torperf_argset` is None.
    """
    if torperf_argset is None:
        return []

    return [
        {
            'dataset': None if arg_set[0] is None else load_json_data(arg_set[0]),
            'label': "Public Tor" if arg_set[1] is None else arg_set[1],
            'color': arg_set[2],
        }
        for arg_set in torperf_argset
    ]

def __get_simulated_network_scale(args):
    """Read the simulated network scale from the simulation_info.json files.

    The first 'net_scale' value found is used. Any later value that differs
    is reported through a warning and a critical log message, and does not
    change the result.

    Args:
        args: Parsed argument namespace, passed to __load_tornet_datasets.

    Returns:
        The network scale as a float, or 0.0 when no loaded file has a
        'net_scale' entry.
    """
    sim_info = __load_tornet_datasets(args, "simulation_info.json")

    # 0.0 doubles as the "nothing found yet" marker
    net_scale = 0.0
    for db in sim_info:
        for d in db['dataset']:
            if 'net_scale' not in d:
                continue

            found_scale = float(d['net_scale'])
            if net_scale == 0.0:
                net_scale = found_scale
                logging.info(f"Found simulated network scale {net_scale}")
            elif found_scale != net_scale:
                logging.warning("Some of your tornet data is from networks of different scale")
                logging.critical(f"Found network scales {net_scale} and {found_scale} and they don't match")

    return net_scale

def __compute_torperf_error_rates(daily_counts):
    """Compute the per-day error rate, in percent, from daily request counts.

    Each value of `daily_counts` must provide 'requests', 'timeouts' and
    'failures'. Days with no requests are left out of the result.
    """
    rates = []
    for counts in daily_counts.values():
        num_requests = int(counts['requests'])
        if num_requests > 0:
            num_errors = int(counts['timeouts']) + int(counts['failures'])
            rates.append(num_errors / float(num_requests) * 100.0)
    return rates


def main():
    """Parse the command line, set up logging and PRNG seeds, and run the plot.

    Returns:
        None when only the version or the usage message was printed;
        otherwise 0 when run() returned 0 or None, the value itself when
        run() returned another int, and 1 for any other return value.
    """
    my_formatter_class = CustomHelpFormatter

    # construct the options
    main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)

    main_parser.add_argument('-v', '--version',
        help="""Prints the version of the toolkit and exits.""",
        action="store_true", dest="do_version",
        default=False)

    main_parser.add_argument('-q', '--quiet',
        help="""Do not write log messages to file. Use twice to also not write to stdout.""",
        action="count", dest="quiet",
        default=0)

    main_parser.add_argument('-s', '--seed',
        help="""Initialize tornettools' PRNGs with a seed to allow for
            deterministic behavior. This does not affect the seed for the Shadow
            simulation.""",
        action="store", type=int, dest="seed", metavar="N",
        default=None)

    sub_parser = main_parser.add_subparsers(help=HELP_MAIN, dest='command')

    plot_parser = sub_parser.add_parser('plot',
        description=DESC_PLOT,
        help=HELP_PLOT,
        formatter_class=my_formatter_class)
    plot_parser.set_defaults(func=run, formatter_class=my_formatter_class)

    plot_parser.add_argument('tornet_collection_path',
        help="""Path to a directory containing one or more subdirectories of parsed
            tornet results from the 'parse' command. Confidence intervals are drawn
            when this path contains plot data from multiple simulations.""",
        action='store',
        type=__type_str_dir_path_in,
        nargs='+')

    plot_parser.add_argument('-t', '--tor_metrics_path',
        help="""Path to a tor_metrics.json file that was created by the 'stage' command,
            which will be compared against the tornet collections. The label and color
            to use in the graphs that we create are optional.""",
        action=PathStringArgsAction,
        nargs='+',
        metavar="PATH [LABEL [COLOR]]")

    plot_parser.add_argument('--prefix',
        help="""A directory PATH prefix where the graphs generated by this script
            will be written.""",
        action="store",
        type=__type_str_dir_path_out,
        dest="prefix",
        default=os.getcwd(),
        metavar="PATH")

    plot_parser.add_argument('-l', '--labels',
        help="""Labels for the tornet collections to be used in the graph legends.""",
        action='store',
        type=str,
        dest="labels",
        nargs='+',
        metavar='LABEL')

    plot_parser.add_argument('-c', '--colors',
        help="""Colors for the tornet collections to be used in the graph plots.""",
        action='store',
        type=str,
        dest="colors",
        nargs='+',
        metavar='COLOR')

    plot_parser.add_argument('-a', '--all',
        help="""Also generate individual tgentools and oniontracetools plots for each simulation.""",
        action="store_true",
        dest="plot_all",
        default=False)

    plot_parser.add_argument('--pngs',
        help="""Save individual plot images in png instead of pdf format.""",
        action="store_true",
        dest="plot_pngs",
        default=False)

    # get args and call the command handler for the chosen mode
    args = main_parser.parse_args()

    # neither 'tornet_config_path' nor 'nprocesses' is defined by the parsers
    # above; these fix-ups only apply if such options exist on the namespace
    if not hasattr(args, "prefix") and hasattr(args, "tornet_config_path"):
        args.prefix = args.tornet_config_path
    if hasattr(args, "nprocesses"):
        args.nprocesses = args.nprocesses if args.nprocesses > 0 else cpu_count()

    # check if it's just a version check and we should short circuit
    if args.do_version:
        __setup_logging(args)
        logging.info("tornettools version {}".format(__version__))
        return

    # if it's anything other than version, we need a subcommand
    if args.command is None:
        main_parser.print_usage()
        return

    # now we know we can start
    __setup_logging(args)

    # seed the pseudo-random generators
    # if we don't have a seed, choose one and make sure we log it for reproducibility
    if args.seed is None:
        args.seed = randint(0, 2**31)
    stdseed(args.seed)
    numpyseed(args.seed)
    logging.info("Seeded standard and numpy PRNGs with seed={}".format(args.seed))

    logging.info("The argument namespace is: {}".format(str(args)))
    logging.info("The platform is: {}".format(str(platform())))
    logging.info("System info: {}".format(str(uname())))

    # now run the configured mode
    rv = run(args)

    if rv is None or rv == 0:
        return 0
    elif isinstance(rv, int):
        return rv
    else:
        logging.warning(f"Unknown return value: {rv}")
        return 1


def __type_nonnegative_integer(value):
    """argparse type: convert `value` to an int and reject negative numbers.

    Raises:
        argparse.ArgumentTypeError: if the converted value is below zero.
    """
    parsed = int(value)
    if parsed >= 0:
        return parsed
    raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)

def __type_nonnegative_float(value):
    """argparse type: convert `value` to a float and reject negatives and NaN.

    Raises:
        argparse.ArgumentTypeError: if the converted value is below zero or
            is NaN.
    """
    i = float(value)
    # written as `not (i >= 0.0)` so that NaN, for which every comparison is
    # False, is rejected as well
    if not i >= 0.0:
        raise argparse.ArgumentTypeError("'%s' is an invalid non-negative float value" % value)
    return i

def __type_fractional_float(value):
    """argparse type: convert `value` to a float in the range (0.0, 1.0].

    Raises:
        argparse.ArgumentTypeError: if the converted value is outside that
            range or is NaN.
    """
    i = float(value)
    # the positive range test also rejects NaN, which slipped through the
    # previous `i <= 0.0 or i > 1.0` check
    if not 0.0 < i <= 1.0:
        raise argparse.ArgumentTypeError("'%s' is an invalid fractional float value" % value)
    return i

def __type_str_file_path_out(value):
    """argparse type for an output file path.

    "-" is returned unchanged. Any other value is expanded to an absolute
    path, handed to make_directories, and returned.
    """
    text = str(value)
    if text != "-":
        text = os.path.abspath(os.path.expanduser(text))
        make_directories(text)
    return text

def __type_str_dir_path_out(value):
    """argparse type for an output directory path.

    The value is expanded to an absolute path, handed to make_directories,
    and returned.
    """
    resolved = os.path.abspath(os.path.expanduser(str(value)))
    make_directories(resolved)
    return resolved

def __type_str_file_path_in(value):
    """argparse type for an input file path.

    "-" is returned unchanged. Any other value is expanded to an absolute
    path, which must refer to an existing file.

    Raises:
        argparse.ArgumentTypeError: if the path does not exist or is not a
            file.
    """
    text = str(value)
    if text == "-":
        return text

    resolved = os.path.abspath(os.path.expanduser(text))
    if os.path.isfile(resolved):
        return resolved
    if os.path.exists(resolved):
        raise argparse.ArgumentTypeError(f"Path is not a file: {resolved}")
    raise argparse.ArgumentTypeError(f"Path does not exist: {resolved}")

def __type_str_dir_path_in(value):
    """argparse type for an input directory path.

    The value is expanded to an absolute path, which must refer to an
    existing directory.

    Raises:
        argparse.ArgumentTypeError: if the path does not exist or is not a
            directory.
    """
    resolved = os.path.abspath(os.path.expanduser(str(value)))
    if os.path.isdir(resolved):
        return resolved
    if os.path.exists(resolved):
        raise argparse.ArgumentTypeError(f"Path is not a directory: {resolved}")
    raise argparse.ArgumentTypeError(f"Path does not exist: {resolved}")

def type_str_file_path_in(p):
    """Validate an input file path; forwards to __type_str_file_path_in.

    This single-name wrapper exists so the check can be called from inside a
    class body: there, a direct reference to the double-underscore name
    would be rewritten by Python's private name mangling.
    """
    return __type_str_file_path_in(p)

# combines raw (unwrapped) description text with the ArgumentDefaultsHelpFormatter
class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
    """Help formatter that keeps the line breaks of description text."""

    def _fill_text(self, text, width, indent):
        # do not re-wrap to `width`; only put the indent in front of each line
        indented_lines = (indent + line for line in text.splitlines(keepends=True))
        return ''.join(indented_lines)

# a custom action for passing in experimental data directories when plotting
class PathStringArgsAction(argparse.Action):
    """argparse action that collects ``PATH [LABEL [COLOR]]`` argument sets.

    Every use of the option appends one ``[path, label, color]`` list to the
    destination attribute; label and color are None when not given.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Validate one argument set and append it to the destination list.

        Raises:
            argparse.ArgumentError: if no value or more than three values
                were given, or if the path is not an existing file.
        """
        if len(values) == 0:
            raise argparse.ArgumentError(self, "A path is required.")
        elif len(values) > 3:
            raise argparse.ArgumentError(self, "Must specify 3 or fewer strings.")

        # get the values
        path = values[0]
        label = values[1] if len(values) > 1 else None
        color = values[2] if len(values) > 2 else None

        # extract and validate the path
        try:
            path = type_str_file_path_in(path)
        except argparse.ArgumentTypeError as err:
            # argparse only handles ArgumentTypeError raised by `type=`
            # callables; inside an action it has to be an ArgumentError to be
            # reported as a usage error instead of a traceback
            raise argparse.ArgumentError(self, str(err)) from err

        # remove the default
        if "_didremovedefault" not in namespace:
            setattr(namespace, self.dest, [])
            setattr(namespace, "_didremovedefault", True)

        # append our new arg set
        dest = getattr(namespace, self.dest)
        dest.append([path, label, color])

# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == '__main__':
    sys.exit(main())