plot_mgen.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. import sys
  2. import os
  3. import argparse
  4. import logging
  5. from datetime import datetime
  6. from random import randint
  7. from random import seed as stdseed
  8. from numpy.random import seed as numpyseed
  9. from multiprocessing import cpu_count
  10. from platform import platform, uname
  11. from tornettools.util import which, make_directories
  12. from tornettools._version import __version__
  13. import re
  14. import os
  15. import logging
  16. from itertools import cycle
  17. import matplotlib.pyplot as pyplot
  18. from matplotlib.ticker import FuncFormatter
  19. from matplotlib.backends.backend_pdf import PdfPages
  20. from tornettools.util import load_json_data, find_matching_files_in_dir
  21. from tornettools.plot_common import (DEFAULT_COLORS, DEFAULT_LINESTYLES, draw_cdf, draw_cdf_ci,
  22. draw_line, draw_line_ci, quantile, set_plot_options)
  23. from tornettools.plot_tgen import plot_tgen
  24. from tornettools.plot_oniontrace import plot_oniontrace
# Help and description strings for the command-line parsers. Within this
# file, main() passes DESC_MAIN and HELP_MAIN to the top-level parser and
# DESC_PLOT and HELP_PLOT to the 'plot' subparser. The STAGE, GENERATE,
# SIMULATE, PARSE and ARCHIVE pairs are not referenced by any code visible
# here (NOTE(review): presumably kept from the full tornettools CLI).
HELP_MAIN = """
Use 'tornettools <subcommand> --help' for more info
"""
DESC_MAIN = """
tornettools is a utility to guide you through the Tor network
experimentation process using Shadow. tornettools must be run with a
subcommand to specify a mode of operation.
For more information, see https://github.com/shadow/tornettools.
"""
HELP_STAGE = """
Process Tor metrics data for staging network generation
"""
DESC_STAGE = """
Process Tor network consensuses, relay descriptors, and user files
from Tor metrics to stage TorNet network generation.
This command should be used before running generate. This command
produces staging files that will be required for the generate
command to succeed.
"""
HELP_GENERATE = """
Generate TorNet network configurations
"""
DESC_GENERATE = """
Loads the TorNet staging files produced with the stage command
and uses them to generate a valid TorNet network configuration.
This command should be used after running stage.
"""
HELP_SIMULATE = """
Run a TorNet simulation in Shadow
"""
DESC_SIMULATE = """
Runs a Tor simulation using Shadow and the TorNet network
configurations files generated with the generate command.
This command should be used after running generate.
"""
HELP_PARSE = """
Parse useful data from simulation log files
"""
DESC_PARSE = """
Parses log files created by simulations run with the simulate
command; extracts and stores various useful performance metrics.
This command should be used after running simulate.
"""
HELP_PLOT = """
Plot previously parsed data to visualize results
"""
DESC_PLOT = """
Visualizes various performance metrics that were extracted and
stored with the parse command by producing graphical plots.
This command should be used after running parse.
"""
HELP_ARCHIVE = """
Cleanup and compress Shadow simulation data
"""
DESC_ARCHIVE = """
Prepares a Shadow simulation directory for archival by compressing
simulation output log files and data directories.
This command can be used any time after running simulate, but
ideally after parsing and plotting is also completed.
"""
  85. def __setup_logging_helper(logfilename=None):
  86. my_handlers = []
  87. stdout_handler = logging.StreamHandler(sys.stdout)
  88. my_handlers.append(stdout_handler)
  89. if logfilename != None:
  90. make_directories(logfilename)
  91. file_handler = logging.FileHandler(filename=logfilename)
  92. my_handlers.append(file_handler)
  93. logging.basicConfig(
  94. level=logging.INFO,
  95. format='%(asctime)s %(created)f [tornettools] [%(levelname)s] %(message)s',
  96. datefmt='%Y-%m-%d %H:%M:%S',
  97. handlers=my_handlers,
  98. )
  99. msg = "Logging system initialized! Logging events to stdout"
  100. if logfilename != None:
  101. msg += " and to '{}'".format(logfilename)
  102. logging.info(msg)
  103. def __setup_logging(args):
  104. if args.quiet <= 1:
  105. logfilename = None
  106. if args.quiet == 0 and hasattr(args, 'prefix'):
  107. # log to a file too
  108. prefixstr = str(args.prefix)
  109. funcstr = str(args.command) if args.command is not None else "none"
  110. datestr = datetime.now().strftime("%Y-%m-%d.%H.%M.%S")
  111. logfilename = "{}/tornettools.{}.{}.log".format(prefixstr, funcstr, datestr)
  112. __setup_logging_helper(logfilename)
  113. else:
  114. pass # no logging
def run(args):
    """Handler for the 'plot' subcommand: produce the mgen comparison plots.

    Args:
        args: The parsed argument namespace; it is forwarded to __plot_mnet,
            and args.prefix is reported as the output location.
    """
    logging.info("Plotting simulation results now")
    # Imported from tornettools.plot_common; NOTE(review): presumably applies
    # the shared matplotlib settings, its body is not visible in this file.
    set_plot_options()
    logging.info("Plotting mgen comparisons")
    __plot_mnet(args)
    logging.info(f"Done plotting! PDF files are saved to {args.prefix}")
  121. def __pattern_for_basename(circuittype, basename):
  122. s = basename + r'\.' + circuittype + r'\.json'
  123. if circuittype == 'exit':
  124. # Data files without a circuittype contain exit circuits (from legacy
  125. # tornettools runs).
  126. s = basename + r'(\.' + circuittype + r')?\.json'
  127. else:
  128. s = basename + r'\.' + circuittype + r'\.json'
  129. return re.compile(s)
  130. def __plot_mnet(args):
  131. args.pdfpages = PdfPages(f"{args.prefix}/tornet.plot.pages.pdf")
  132. net_scale = __get_simulated_network_scale(args)
  133. logging.info("Loading mgen rtt_all data")
  134. dbs = __load_tornet_datasets(args, "rtt_all.mgen.json")
  135. logging.info("Plotting mgen rtt_all")
  136. __plot_mgen_rtt_all(args, dbs, net_scale)
  137. logging.info("Loading mgen rtt_timeout data")
  138. dbs = __load_tornet_datasets(args, "rtt_timeout.mgen.json")
  139. logging.info("Plotting mgen rtt_timeout")
  140. __plot_mgen_rtt_timeout(args, dbs, net_scale)
  141. logging.info("Loading mgen timeout_by_send data")
  142. dbs = __load_tornet_datasets(args, "timeout_by_send.mgen.json")
  143. logging.info("Plotting mgen rtt_by_send")
  144. __plot_mgen_timeout_by_send(args, dbs, net_scale)
  145. logging.info("Loading mgen timeout_by_receive data")
  146. dbs = __load_tornet_datasets(args, "timeout_by_receive.mgen.json")
  147. logging.info("Plotting mgen rtt_by_receive")
  148. __plot_mgen_timeout_by_receive(args, dbs, net_scale)
  149. logging.info("Loading mgen rtt_counts data")
  150. dbs = __load_tornet_datasets(args, "counts.mgen.json")
  151. logging.info("Plotting mgen rtt_counts")
  152. __plot_mgen_count(args, dbs, net_scale)
  153. args.pdfpages.close()
  154. def __plot_mgen_rtt_all(args, rtt_dbs, net_scale):
  155. # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
  156. for rtt_db in rtt_dbs:
  157. rtt_db['data'] = rtt_db['dataset']
  158. __plot_cdf_figure(args, rtt_dbs, 'rtt_all.mgen', yscale='taillog',
  159. xscale='log',
  160. xlabel="Time (s)")
  161. def __plot_mgen_rtt_timeout(args, rtt_dbs, net_scale):
  162. # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
  163. for rtt_db in rtt_dbs:
  164. rtt_db['data'] = rtt_db['dataset']
  165. __plot_cdf_figure(args, rtt_dbs, 'rtt_timeout.mgen', yscale='taillog',
  166. xlabel="Time (s)")
  167. def __plot_mgen_timeout_by_send(args, rtt_dbs, net_scale):
  168. # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
  169. for rtt_db in rtt_dbs:
  170. rtt_db['data'] = rtt_db['dataset']
  171. __plot_cdf_figure(args, rtt_dbs, 'timeout_by_send.mgen', yscale='taillog',
  172. xscale='log',
  173. xlabel="Fraction of (user, group)'s expected receipts")
  174. def __plot_mgen_timeout_by_receive(args, rtt_dbs, net_scale):
  175. # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
  176. for rtt_db in rtt_dbs:
  177. rtt_db['data'] = rtt_db['dataset']
  178. __plot_cdf_figure(args, rtt_dbs, 'timeout_by_receive.mgen', yscale='taillog',
  179. xscale='log',
  180. xlabel="Fraction of (user, group)'s receipts")
  181. def __plot_mgen_count(args, count_dbs, net_scale):
  182. # cache the corresponding data in the 'data' keyword for __plot_cdf_figure
  183. for count_db in count_dbs:
  184. count_db['data'] = count_db['dataset']
  185. __plot_cdf_figure(args, count_dbs, 'count.mgen',
  186. xlabel="Messages sent per user")
  187. def __plot_cdf_figure(args, dbs, filename, xscale=None, yscale=None, xlabel=None, ylabel="CDF"):
  188. color_cycle = cycle(DEFAULT_COLORS)
  189. linestyle_cycle = cycle(DEFAULT_LINESTYLES)
  190. pyplot.figure()
  191. lines, labels = [], []
  192. for db in dbs:
  193. if 'data' not in db or len(db['data']) < 1:
  194. continue
  195. elif len(db['data']) == 1:
  196. (plot_func, d) = draw_cdf, db['data'][0]
  197. else:
  198. (plot_func, d) = draw_cdf_ci, db['data']
  199. if len(d) < 1:
  200. continue
  201. line = plot_func(pyplot, d,
  202. yscale=yscale,
  203. label=db['label'],
  204. color=db['color'] or next(color_cycle),
  205. linestyle=next(linestyle_cycle))
  206. lines.append(line)
  207. labels.append(db['label'])
  208. if xscale is not None:
  209. pyplot.xscale(xscale)
  210. if xlabel is not None:
  211. xlabel += __get_scale_suffix(xscale)
  212. if yscale is not None:
  213. pyplot.yscale(yscale)
  214. if ylabel is not None:
  215. ylabel += __get_scale_suffix(yscale)
  216. if xlabel is not None:
  217. pyplot.xlabel(xlabel, fontsize=14)
  218. if ylabel is not None:
  219. pyplot.ylabel(ylabel, fontsize=14)
  220. m = 0.025
  221. pyplot.margins(m)
  222. # the plot will exit the visible space at the 99th percentile,
  223. # so make sure the x-axis is centered correctly
  224. # (this is usually only a problem if using the 'taillog' yscale)
  225. x_visible_max = None
  226. for db in dbs:
  227. if len(db['data']) >= 1 and len(db['data'][0]) >= 1:
  228. q = quantile(db['data'][0], 0.99)
  229. x_visible_max = q if x_visible_max is None else max(x_visible_max, q)
  230. if x_visible_max is not None:
  231. pyplot.xlim(xmin=max(0, -m * x_visible_max), xmax=(m + 1) * x_visible_max)
  232. __plot_finish(args, lines, labels, filename)
  233. def __plot_finish(args, lines, labels, filename):
  234. pyplot.tick_params(axis='y', which='major', labelsize=12)
  235. pyplot.tick_params(axis='x', which='major', labelsize=14)
  236. pyplot.tick_params(axis='both', which='minor', labelsize=8)
  237. pyplot.grid(True, axis='both', which='minor', color='0.1', linestyle=':', linewidth='0.5')
  238. pyplot.grid(True, axis='both', which='major', color='0.1', linestyle=':', linewidth='1.0')
  239. pyplot.legend(lines, labels, loc='lower right', fontsize=14)
  240. pyplot.tight_layout(pad=0.3)
  241. pyplot.savefig(f"{args.prefix}/{filename}.{'png' if args.plot_pngs else 'pdf'}")
  242. args.pdfpages.savefig()
  243. def __get_scale_suffix(scale):
  244. if scale == 'taillog':
  245. return " (tail log scale)"
  246. elif scale == 'log':
  247. return " (log scale)"
  248. else:
  249. return ""
  250. def __time_format_func(x, pos):
  251. hours = int(x // 3600)
  252. minutes = int((x % 3600) // 60)
  253. seconds = int(x % 60)
  254. return "{:d}:{:02d}:{:02d}".format(hours, minutes, seconds)
  255. def __load_tornet_datasets(args, filepattern):
  256. tornet_dbs = []
  257. print(args.labels)
  258. label_cycle = cycle(args.labels) if args.labels is not None else None
  259. color_cycle = cycle(args.colors) if args.colors is not None else None
  260. if args.tornet_collection_path is not None:
  261. for collection_dir in args.tornet_collection_path:
  262. tornet_db = {
  263. 'dataset': [load_json_data(p) for p in find_matching_files_in_dir(collection_dir, filepattern)],
  264. 'label': next(label_cycle) if label_cycle is not None else os.path.basename(collection_dir),
  265. 'color': next(color_cycle) if color_cycle is not None else None,
  266. }
  267. tornet_dbs.append(tornet_db)
  268. return tornet_dbs
  269. def __load_torperf_datasets(torperf_argset):
  270. torperf_dbs = []
  271. if torperf_argset is not None:
  272. for torperf_args in torperf_argset:
  273. torperf_db = {
  274. 'dataset': load_json_data(torperf_args[0]) if torperf_args[0] is not None else None,
  275. 'label': torperf_args[1] if torperf_args[1] is not None else "Public Tor",
  276. 'color': torperf_args[2],
  277. }
  278. torperf_dbs.append(torperf_db)
  279. return torperf_dbs
  280. def __get_simulated_network_scale(args):
  281. sim_info = __load_tornet_datasets(args, "simulation_info.json")
  282. net_scale = 0.0
  283. for db in sim_info:
  284. for i, d in enumerate(db['dataset']):
  285. if 'net_scale' in d:
  286. if net_scale == 0.0:
  287. net_scale = float(d['net_scale'])
  288. logging.info(f"Found simulated network scale {net_scale}")
  289. else:
  290. if float(d['net_scale']) != net_scale:
  291. logging.warning("Some of your tornet data is from networks of different scale")
  292. logging.critical(f"Found network scales {net_scale} and {float(d['net_scale'])} and they don't match")
  293. return net_scale
  294. def __compute_torperf_error_rates(daily_counts):
  295. err_rates = []
  296. for day in daily_counts:
  297. total = int(daily_counts[day]['requests'])
  298. if total <= 0:
  299. continue
  300. timeouts = int(daily_counts[day]['timeouts'])
  301. failures = int(daily_counts[day]['failures'])
  302. err_rates.append((timeouts + failures) / float(total) * 100.0)
  303. return err_rates
  304. def main():
  305. my_formatter_class = CustomHelpFormatter
  306. # construct the options
  307. main_parser = argparse.ArgumentParser(description=DESC_MAIN, formatter_class=my_formatter_class)
  308. main_parser.add_argument('-v', '--version',
  309. help="""Prints the version of the toolkit and exits.""",
  310. action="store_true", dest="do_version",
  311. default=False)
  312. main_parser.add_argument('-q', '--quiet',
  313. help="""Do not write log messages to file. Use twice to also not write to stdout.""",
  314. action="count", dest="quiet",
  315. default=0)
  316. main_parser.add_argument('-s', '--seed',
  317. help="""Initialize tornettools' PRNGs with a seed to allow for
  318. deterministic behavior. This does not affect the seed for the Shadow
  319. simulation.""",
  320. action="store", type=int, dest="seed", metavar="N",
  321. default=None)
  322. sub_parser = main_parser.add_subparsers(help=HELP_MAIN, dest='command')
  323. plot_parser = sub_parser.add_parser('plot',
  324. description=DESC_PLOT,
  325. help=HELP_PLOT,
  326. formatter_class=my_formatter_class)
  327. plot_parser.set_defaults(func=run, formatter_class=my_formatter_class)
  328. plot_parser.add_argument('tornet_collection_path',
  329. help="""Path to a directory containing one or more subdirectories of parsed
  330. tornet results from the 'parse' command. Confidence intervals are drawn
  331. when this path contains plot data from multiple simulations.""",
  332. action='store',
  333. type=__type_str_dir_path_in,
  334. nargs='+')
  335. plot_parser.add_argument('-t', '--tor_metrics_path',
  336. help="""Path to a tor_metrics.json file that was created by the 'stage' command,
  337. which we be compared against the tornet collections. The label and color
  338. to use in the graphs that we create are optional.""",
  339. action=PathStringArgsAction,
  340. nargs='+',
  341. metavar="PATH [LABEL [COLOR]]")
  342. plot_parser.add_argument('--prefix',
  343. help="""A directory PATH prefix where the graphs generated by this script
  344. will be written.""",
  345. action="store",
  346. type=__type_str_dir_path_out,
  347. dest="prefix",
  348. default=os.getcwd(),
  349. metavar="PATH")
  350. plot_parser.add_argument('-l', '--labels',
  351. help="""Labels for the tornet collections to be used in the graph legends.""",
  352. action='store',
  353. type=str,
  354. dest="labels",
  355. nargs='+',
  356. metavar='LABEL')
  357. plot_parser.add_argument('-c', '--colors',
  358. help="""Colors for the tornet collections to be used in the graph plots.""",
  359. action='store',
  360. type=str,
  361. dest="colors",
  362. nargs='+',
  363. metavar='COLOR')
  364. plot_parser.add_argument('-a', '--all',
  365. help="""Also generate individual tgentools and oniontracetools plots for each simulation.""",
  366. action="store_true",
  367. dest="plot_all",
  368. default=False)
  369. plot_parser.add_argument('--pngs',
  370. help="""Save individual plot images in png instead of pdf format.""",
  371. action="store_true",
  372. dest="plot_pngs",
  373. default=False)
  374. # get args and call the command handler for the chosen mode
  375. args = main_parser.parse_args()
  376. if not hasattr(args, "prefix") and hasattr(args, "tornet_config_path"):
  377. args.prefix = args.tornet_config_path
  378. if hasattr(args, "nprocesses"):
  379. args.nprocesses = args.nprocesses if args.nprocesses > 0 else cpu_count()
  380. # check if it's just a version check and we should short circuit
  381. if args.do_version:
  382. __setup_logging(args)
  383. logging.info("tornettools version {}".format(__version__))
  384. return
  385. # if it's anything other than version, we need a subcommand
  386. if args.command == None:
  387. main_parser.print_usage()
  388. return
  389. # now we know we can start
  390. __setup_logging(args)
  391. # seed the pseudo-random generators
  392. # if we don't have a seed, choose one and make sure we log it for reproducibility
  393. if args.seed == None:
  394. args.seed = randint(0, 2**31)
  395. stdseed(args.seed)
  396. numpyseed(args.seed)
  397. logging.info("Seeded standard and numpy PRNGs with seed={}".format(args.seed))
  398. logging.info("The argument namespace is: {}".format(str(args)))
  399. logging.info("The platform is: {}".format(str(platform())))
  400. logging.info("System info: {}".format(str(uname())))
  401. # now run the configured mode
  402. rv = run(args)
  403. if rv == 0 or rv == None:
  404. return 0
  405. elif isinstance(rv, int):
  406. return rv
  407. else:
  408. logging.warning(f"Unknown return value: {rv}")
  409. return 1
  410. def __type_nonnegative_integer(value):
  411. i = int(value)
  412. if i < 0:
  413. raise argparse.ArgumentTypeError("'%s' is an invalid non-negative int value" % value)
  414. return i
  415. def __type_nonnegative_float(value):
  416. i = float(value)
  417. if i < 0.0:
  418. raise argparse.ArgumentTypeError("'%s' is an invalid non-negative flat value" % value)
  419. return i
  420. def __type_fractional_float(value):
  421. i = float(value)
  422. if i <= 0.0 or i > 1.0:
  423. raise argparse.ArgumentTypeError("'%s' is an invalid fractional float value" % value)
  424. return i
  425. def __type_str_file_path_out(value):
  426. s = str(value)
  427. if s == "-":
  428. return s
  429. p = os.path.abspath(os.path.expanduser(s))
  430. make_directories(p)
  431. return p
  432. def __type_str_dir_path_out(value):
  433. s = str(value)
  434. p = os.path.abspath(os.path.expanduser(s))
  435. make_directories(p)
  436. return p
  437. def __type_str_file_path_in(value):
  438. s = str(value)
  439. if s == "-":
  440. return s
  441. p = os.path.abspath(os.path.expanduser(s))
  442. if not os.path.exists(p):
  443. raise argparse.ArgumentTypeError(f"Path does not exist: {p}")
  444. elif not os.path.isfile(p):
  445. raise argparse.ArgumentTypeError(f"Path is not a file: {p}")
  446. return p
  447. def __type_str_dir_path_in(value):
  448. s = str(value)
  449. p = os.path.abspath(os.path.expanduser(s))
  450. if not os.path.exists(p):
  451. raise argparse.ArgumentTypeError(f"Path does not exist: {p}")
  452. elif not os.path.isdir(p):
  453. raise argparse.ArgumentTypeError(f"Path is not a directory: {p}")
  454. return p
def type_str_file_path_in(p):
    """Validate an input file path by delegating to __type_str_file_path_in.

    NOTE(review): presumably this wrapper exists because PathStringArgsAction
    calls it from inside a class body, where Python would name-mangle a
    direct reference to the double-underscore function -- confirm.
    """
    return __type_str_file_path_in(p)
  457. # adds the 'RawDescriptionHelpFormatter' to the ArgsDefault one
  458. class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
  459. def _fill_text(self, text, width, indent):
  460. return ''.join([indent + line for line in text.splitlines(True)])
  461. # a custom action for passing in experimental data directories when plotting
  462. class PathStringArgsAction(argparse.Action):
  463. def __call__(self, parser, namespace, values, option_string=None):
  464. if len(values) == 0:
  465. raise argparse.ArgumentError(self, "A path is required.")
  466. elif len(values) > 3:
  467. raise argparse.ArgumentError(self, "Must specify 3 or fewer strings.")
  468. # get the values
  469. path = values[0]
  470. label = values[1] if len(values) > 1 else None
  471. color = values[2] if len(values) > 2 else None
  472. # extract and validate the path
  473. path = type_str_file_path_in(path)
  474. # remove the default
  475. if "_didremovedefault" not in namespace:
  476. setattr(namespace, self.dest, [])
  477. setattr(namespace, "_didremovedefault", True)
  478. # append our new arg set
  479. dest = getattr(namespace, self.dest)
  480. dest.append([path, label, color])
# Run the CLI only when executed as a script; the value returned by main()
# becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())