simulator.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. #!/usr/bin/env python3
  2. import random # For simulation, not cryptography!
  3. import math
  4. import sys
  5. import logging
  6. import resource
  7. import network
  8. import dirauth
  9. import relay
  10. import client
  11. class Simulator:
  12. def __init__(self, relaytarget, clienttarget, statslogger):
  13. self.relaytarget = relaytarget
  14. self.clienttarget = clienttarget
  15. self.statslogger = statslogger
  16. # Some (for now) hard-coded parameters
  17. # The number of directory authorities
  18. numdirauths = 9
  19. # The fraction of relays that are fallback relays
  20. fracfallbackrelays = 0.05
  21. # Mean number of circuits created per client per epoch
  22. self.gamma = 8.9
  23. # Churn is controlled by three parameters:
  24. # newmean: the mean number of new arrivals per epoch
  25. # newstddev: the stddev number of new arrivals per epoch
  26. # oldprob: the probability any given existing one leaves per epoch
  27. # If target is the desired steady state number, then it should
  28. # be the case that target * oldprob = newmean. That way, if the
  29. # current number is below target, on average you add more than
  30. # you remove, and if the current number is above target, on
  31. # average you add fewer than you remove.
  32. # For relays, looking at all the consensuses for Nov and Dec
  33. # 2019, newmean is about 1.0% of the network size, and newstddev
  34. # is about 0.3% of the network size.
  35. self.relay_newmean = 0.010 * self.relaytarget
  36. self.relay_newstddev = 0.003 * self.relaytarget
  37. self.relay_oldprob = 0.010
  38. # For clients, looking at how many clients request a consensus
  39. # with an if-modified-since date more than 3 hours old (and so
  40. # we treat them as "new") over several days in late Dec 2019,
  41. # newmean is about 16% of all clients, and newstddev is about 4%
  42. # of all clients.
  43. self.client_newmean = 0.16 * self.clienttarget
  44. self.client_newstddev = 0.04 * self.clienttarget
  45. self.client_oldprob = 0.16
  46. # Start some dirauths
  47. self.dirauthaddrs = []
  48. self.dirauths = []
  49. for i in range(numdirauths):
  50. dira = dirauth.DirAuth(i, numdirauths)
  51. self.dirauths.append(dira)
  52. self.dirauthaddrs.append(dira.netaddr)
  53. # Start some relays
  54. self.relays = []
  55. for i in range(self.relaytarget):
  56. # Relay bandwidths (at least the ones fast enough to get used)
  57. # in the live Tor network (as of Dec 2019) are well approximated
  58. # by (200000-(200000-25000)/3*log10(x)) where x is a
  59. # uniform integer in [1,2500]
  60. x = random.randint(1,2500)
  61. bw = int(200000-(200000-25000)/3*math.log10(x))
  62. self.relays.append(relay.Relay(self.dirauthaddrs, bw, 0))
  63. # The fallback relays are a hardcoded list of a small fraction
  64. # of the relays, used by clients for bootstrapping
  65. numfallbackrelays = int(self.relaytarget * fracfallbackrelays) + 1
  66. fallbackrelays = random.sample(self.relays, numfallbackrelays)
  67. for r in fallbackrelays:
  68. r.set_is_fallbackrelay()
  69. network.thenetwork.setfallbackrelays(fallbackrelays)
  70. # Tick the epoch to build the first consensus
  71. network.thenetwork.nextepoch()
  72. # Start some clients
  73. self.clients = []
  74. for i in range(clienttarget):
  75. self.clients.append(client.Client(self.dirauthaddrs))
  76. # Throw away all the performance statistics to this point
  77. for d in self.dirauths: d.perfstats.reset()
  78. for r in self.relays: r.perfstats.reset()
  79. # The clients' stats are already at 0, but they have the
  80. # "bootstrapping" flag set, which we want to keep, so we
  81. # won't reset them.
  82. # Tick the epoch to bootstrap the clients
  83. network.thenetwork.nextepoch()
  84. def one_epoch(self):
  85. """Simulate one epoch."""
  86. epoch = network.thenetwork.getepoch()
  87. # Each client will start a random number of circuits in a
  88. # Poisson distribution with mean gamma. To randomize the order
  89. # of the clients creating each circuit, we actually use a
  90. # Poisson distribution with mean (gamma*num_clients), and assign
  91. # each event to a uniformly random client. (This does in fact
  92. # give the required distribution.)
  93. numclients = len(self.clients)
  94. # simtime is the simulated time, measured in epochs (i.e.,
  95. # 0=start of this epoch; 1=end of this epoch)
  96. simtime = 0
  97. numcircs = 0
  98. allcircs = []
  99. lastpercent = -1
  100. while simtime < 1.0:
  101. allcircs.append(
  102. random.choice(self.clients).channelmgr.new_circuit())
  103. simtime += random.expovariate(self.gamma * numclients)
  104. numcircs += 1
  105. percent = int(100*simtime)
  106. #if percent != lastpercent:
  107. if numcircs % 100 == 0:
  108. logging.info("Creating circuits in epoch %s: %d%% (%d circuits)",
  109. epoch, percent, numcircs)
  110. lastpercent = percent
  111. # gather stats
  112. totsent = 0
  113. totrecv = 0
  114. dirasent = 0
  115. dirarecv = 0
  116. relaysent = 0
  117. relayrecv = 0
  118. clisent = 0
  119. clirecv = 0
  120. dirastats = network.PerfStatsStats()
  121. for d in self.dirauths:
  122. logging.debug("%s", d.perfstats)
  123. dirasent += d.perfstats.bytes_sent
  124. dirarecv += d.perfstats.bytes_received
  125. dirastats.accum(d.perfstats)
  126. totsent += dirasent
  127. totrecv += dirarecv
  128. relaystats = network.PerfStatsStats(True)
  129. relaybstats = network.PerfStatsStats(True)
  130. relaynbstats = network.PerfStatsStats(True)
  131. relayfbstats = network.PerfStatsStats(True)
  132. for r in self.relays:
  133. logging.debug("%s", r.perfstats)
  134. relaysent += r.perfstats.bytes_sent
  135. relayrecv += r.perfstats.bytes_received
  136. relaystats.accum(r.perfstats)
  137. if r.perfstats.is_bootstrapping:
  138. if r.is_fallbackrelay:
  139. self.statslogger.error(
  140. "ERROR: fallback relay is bootstrapping?")
  141. else:
  142. relaybstats.accum(r.perfstats)
  143. else:
  144. if r.is_fallbackrelay:
  145. relayfbstats.accum(r.perfstats)
  146. else:
  147. relaynbstats.accum(r.perfstats)
  148. totsent += relaysent
  149. totrecv += relayrecv
  150. clistats = network.PerfStatsStats()
  151. clibstats = network.PerfStatsStats()
  152. clinbstats = network.PerfStatsStats()
  153. for c in self.clients:
  154. logging.debug("%s", c.perfstats)
  155. clisent += c.perfstats.bytes_sent
  156. clirecv += c.perfstats.bytes_received
  157. clistats.accum(c.perfstats)
  158. if c.perfstats.is_bootstrapping:
  159. clibstats.accum(c.perfstats)
  160. else:
  161. clinbstats.accum(c.perfstats)
  162. totsent += clisent
  163. totrecv += clirecv
  164. self.statslogger.info("DirAuths sent=%s recv=%s" % (dirasent, dirarecv))
  165. self.statslogger.info("Relays sent=%s recv=%s" % (relaysent, relayrecv))
  166. self.statslogger.info("Client sent=%s recv=%s" % (clisent, clirecv))
  167. self.statslogger.info("Total sent=%s recv=%s" % (totsent, totrecv))
  168. numdirauths = len(self.dirauths)
  169. numrelays = len(self.relays)
  170. numclients = len(self.clients)
  171. self.statslogger.info("Dirauths %s", dirastats)
  172. self.statslogger.info("Relays %s", relaystats)
  173. self.statslogger.info("Relays(FB) %s", relayfbstats)
  174. self.statslogger.info("Relays(B) %s", relaybstats)
  175. self.statslogger.info("Relays(NB) %s", relaynbstats)
  176. self.statslogger.info("Clients %s", clistats)
  177. self.statslogger.info("Clients(B) %s", clibstats)
  178. self.statslogger.info("Clients(NB) %s", clinbstats)
  179. # Close circuits
  180. for c in allcircs:
  181. c.close()
  182. # Reset stats
  183. for d in self.dirauths: d.perfstats.reset()
  184. for r in self.relays: r.perfstats.reset()
  185. for c in self.clients: c.perfstats.reset()
  186. # Churn relays
  187. # Stop some of the (non-fallback) relays
  188. relays_remaining = []
  189. numrelays = len(self.relays)
  190. numrelaysterminated = 0
  191. lastpercent = 0
  192. logging.info("Terminating some relays")
  193. for i, r in enumerate(self.relays):
  194. percent = int(100*(i+1)/numrelays)
  195. if not r.is_fallbackrelay and \
  196. random.random() < self.relay_oldprob:
  197. r.terminate()
  198. numrelaysterminated += 1
  199. else:
  200. # Keep this relay
  201. relays_remaining.append(r)
  202. if percent != lastpercent:
  203. lastpercent = percent
  204. logging.info("%d%% relays considered, %d terminated",
  205. percent, numrelaysterminated)
  206. self.relays = relays_remaining
  207. # Start some new relays
  208. relays_new = int(random.normalvariate(self.relay_newmean,
  209. self.relay_newstddev))
  210. logging.info("Starting %d new relays", relays_new)
  211. if relays_new > 0:
  212. for i in range(relays_new):
  213. x = random.randint(1,2500)
  214. bw = int(200000-(200000-25000)/3*math.log10(x))
  215. self.relays.append(relay.Relay(self.dirauthaddrs, bw, 0))
  216. # TODO: churn clients
  217. # Stop some of the clients
  218. clients_remaining = []
  219. numclients = len(self.clients)
  220. numclientsterminated = 0
  221. lastpercent = 0
  222. logging.info("Terminating some clients")
  223. for i, c in enumerate(self.clients):
  224. percent = int(100*(i+1)/numclients)
  225. if random.random() < self.client_oldprob:
  226. c.terminate()
  227. numclientsterminated += 1
  228. else:
  229. # Keep this client
  230. clients_remaining.append(c)
  231. if percent != lastpercent:
  232. lastpercent = percent
  233. logging.info("%d%% clients considered, %d terminated",
  234. percent, numclientsterminated)
  235. self.clients = clients_remaining
  236. # Start some new clients
  237. clients_new = int(random.normalvariate(self.client_newmean,
  238. self.client_newstddev))
  239. logging.info("Starting %d new clients", clients_new)
  240. if clients_new > 0:
  241. for i in range(clients_new):
  242. self.clients.append(client.Client(self.dirauthaddrs))
  243. # Tick the epoch
  244. network.thenetwork.nextepoch()
  245. if __name__ == '__main__':
  246. # Args: womode snipauthmode networkscale numepochs randseed
  247. if len(sys.argv) != 7:
  248. sys.stderr.write("Usage: womode snipauthmode networkscale numepochs randseed logdir\n")
  249. sys.exit(1)
  250. womode = network.WOMode[sys.argv[1].upper()]
  251. snipauthmode = network.SNIPAuthMode[sys.argv[2].upper()]
  252. networkscale = float(sys.argv[3])
  253. numepochs = int(sys.argv[4])
  254. randseed = int(sys.argv[5])
  255. logfile = "%s/%s_%s_%f_%s_%s.log" % (sys.argv[6], womode.name,
  256. snipauthmode.name, networkscale, numepochs, randseed)
  257. # Seed the PRNG. On Ubuntu 18.04, this in fact makes future calls
  258. # to (non-cryptographic) random numbers deterministic. On Ubuntu
  259. # 16.04, it does not.
  260. random.seed(randseed)
  261. loglevel = logging.INFO
  262. # Uncomment to see all the debug messages
  263. # loglevel = logging.DEBUG
  264. logging.basicConfig(level=loglevel,
  265. format="%(asctime)s:%(levelname)s:%(message)s")
  266. # The gathered statistics get logged separately
  267. statslogger = logging.getLogger("simulator")
  268. handler = logging.FileHandler(logfile)
  269. handler.setFormatter(logging.Formatter("%(asctime)s:%(message)s"))
  270. statslogger.addHandler(handler)
  271. statslogger.setLevel(logging.INFO)
  272. statslogger.info("Starting simulation %s", logfile)
  273. # Set the Walking Onions style to use
  274. network.thenetwork.set_wo_style(womode, snipauthmode)
  275. # The steady-state numbers of relays and clients
  276. relaytarget = math.ceil(6500 * networkscale)
  277. clienttarget = math.ceil(2500000 * networkscale)
  278. # Create the simulation
  279. simulator = Simulator(relaytarget, clienttarget, statslogger)
  280. for e in range(numepochs):
  281. statslogger.info("Starting epoch %s simulation", e+3)
  282. simulator.one_epoch()
  283. maxmemmib = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss/1024
  284. statslogger.info("%d MiB used", maxmemmib)