|
@@ -112,17 +112,13 @@ CUTOFF_GUARD = .95
|
|
|
# .00 means no bad exits
|
|
|
PERMITTED_BADEXIT = .00
|
|
|
|
|
|
-# Clients will time out after 30 seconds trying to download a consensus
|
|
|
-# So allow fallback directories half that to deliver a consensus
|
|
|
-# The exact download times might change based on the network connection
|
|
|
-# running this script, but only by a few seconds
|
|
|
-# There is also about a second of python overhead
|
|
|
-CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
|
|
|
-# If the relay fails a consensus check, retry the download
|
|
|
-# This avoids delisting a relay due to transient network conditions
|
|
|
-CONSENSUS_DOWNLOAD_RETRY = True
|
|
|
+# older entries' weights are adjusted with ALPHA^(age in days)
|
|
|
+AGE_ALPHA = 0.99
|
|
|
+
|
|
|
+# this factor is used to scale OnionOO entries to [0,1]
|
|
|
+ONIONOO_SCALE_ONE = 999.
|
|
|
|
|
|
-## List Length Limits
|
|
|
+## Fallback Count Limits
|
|
|
|
|
|
# The target for these parameters is 20% of the guards in the network
|
|
|
# This is around 200 as of October 2015
|
|
@@ -130,37 +126,53 @@ _FB_POG = 0.2
|
|
|
FALLBACK_PROPORTION_OF_GUARDS = None if OUTPUT_CANDIDATES else _FB_POG
|
|
|
|
|
|
# We want exactly 100 fallbacks for the initial release
|
|
|
-# Limit the number of fallbacks (eliminating lowest by weight)
|
|
|
+# This gives us scope to add extra fallbacks to the list as needed
|
|
|
+# Limit the number of fallbacks (eliminating lowest by advertised bandwidth)
|
|
|
MAX_FALLBACK_COUNT = None if OUTPUT_CANDIDATES else 100
|
|
|
# Emit a C #error if the number of fallbacks is below
|
|
|
MIN_FALLBACK_COUNT = 100
|
|
|
|
|
|
-## Fallback Weight Settings
|
|
|
+## Fallback Bandwidth Requirements
|
|
|
|
|
|
-# Any fallback with the Exit flag has its consensus weight multipled by this
|
|
|
-EXIT_WEIGHT_FRACTION = 1.0
|
|
|
+# Any fallback with the Exit flag has its bandwidth multiplied by this fraction
|
|
|
+# to make sure we aren't further overloading exits
|
|
|
+# (Set to 1.0, because we asked that only lightly loaded exits opt-in,
|
|
|
+# and the extra load really isn't that much for large relays.)
|
|
|
+EXIT_BANDWIDTH_FRACTION = 1.0
|
|
|
|
|
|
-# If a single fallback's consensus weight is too low, it's pointless adding it
|
|
|
+# If a single fallback's bandwidth is too low, it's pointless adding it
|
|
|
# We expect fallbacks to handle an extra 30 kilobytes per second of traffic
|
|
|
-# Make sure they support a hundred times that
|
|
|
-MIN_CONSENSUS_WEIGHT = 30.0 * 100.0
|
|
|
+# Make sure they can support a hundred times the expected extra load
|
|
|
+# (Use 102.4 to make it come out nicely in MB/s)
|
|
|
+# We convert this to a consensus weight before applying the filter,
|
|
|
+# because all the bandwidth amounts are specified by the relay
|
|
|
+MIN_BANDWIDTH = 102.4 * 30.0 * 1024.0
|
|
|
+
|
|
|
+# Clients will time out after 30 seconds trying to download a consensus
|
|
|
+# So allow fallback directories half that to deliver a consensus
|
|
|
+# The exact download times might change based on the network connection
|
|
|
+# running this script, but only by a few seconds
|
|
|
+# There is also about a second of python overhead
|
|
|
+CONSENSUS_DOWNLOAD_SPEED_MAX = 15.0
|
|
|
+# If the relay fails a consensus check, retry the download
|
|
|
+# This avoids delisting a relay due to transient network conditions
|
|
|
+CONSENSUS_DOWNLOAD_RETRY = True
|
|
|
+
|
|
|
+## Fallback Weights for Client Selection
|
|
|
|
|
|
# All fallback weights are equal, and set to the value below
|
|
|
# Authorities are weighted 1.0 by default
|
|
|
# Clients use these weights to select fallbacks and authorities at random
|
|
|
# If there are 100 fallbacks and 9 authorities:
|
|
|
-# - each fallback is chosen with probability 10/(1000 + 9) ~= 0.99%
|
|
|
-# - each authority is chosen with probability 1/(1000 + 9) ~= 0.09%
|
|
|
+# - each fallback is chosen with probability 10.0/(10.0*100 + 1.0*9) ~= 0.99%
|
|
|
+# - each authority is chosen with probability 1.0/(10.0*100 + 1.0*9) ~= 0.09%
|
|
|
+# A client choosing a bootstrap directory server will choose a fallback for
|
|
|
+# 10.0/(10.0*100 + 1.0*9) * 100 = 99.1% of attempts, and an authority for
|
|
|
+# 1.0/(10.0*100 + 1.0*9) * 9 = 0.9% of attempts.
|
|
|
+# (This disregards the bootstrap schedules, where clients start by choosing
|
|
|
+# from fallbacks & authorities, then later choose from only authorities.)
|
|
|
FALLBACK_OUTPUT_WEIGHT = 10.0
|
|
|
|
|
|
-## Other Configuration Parameters
|
|
|
-
|
|
|
-# older entries' weights are adjusted with ALPHA^(age in days)
|
|
|
-AGE_ALPHA = 0.99
|
|
|
-
|
|
|
-# this factor is used to scale OnionOO entries to [0,1]
|
|
|
-ONIONOO_SCALE_ONE = 999.
|
|
|
-
|
|
|
## Parsing Functions
|
|
|
|
|
|
def parse_ts(t):
|
|
@@ -448,6 +460,11 @@ class Candidate(object):
|
|
|
details['contact'] = None
|
|
|
if not 'flags' in details or details['flags'] is None:
|
|
|
details['flags'] = []
|
|
|
+ if (not 'advertised_bandwidth' in details
|
|
|
+ or details['advertised_bandwidth'] is None):
|
|
|
+ # relays without advertised bandwidth have it calculated from their
|
|
|
+ # consensus weight
|
|
|
+ details['advertised_bandwidth'] = 0
|
|
|
details['last_changed_address_or_port'] = parse_ts(
|
|
|
details['last_changed_address_or_port'])
|
|
|
self._data = details
|
|
@@ -462,10 +479,6 @@ class Candidate(object):
|
|
|
self._compute_ipv6addr()
|
|
|
if self.ipv6addr is None:
|
|
|
logging.debug("Failed to get an ipv6 address for %s."%(self._fpr,))
|
|
|
- # Reduce the weight of exits to EXIT_WEIGHT_FRACTION * consensus_weight
|
|
|
- if self.is_exit():
|
|
|
- exit_weight = self._data['consensus_weight'] * EXIT_WEIGHT_FRACTION
|
|
|
- self._data['consensus_weight'] = exit_weight
|
|
|
|
|
|
def _stable_sort_or_addresses(self):
|
|
|
# replace self._data['or_addresses'] with a stable ordering,
|
|
@@ -754,11 +767,9 @@ class Candidate(object):
|
|
|
logging.info('%s not a candidate: guard avg too low (%lf)',
|
|
|
self._fpr, self._guard)
|
|
|
return False
|
|
|
- if (MIN_CONSENSUS_WEIGHT is not None
|
|
|
- and self._data['consensus_weight'] < MIN_CONSENSUS_WEIGHT):
|
|
|
- logging.info('%s not a candidate: consensus weight %.0f too low, must ' +
|
|
|
- 'be at least %.0f', self._fpr,
|
|
|
- self._data['consensus_weight'], MIN_CONSENSUS_WEIGHT)
|
|
|
+ if (not self._data.has_key('consensus_weight')
|
|
|
+ or self._data['consensus_weight'] < 1):
|
|
|
+ logging.info('%s not a candidate: consensus weight invalid', self._fpr)
|
|
|
return False
|
|
|
return True
|
|
|
|
|
@@ -889,6 +900,30 @@ class Candidate(object):
|
|
|
ipv6 if has_ipv6 else value)
|
|
|
return False
|
|
|
|
|
|
+ def cw_to_bw_factor(self):
|
|
|
+ # any relays with a missing or zero consensus weight are not candidates
|
|
|
+ # any relays with a missing advertised bandwidth have it set to zero
|
|
|
+ return self._data['advertised_bandwidth'] / self._data['consensus_weight']
|
|
|
+
|
|
|
+ # since advertised_bandwidth is reported by the relay, it can be gamed
|
|
|
+ # to avoid this, use the median consensus weight to bandwidth factor to
|
|
|
+ # estimate this relay's measured bandwidth, and make that the upper limit
|
|
|
+ def measured_bandwidth(self, median_cw_to_bw_factor):
|
|
|
+ cw_to_bw = median_cw_to_bw_factor
|
|
|
+ # Reduce exit bandwidth to make sure we're not overloading them
|
|
|
+ if self.is_exit():
|
|
|
+ cw_to_bw *= EXIT_BANDWIDTH_FRACTION
|
|
|
+ measured_bandwidth = self._data['consensus_weight'] * cw_to_bw
|
|
|
+ if self._data['advertised_bandwidth'] != 0:
|
|
|
+ # limit advertised bandwidth (if available) to measured bandwidth
|
|
|
+ return min(measured_bandwidth, self._data['advertised_bandwidth'])
|
|
|
+ else:
|
|
|
+ return measured_bandwidth
|
|
|
+
|
|
|
+ def set_measured_bandwidth(self, median_cw_to_bw_factor):
|
|
|
+ self._data['measured_bandwidth'] = self.measured_bandwidth(
|
|
|
+ median_cw_to_bw_factor)
|
|
|
+
|
|
|
def is_exit(self):
|
|
|
return 'Exit' in self._data['flags']
|
|
|
|
|
@@ -1056,8 +1091,8 @@ class CandidateList(dict):
|
|
|
logging.debug('Loading details document.')
|
|
|
d = fetch('details',
|
|
|
fields=('fingerprint,nickname,contact,last_changed_address_or_port,' +
|
|
|
- 'consensus_weight,or_addresses,dir_address,' +
|
|
|
- 'recommended_version,flags'))
|
|
|
+ 'consensus_weight,advertised_bandwidth,or_addresses,' +
|
|
|
+ 'dir_address,recommended_version,flags'))
|
|
|
logging.debug('Loading details document done.')
|
|
|
|
|
|
if not 'relays' in d: raise Exception("No relays found in document.")
|
|
@@ -1083,15 +1118,24 @@ class CandidateList(dict):
|
|
|
guard_count += 1
|
|
|
return guard_count
|
|
|
|
|
|
- # Find fallbacks that fit the uptime, stability, and flags criteria
|
|
|
+ # Find fallbacks that fit the uptime, stability, and flags criteria,
|
|
|
+ # and make an array of them in self.fallbacks
|
|
|
def compute_fallbacks(self):
|
|
|
self.fallbacks = map(lambda x: self[x],
|
|
|
- sorted(
|
|
|
- filter(lambda x: self[x].is_candidate(),
|
|
|
- self.keys()),
|
|
|
- key=lambda x: self[x]._data['consensus_weight'],
|
|
|
+ filter(lambda x: self[x].is_candidate(),
|
|
|
+ self.keys()))
|
|
|
+
|
|
|
+ # sort fallbacks by their consensus weight to advertised bandwidth factor,
|
|
|
+ # lowest to highest
|
|
|
+ # used to find the median cw_to_bw_factor()
|
|
|
+ def sort_fallbacks_by_cw_to_bw_factor(self):
|
|
|
+ self.fallbacks.sort(key=lambda f: f.cw_to_bw_factor())
|
|
|
+
|
|
|
+ # sort fallbacks by their measured bandwidth, highest to lowest
|
|
|
+ # calculate_measured_bandwidth before calling this
|
|
|
+ def sort_fallbacks_by_measured_bandwidth(self):
|
|
|
+ self.fallbacks.sort(key=lambda f: f._data['measured_bandwidth'],
|
|
|
reverse=True)
|
|
|
- )
|
|
|
|
|
|
@staticmethod
|
|
|
def load_relaylist(file_name):
|
|
@@ -1194,13 +1238,64 @@ class CandidateList(dict):
|
|
|
return '/* Whitelist & blacklist excluded %d of %d candidates. */'%(
|
|
|
excluded_count, initial_count)
|
|
|
|
|
|
- def fallback_min_weight(self):
|
|
|
+ # calculate each fallback's measured bandwidth based on the median
|
|
|
+ # consensus weight to advertised bandwidth ratio
|
|
|
+ def calculate_measured_bandwidth(self):
|
|
|
+ self.sort_fallbacks_by_cw_to_bw_factor()
|
|
|
+ median_fallback = self.fallback_median(True)
|
|
|
+ median_cw_to_bw_factor = median_fallback.cw_to_bw_factor()
|
|
|
+ for f in self.fallbacks:
|
|
|
+ f.set_measured_bandwidth(median_cw_to_bw_factor)
|
|
|
+
|
|
|
+ # remove relays with low measured bandwidth from the fallback list
|
|
|
+ # calculate_measured_bandwidth for each relay before calling this
|
|
|
+ def remove_low_bandwidth_relays(self):
|
|
|
+ if MIN_BANDWIDTH is None:
|
|
|
+ return
|
|
|
+ above_min_bw_fallbacks = []
|
|
|
+ for f in self.fallbacks:
|
|
|
+ if f._data['measured_bandwidth'] >= MIN_BANDWIDTH:
|
|
|
+ above_min_bw_fallbacks.append(f)
|
|
|
+ else:
|
|
|
+ # the bandwidth we log here is limited by the relay's consensus weight
|
|
|
+ # as well as its advertised bandwidth. See set_measured_bandwidth
|
|
|
+ # for details
|
|
|
+ logging.info('%s not a candidate: bandwidth %.1fMB/s too low, must ' +
|
|
|
+ 'be at least %.1fMB/s', f._fpr,
|
|
|
+ f._data['measured_bandwidth']/(1024.0*1024.0),
|
|
|
+ MIN_BANDWIDTH/(1024.0*1024.0))
|
|
|
+ self.fallbacks = above_min_bw_fallbacks
|
|
|
+
|
|
|
+ # the minimum fallback in the list
|
|
|
+ # call one of the sort_fallbacks_* functions before calling this
|
|
|
+ def fallback_min(self):
|
|
|
if len(self.fallbacks) > 0:
|
|
|
return self.fallbacks[-1]
|
|
|
else:
|
|
|
return None
|
|
|
|
|
|
- def fallback_max_weight(self):
|
|
|
+ # the median fallback in the list
|
|
|
+ # call one of the sort_fallbacks_* functions before calling this
|
|
|
+ def fallback_median(self, require_advertised_bandwidth):
|
|
|
+ # use the low-median when there are an even number of fallbacks,
|
|
|
+ # for consistency with the bandwidth authorities
|
|
|
+ if len(self.fallbacks) > 0:
|
|
|
+ median_position = (len(self.fallbacks) - 1) / 2
|
|
|
+ if not require_advertised_bandwidth:
|
|
|
+ return self.fallbacks[median_position]
|
|
|
+ # if we need advertised_bandwidth but this relay doesn't have it,
|
|
|
+ # move to a fallback with greater consensus weight until we find one
|
|
|
+ while not self.fallbacks[median_position]._data['advertised_bandwidth']:
|
|
|
+ median_position += 1
|
|
|
+ if median_position >= len(self.fallbacks):
|
|
|
+ return None
|
|
|
+ return self.fallbacks[median_position]
|
|
|
+ else:
|
|
|
+ return None
|
|
|
+
|
|
|
+ # the maximum fallback in the list
|
|
|
+ # call one of the sort_fallbacks_* functions before calling this
|
|
|
+ def fallback_max(self):
|
|
|
if len(self.fallbacks) > 0:
|
|
|
return self.fallbacks[0]
|
|
|
else:
|
|
@@ -1211,7 +1306,7 @@ class CandidateList(dict):
|
|
|
# Report:
|
|
|
# whether we checked consensus download times
|
|
|
# the number of fallback directories (and limits/exclusions, if relevant)
|
|
|
- # min & max fallback weights
|
|
|
+ # min & max fallback bandwidths
|
|
|
# #error if below minimum count
|
|
|
if PERFORM_IPV4_DIRPORT_CHECKS or PERFORM_IPV6_DIRPORT_CHECKS:
|
|
|
s = '/* Checked %s%s%s DirPorts served a consensus within %.1fs. */'%(
|
|
@@ -1243,11 +1338,12 @@ class CandidateList(dict):
|
|
|
s += 'Excluded: %d (Eligible Count Exceeded Target Count)'%(
|
|
|
eligible_count - fallback_count)
|
|
|
s += '\n'
|
|
|
- min_fb = self.fallback_min_weight()
|
|
|
- min_weight = min_fb._data['consensus_weight']
|
|
|
- max_fb = self.fallback_max_weight()
|
|
|
- max_weight = max_fb._data['consensus_weight']
|
|
|
- s += 'Consensus Weight Range: %d - %d'%(min_weight, max_weight)
|
|
|
+ min_fb = self.fallback_min()
|
|
|
+ min_bw = min_fb._data['measured_bandwidth']
|
|
|
+ max_fb = self.fallback_max()
|
|
|
+ max_bw = max_fb._data['measured_bandwidth']
|
|
|
+ s += 'Bandwidth Range: %.1f - %.1f MB/s'%(min_bw/(1024.0*1024.0),
|
|
|
+ max_bw/(1024.0*1024.0))
|
|
|
s += '\n'
|
|
|
s += '*/'
|
|
|
if fallback_count < MIN_FALLBACK_COUNT:
|
|
@@ -1293,6 +1389,14 @@ def list_fallbacks():
|
|
|
print candidates.summarise_filters(initial_count, excluded_count)
|
|
|
eligible_count = len(candidates.fallbacks)
|
|
|
|
|
|
+ # calculate the measured bandwidth of each relay,
|
|
|
+ # then remove low-bandwidth relays
|
|
|
+ candidates.calculate_measured_bandwidth()
|
|
|
+ candidates.remove_low_bandwidth_relays()
|
|
|
+ # make sure the list is sorted by bandwidth when we output it
|
|
|
+ # so that we include the active fallbacks with the greatest bandwidth
|
|
|
+ candidates.sort_fallbacks_by_measured_bandwidth()
|
|
|
+
|
|
|
# print the raw fallback list
|
|
|
#for x in candidates.fallbacks:
|
|
|
# print x.fallbackdir_line(True)
|