|
@@ -18,8 +18,8 @@
|
|
|
# Optionally uses ipaddress (python 3 builtin) or py2-ipaddress (package)
|
|
|
# for netblock analysis.
|
|
|
#
|
|
|
-# Then read the logs to make sure the fallbacks aren't dominated by a single
|
|
|
-# netblock or port.
|
|
|
+# After running this script, read the logs to make sure the fallbacks aren't
|
|
|
+# dominated by a single netblock or port.
|
|
|
|
|
|
# Script by weasel, April 2015
|
|
|
# Portions by gsathya & karsten, 2013
|
|
@@ -39,8 +39,6 @@ import urllib
|
|
|
import urllib2
|
|
|
import hashlib
|
|
|
import dateutil.parser
|
|
|
-# bson_lazy provides bson
|
|
|
-#from bson import json_util
|
|
|
import copy
|
|
|
import re
|
|
|
|
|
@@ -100,19 +98,29 @@ MUST_BE_RUNNING_NOW = (PERFORM_IPV4_DIRPORT_CHECKS
|
|
|
# Clients have been using microdesc consensuses by default for a while now
|
|
|
DOWNLOAD_MICRODESC_CONSENSUS = True
|
|
|
|
|
|
-# If a relay delivers an expired consensus, if it expired less than this many
|
|
|
-# seconds ago, we still allow the relay. This should never be less than -90,
|
|
|
-# as all directory mirrors should have downloaded a consensus 90 minutes
|
|
|
-# before it expires. It should never be more than 24 hours, because clients
|
|
|
-# reject consensuses that are older than REASONABLY_LIVE_TIME.
|
|
|
-# For the consensus expiry check to be accurate, the machine running this
|
|
|
-# script needs an accurate clock.
|
|
|
+# If a relay delivers an invalid consensus, if it will become valid less than
|
|
|
+# this many seconds in the future, or expired less than this many seconds ago,
|
|
|
+# accept the relay as a fallback. For the consensus expiry check to be
|
|
|
+# accurate, the machine running this script needs an accurate clock.
|
|
|
#
|
|
|
-# Relays on 0.3.0 and later return a 404 when they are about to serve an
|
|
|
-# expired consensus. This makes them fail the download check.
|
|
|
-# We use a tolerance of 0, so that 0.2.x series relays also fail the download
|
|
|
-# check if they serve an expired consensus.
|
|
|
-CONSENSUS_EXPIRY_TOLERANCE = 0
|
|
|
+# Relays on 0.3.0 and later return a 404 when they are about to serve a
|
|
|
+# consensus that expired more than 24 hours ago. 0.2.9 and earlier relays
|
|
|
+# will serve consensuses that are very old.
|
|
|
+#
|
|
|
+# Relays on 0.3.5.6-rc? and later return a 404 when they are about to serve a
|
|
|
+# consensus that will become valid more than 24 hours in the future. Older
|
|
|
+# relays don't serve future consensuses.
|
|
|
+#
|
|
|
+# A 404 makes relays fail the download check. We use a tolerance of 24 hours,
|
|
|
+# so that 0.2.9 relays also fail the download check if they serve a consensus
|
|
|
+# that is not reasonably live.
|
|
|
+#
|
|
|
+# REASONABLY_LIVE_TIME should never be more than Tor's REASONABLY_LIVE_TIME,
|
|
|
+# (24 hours), because clients reject consensuses that are older than that.
|
|
|
+# Clients on 0.3.5.5-alpha? and earlier also won't select guards from
|
|
|
+# consensuses that have expired, but can bootstrap if they already have guards
|
|
|
+# in their state file.
|
|
|
+REASONABLY_LIVE_TIME = 24*60*60
|
|
|
|
|
|
# Output fallback name, flags, bandwidth, and ContactInfo in a C comment?
|
|
|
OUTPUT_COMMENTS = True if OUTPUT_CANDIDATES else False
|
|
@@ -912,61 +920,181 @@ class Candidate(object):
|
|
|
return False
|
|
|
return True
|
|
|
|
|
|
- def is_in_whitelist(self, relaylist):
|
|
|
- """ A fallback matches if each key in the whitelist line matches:
|
|
|
+ def id_matches(self, id, exact=False):
|
|
|
+ """ Does this fallback's id match id?
|
|
|
+ exact is ignored. """
|
|
|
+ return self._fpr == id
|
|
|
+
|
|
|
+ def ipv4_addr_matches(self, ipv4_addr, exact=False):
|
|
|
+ """ Does this fallback's IPv4 address match ipv4_addr?
|
|
|
+ exact is ignored. """
|
|
|
+ return self.dirip == ipv4_addr
|
|
|
+
|
|
|
+ def ipv4_dirport_matches(self, ipv4_dirport, exact=False):
|
|
|
+ """ Does this fallback's IPv4 dirport match ipv4_dirport?
|
|
|
+ If exact is False, always return True. """
|
|
|
+ if exact:
|
|
|
+ return self.dirport == int(ipv4_dirport)
|
|
|
+ else:
|
|
|
+ return True
|
|
|
+
|
|
|
+ def ipv4_and_dirport_matches(self, ipv4_addr, ipv4_dirport, exact=False):
|
|
|
+ """ Does this fallback's IPv4 address match ipv4_addr?
|
|
|
+ If exact is True, also check ipv4_dirport. """
|
|
|
+ ipv4_match = self.ipv4_addr_matches(ipv4_addr, exact=exact)
|
|
|
+ if exact:
|
|
|
+ return ipv4_match and self.ipv4_dirport_matches(ipv4_dirport,
|
|
|
+ exact=exact)
|
|
|
+ else:
|
|
|
+ return ipv4_match
|
|
|
+
|
|
|
+ def ipv4_orport_matches(self, ipv4_orport, exact=False):
|
|
|
+ """ Does this fallback's IPv4 orport match ipv4_orport?
|
|
|
+ If exact is False, always return True. """
|
|
|
+ if exact:
|
|
|
+ return self.orport == int(ipv4_orport)
|
|
|
+ else:
|
|
|
+ return True
|
|
|
+
|
|
|
+ def ipv4_and_orport_matches(self, ipv4_addr, ipv4_orport, exact=False):
|
|
|
+ """ Does this fallback's IPv4 address match ipv4_addr?
|
|
|
+ If exact is True, also check ipv4_orport. """
|
|
|
+ ipv4_match = self.ipv4_addr_matches(ipv4_addr, exact=exact)
|
|
|
+ if exact:
|
|
|
+ return ipv4_match and self.ipv4_orport_matches(ipv4_orport,
|
|
|
+ exact=exact)
|
|
|
+ else:
|
|
|
+ return ipv4_match
|
|
|
+
|
|
|
+ def ipv6_addr_matches(self, ipv6_addr, exact=False):
|
|
|
+ """ Does this fallback's IPv6 address match ipv6_addr?
|
|
|
+ Both addresses must be present to match.
|
|
|
+ exact is ignored. """
|
|
|
+ if self.has_ipv6() and ipv6_addr is not None:
|
|
|
+ # Check that we have a bracketed IPv6 address without a port
|
|
|
+ assert(ipv6_addr.startswith('[') and ipv6_addr.endswith(']'))
|
|
|
+ return self.ipv6addr == ipv6_addr
|
|
|
+ else:
|
|
|
+ return False
|
|
|
+
|
|
|
+ def ipv6_orport_matches(self, ipv6_orport, exact=False):
|
|
|
+ """ Does this fallback's IPv6 orport match ipv6_orport?
|
|
|
+ Both ports must be present to match.
|
|
|
+ If exact is False, always return True. """
|
|
|
+ if exact:
|
|
|
+ return (self.has_ipv6() and ipv6_orport is not None and
|
|
|
+ self.ipv6orport == int(ipv6_orport))
|
|
|
+ else:
|
|
|
+ return True
|
|
|
+
|
|
|
+ def ipv6_and_orport_matches(self, ipv6_addr, ipv6_orport, exact=False):
|
|
|
+ """ Does this fallback's IPv6 address match ipv6_addr?
|
|
|
+ If exact is True, also check ipv6_orport. """
|
|
|
+ ipv6_match = self.ipv6_addr_matches(ipv6_addr, exact=exact)
|
|
|
+ if exact:
|
|
|
+ return ipv6_match and self.ipv6_orport_matches(ipv6_orport,
|
|
|
+ exact=exact)
|
|
|
+ else:
|
|
|
+ return ipv6_match
|
|
|
+
|
|
|
+ def entry_matches_exact(self, entry):
|
|
|
+ """ Is entry an exact match for this fallback?
|
|
|
+ A fallback is an exact match for entry if each key in entry matches:
|
|
|
ipv4
|
|
|
dirport
|
|
|
orport
|
|
|
id
|
|
|
- ipv6 address and port (if present)
|
|
|
+ ipv6 address and port (if present in the fallback or the whitelist)
|
|
|
If the fallback has an ipv6 key, the whitelist line must also have
|
|
|
- it, and vice versa, otherwise they don't match. """
|
|
|
- ipv6 = None
|
|
|
- if self.has_ipv6():
|
|
|
- ipv6 = '%s:%d'%(self.ipv6addr, self.ipv6orport)
|
|
|
- for entry in relaylist:
|
|
|
- if entry['id'] != self._fpr:
|
|
|
- # can't log here unless we match an IP and port, because every relay's
|
|
|
- # fingerprint is compared to every entry's fingerprint
|
|
|
- if entry['ipv4'] == self.dirip and int(entry['orport']) == self.orport:
|
|
|
- logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
|
|
|
- '%s?', entry['id'], self.dirip, self.orport,
|
|
|
- self._fpr)
|
|
|
- if self.has_ipv6() and entry.has_key('ipv6') and entry['ipv6'] == ipv6:
|
|
|
- logging.warning('%s excluded: has OR %s changed fingerprint to ' +
|
|
|
- '%s?', entry['id'], ipv6, self._fpr)
|
|
|
- continue
|
|
|
- if entry['ipv4'] != self.dirip:
|
|
|
- logging.warning('%s excluded: has it changed IPv4 from %s to %s?',
|
|
|
- self._fpr, entry['ipv4'], self.dirip)
|
|
|
- continue
|
|
|
- if int(entry['dirport']) != self.dirport:
|
|
|
- logging.warning('%s excluded: has it changed DirPort from %s:%d to ' +
|
|
|
- '%s:%d?', self._fpr, self.dirip, int(entry['dirport']),
|
|
|
- self.dirip, self.dirport)
|
|
|
- continue
|
|
|
- if int(entry['orport']) != self.orport:
|
|
|
- logging.warning('%s excluded: has it changed ORPort from %s:%d to ' +
|
|
|
- '%s:%d?', self._fpr, self.dirip, int(entry['orport']),
|
|
|
- self.dirip, self.orport)
|
|
|
- continue
|
|
|
- if entry.has_key('ipv6') and self.has_ipv6():
|
|
|
- # if both entry and fallback have an ipv6 address, compare them
|
|
|
- if entry['ipv6'] != ipv6:
|
|
|
- logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
|
|
|
- 'to %s?', self._fpr, entry['ipv6'], ipv6)
|
|
|
- continue
|
|
|
- # if the fallback has an IPv6 address but the whitelist entry
|
|
|
- # doesn't, or vice versa, the whitelist entry doesn't match
|
|
|
- elif entry.has_key('ipv6') and not self.has_ipv6():
|
|
|
- logging.warning('%s excluded: has it lost its former IPv6 address %s?',
|
|
|
- self._fpr, entry['ipv6'])
|
|
|
- continue
|
|
|
- elif not entry.has_key('ipv6') and self.has_ipv6():
|
|
|
- logging.warning('%s excluded: has it gained an IPv6 address %s?',
|
|
|
- self._fpr, ipv6)
|
|
|
- continue
|
|
|
+ it, otherwise they don't match.
|
|
|
+
|
|
|
+ Logs a warning-level message if the fallback would be an exact match,
|
|
|
+ but one of the id, ipv4, ipv4 orport, ipv4 dirport, or ipv6 orport
|
|
|
+ have changed. """
|
|
|
+ if not self.id_matches(entry['id'], exact=True):
|
|
|
+ # can't log here unless we match an IP and port, because every relay's
|
|
|
+ # fingerprint is compared to every entry's fingerprint
|
|
|
+ if self.ipv4_and_orport_matches(entry['ipv4'],
|
|
|
+ entry['orport'],
|
|
|
+ exact=True):
|
|
|
+ logging.warning('%s excluded: has OR %s:%d changed fingerprint to ' +
|
|
|
+ '%s?', entry['id'], self.dirip, self.orport,
|
|
|
+ self._fpr)
|
|
|
+ if self.ipv6_and_orport_matches(entry.get('ipv6_addr'),
|
|
|
+ entry.get('ipv6_orport'),
|
|
|
+ exact=True):
|
|
|
+ logging.warning('%s excluded: has OR %s changed fingerprint to ' +
|
|
|
+ '%s?', entry['id'], entry['ipv6'], self._fpr)
|
|
|
+ return False
|
|
|
+ if not self.ipv4_addr_matches(entry['ipv4'], exact=True):
|
|
|
+ logging.warning('%s excluded: has it changed IPv4 from %s to %s?',
|
|
|
+ self._fpr, entry['ipv4'], self.dirip)
|
|
|
+ return False
|
|
|
+ if not self.ipv4_dirport_matches(entry['dirport'], exact=True):
|
|
|
+ logging.warning('%s excluded: has it changed DirPort from %s:%d to ' +
|
|
|
+ '%s:%d?', self._fpr, self.dirip, int(entry['dirport']),
|
|
|
+ self.dirip, self.dirport)
|
|
|
+ return False
|
|
|
+ if not self.ipv4_orport_matches(entry['orport'], exact=True):
|
|
|
+ logging.warning('%s excluded: has it changed ORPort from %s:%d to ' +
|
|
|
+ '%s:%d?', self._fpr, self.dirip, int(entry['orport']),
|
|
|
+ self.dirip, self.orport)
|
|
|
+ return False
|
|
|
+ if entry.has_key('ipv6') and self.has_ipv6():
|
|
|
+ # if both entry and fallback have an ipv6 address, compare them
|
|
|
+ if not self.ipv6_and_orport_matches(entry['ipv6_addr'],
|
|
|
+ entry['ipv6_orport'],
|
|
|
+ exact=True):
|
|
|
+ logging.warning('%s excluded: has it changed IPv6 ORPort from %s ' +
|
|
|
+ 'to %s:%d?', self._fpr, entry['ipv6'],
|
|
|
+ self.ipv6addr, self.ipv6orport)
|
|
|
+ return False
|
|
|
+ # if the fallback has an IPv6 address but the whitelist entry
|
|
|
+ # doesn't, or vice versa, the whitelist entry doesn't match
|
|
|
+ elif entry.has_key('ipv6') and not self.has_ipv6():
|
|
|
+ logging.warning('%s excluded: has it lost its former IPv6 address %s?',
|
|
|
+ self._fpr, entry['ipv6'])
|
|
|
+ return False
|
|
|
+ elif not entry.has_key('ipv6') and self.has_ipv6():
|
|
|
+ logging.warning('%s excluded: has it gained an IPv6 address %s:%d?',
|
|
|
+ self._fpr, self.ipv6addr, self.ipv6orport)
|
|
|
+ return False
|
|
|
+ return True
|
|
|
+
|
|
|
+ def entry_matches_fuzzy(self, entry):
|
|
|
+ """ Is entry a fuzzy match for this fallback?
|
|
|
+ A fallback is a fuzzy match for entry if at least one of these keys
|
|
|
+ in entry matches:
|
|
|
+ id
|
|
|
+ ipv4
|
|
|
+ ipv6 (if present in both the fallback and whitelist)
|
|
|
+ The ports and nickname are ignored. Missing or extra ipv6 addresses
|
|
|
+ are ignored.
|
|
|
+
|
|
|
+ Doesn't log any warning messages. """
|
|
|
+ if self.id_matches(entry['id'], exact=False):
|
|
|
return True
|
|
|
+ if self.ipv4_addr_matches(entry['ipv4'], exact=False):
|
|
|
+ return True
|
|
|
+ if entry.has_key('ipv6') and self.has_ipv6():
|
|
|
+ # if both entry and fallback have an ipv6 address, compare them
|
|
|
+ if self.ipv6_addr_matches(entry['ipv6_addr'], exact=False):
|
|
|
+ return True
|
|
|
+ return False
|
|
|
+
|
|
|
+ def is_in_whitelist(self, relaylist, exact=False):
|
|
|
+ """ If exact is True (existing fallback list), check if this fallback is
|
|
|
+ an exact match for any whitelist entry, using entry_matches_exact().
|
|
|
+
|
|
|
+ If exact is False (new fallback whitelist), check if this fallback is
|
|
|
+ a fuzzy match for any whitelist entry, using entry_matches_fuzzy(). """
|
|
|
+ for entry in relaylist:
|
|
|
+ if exact:
|
|
|
+ if self.entry_matches_exact(entry):
|
|
|
+ return True
|
|
|
+ else:
|
|
|
+ if self.entry_matches_fuzzy(entry):
|
|
|
+ return True
|
|
|
return False
|
|
|
|
|
|
def cw_to_bw_factor(self):
|
|
@@ -1124,6 +1252,7 @@ class Candidate(object):
|
|
|
).run()[0]
|
|
|
end = datetime.datetime.utcnow()
|
|
|
time_since_expiry = (end - consensus.valid_until).total_seconds()
|
|
|
+ time_until_valid = (consensus.valid_after - end).total_seconds()
|
|
|
except Exception, stem_error:
|
|
|
end = datetime.datetime.utcnow()
|
|
|
log_excluded('Unable to retrieve a consensus from %s: %s', nickname,
|
|
@@ -1141,8 +1270,17 @@ class Candidate(object):
|
|
|
download_failed = True
|
|
|
elif (time_since_expiry > 0):
|
|
|
status = 'outdated consensus, expired %ds ago'%(int(time_since_expiry))
|
|
|
- if time_since_expiry <= CONSENSUS_EXPIRY_TOLERANCE:
|
|
|
- status += ', tolerating up to %ds'%(CONSENSUS_EXPIRY_TOLERANCE)
|
|
|
+ if time_since_expiry <= REASONABLY_LIVE_TIME:
|
|
|
+ status += ', tolerating up to %ds'%(REASONABLY_LIVE_TIME)
|
|
|
+ level = logging.INFO
|
|
|
+ else:
|
|
|
+ status += ', invalid'
|
|
|
+ level = logging.WARNING
|
|
|
+ download_failed = True
|
|
|
+ elif (time_until_valid > 0):
|
|
|
+ status = 'future consensus, valid in %ds'%(int(time_until_valid))
|
|
|
+ if time_until_valid <= REASONABLY_LIVE_TIME:
|
|
|
+ status += ', tolerating up to %ds'%(REASONABLY_LIVE_TIME)
|
|
|
level = logging.INFO
|
|
|
else:
|
|
|
status += ', invalid'
|
|
@@ -1400,7 +1538,7 @@ class CandidateList(dict):
|
|
|
each line's key/value pairs are placed in a dictonary,
|
|
|
(of string -> string key/value pairs),
|
|
|
and these dictionaries are placed in an array.
|
|
|
- comments start with # and are ignored """
|
|
|
+ comments start with # and are ignored. """
|
|
|
file_data = file_obj['data']
|
|
|
file_name = file_obj['name']
|
|
|
relaylist = []
|
|
@@ -1440,18 +1578,28 @@ class CandidateList(dict):
|
|
|
relay_entry['dirport'] = ipv4_maybe_dirport_split[1]
|
|
|
elif kvl == 2:
|
|
|
relay_entry[key_value_split[0]] = key_value_split[1]
|
|
|
+ # split ipv6 addresses and orports
|
|
|
+ if key_value_split[0] == 'ipv6':
|
|
|
+ ipv6_orport_split = key_value_split[1].rsplit(':', 1)
|
|
|
+ ipv6l = len(ipv6_orport_split)
|
|
|
+ if ipv6l != 2:
|
|
|
+ print '#error Bad %s IPv6 item: %s, format is [ipv6]:orport.'%(
|
|
|
+ file_name, item)
|
|
|
+ relay_entry['ipv6_addr'] = ipv6_orport_split[0]
|
|
|
+ relay_entry['ipv6_orport'] = ipv6_orport_split[1]
|
|
|
relaylist.append(relay_entry)
|
|
|
return relaylist
|
|
|
|
|
|
- # apply the fallback whitelist
|
|
|
- def apply_filter_lists(self, whitelist_obj):
|
|
|
+ def apply_filter_lists(self, whitelist_obj, exact=False):
|
|
|
+ """ Apply the fallback whitelist_obj to this fallback list,
|
|
|
+ passing exact to is_in_whitelist(). """
|
|
|
excluded_count = 0
|
|
|
logging.debug('Applying whitelist')
|
|
|
# parse the whitelist
|
|
|
whitelist = self.load_relaylist(whitelist_obj)
|
|
|
filtered_fallbacks = []
|
|
|
for f in self.fallbacks:
|
|
|
- in_whitelist = f.is_in_whitelist(whitelist)
|
|
|
+ in_whitelist = f.is_in_whitelist(whitelist, exact=exact)
|
|
|
if in_whitelist:
|
|
|
# include
|
|
|
filtered_fallbacks.append(f)
|
|
@@ -2064,14 +2212,14 @@ def process_existing():
|
|
|
logging.getLogger('stem').setLevel(logging.INFO)
|
|
|
whitelist = {'data': parse_fallback_file(FALLBACK_FILE_NAME),
|
|
|
'name': FALLBACK_FILE_NAME}
|
|
|
- list_fallbacks(whitelist)
|
|
|
+ list_fallbacks(whitelist, exact=True)
|
|
|
|
|
|
def process_default():
|
|
|
logging.basicConfig(level=logging.WARNING)
|
|
|
logging.getLogger('stem').setLevel(logging.WARNING)
|
|
|
whitelist = {'data': read_from_file(WHITELIST_FILE_NAME, MAX_LIST_FILE_SIZE),
|
|
|
'name': WHITELIST_FILE_NAME}
|
|
|
- list_fallbacks(whitelist)
|
|
|
+ list_fallbacks(whitelist, exact=False)
|
|
|
|
|
|
## Main Function
|
|
|
def main():
|
|
@@ -2092,10 +2240,10 @@ def log_excluded(msg, *args):
|
|
|
else:
|
|
|
logging.info(msg, *args)
|
|
|
|
|
|
-def list_fallbacks(whitelist):
|
|
|
+def list_fallbacks(whitelist, exact=False):
|
|
|
""" Fetches required onionoo documents and evaluates the
|
|
|
- fallback directory criteria for each of the relays """
|
|
|
-
|
|
|
+ fallback directory criteria for each of the relays,
|
|
|
+ passing exact to apply_filter_lists(). """
|
|
|
print "/* type=fallback */"
|
|
|
print ("/* version={} */"
|
|
|
.format(cleanse_c_multiline_comment(FALLBACK_FORMAT_VERSION)))
|
|
@@ -2135,7 +2283,7 @@ def list_fallbacks(whitelist):
|
|
|
# warning that the details have changed from those in the whitelist.
|
|
|
# instead, there will be an info-level log during the eligibility check.
|
|
|
initial_count = len(candidates.fallbacks)
|
|
|
- excluded_count = candidates.apply_filter_lists(whitelist)
|
|
|
+ excluded_count = candidates.apply_filter_lists(whitelist, exact=exact)
|
|
|
print candidates.summarise_filters(initial_count, excluded_count)
|
|
|
eligible_count = len(candidates.fallbacks)
|
|
|
|