Browse Source

Merge remote-tracking branch 'teor/bug17887-v2'

Nick Mathewson 8 years ago
parent
commit
7616922e81
2 changed files with 41 additions and 19 deletions
  1. 6 0
      changes/bug17887
  2. 35 19
      scripts/maint/updateFallbackDirs.py

+ 6 - 0
changes/bug17887

@@ -0,0 +1,6 @@
+  o Minor fixes (fallback directories):
+    - Allow cached or outdated Onionoo data to be used to choose
+      fallback directories, as long as it's less than a day old.
+      Modify last modified date checks in preparation for Onionoo change
+      #16907. Closes ticket #17887. Patch by "teor". Not in any released
+      version of tor.

+ 35 - 19
scripts/maint/updateFallbackDirs.py

@@ -266,6 +266,17 @@ def load_json_from_file(json_file_name):
 
 ## OnionOO Functions
 
+def datestr_to_datetime(datestr):
+  # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
+  if datestr is not None:
+    dt = dateutil.parser.parse(datestr)
+  else:
+    # Never modified - use start of epoch
+    dt = datetime.datetime.utcfromtimestamp(0)
+  # strip any timezone out (in case they're supported in future)
+  dt = dt.replace(tzinfo=None)
+  return dt
+
 def onionoo_fetch(what, **kwargs):
   params = kwargs
   params['type'] = 'relay'
@@ -304,36 +315,41 @@ def onionoo_fetch(what, **kwargs):
     if last_mod_date is not None:
       request.add_header('If-modified-since', last_mod_date)
 
-    # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
-    if last_mod_date is not None:
-      last_mod = dateutil.parser.parse(last_mod_date)
-    else:
-      # Never modified - use start of epoch
-      last_mod = datetime.datetime.utcfromtimestamp(0)
-    # strip any timezone out (in case they're supported in future)
-    last_mod = last_mod.replace(tzinfo=None)
+    # Parse last modified date
+    last_mod = datestr_to_datetime(last_mod_date)
+
+    # Not Modified and still recent enough to be useful
+    # Onionoo / Globe used to use 6 hours, but we can afford a day
+    required_freshness = datetime.datetime.utcnow()
+    # strip any timezone out (to match dateutil.parser)
+    required_freshness = required_freshness.replace(tzinfo=None)
+    required_freshness -= datetime.timedelta(hours=24)
 
+    # Make the OnionOO request
     response_code = 0
     try:
       response = urllib2.urlopen(request)
       response_code = response.getcode()
     except urllib2.HTTPError, error:
       response_code = error.code
-      # strip any timezone out (to match dateutil.parser)
-      six_hours_ago = datetime.datetime.utcnow()
-      six_hours_ago = six_hours_ago.replace(tzinfo=None)
-      six_hours_ago -= datetime.timedelta(hours=6)
-      # Not Modified and still recent enough to be useful (Globe uses 6 hours)
-      if response_code == 304:
-        if last_mod < six_hours_ago:
-          raise Exception("Outdated data from " + url + ": "
-                          + str(error.code) + ": " + error.reason)
-        else:
-          pass
+      if response_code == 304: # not modified
+        pass
       else:
         raise Exception("Could not get " + url + ": "
                         + str(error.code) + ": " + error.reason)
 
+    if response_code == 200: # OK
+      last_mod = datestr_to_datetime(response.info().get('Last-Modified'))
+
+    # Check for freshness
+    if last_mod < required_freshness:
+      if last_mod_date is not None:
+        date_message = "Outdated data: last updated " + last_mod_date
+      else:
+        date_message = "No data: never downloaded "
+      raise Exception(date_message + " from " + url)
+
+    # Process the data
     if response_code == 200: # OK
 
       response_json = load_possibly_compressed_response_json(response)