|
@@ -266,6 +266,17 @@ def load_json_from_file(json_file_name):
|
|
|
|
|
|
## OnionOO Functions
|
|
|
|
|
|
+def datestr_to_datetime(datestr):
|
|
|
+ # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
|
|
|
+ if datestr is not None:
|
|
|
+ dt = dateutil.parser.parse(datestr)
|
|
|
+ else:
|
|
|
+ # Never modified - use start of epoch
|
|
|
+ dt = datetime.datetime.utcfromtimestamp(0)
|
|
|
+ # strip any timezone out (in case they're supported in future)
|
|
|
+ dt = dt.replace(tzinfo=None)
|
|
|
+ return dt
|
|
|
+
|
|
|
def onionoo_fetch(what, **kwargs):
|
|
|
params = kwargs
|
|
|
params['type'] = 'relay'
|
|
@@ -304,36 +315,41 @@ def onionoo_fetch(what, **kwargs):
|
|
|
if last_mod_date is not None:
|
|
|
request.add_header('If-modified-since', last_mod_date)
|
|
|
|
|
|
- # Parse datetimes like: Fri, 02 Oct 2015 13:34:14 GMT
|
|
|
- if last_mod_date is not None:
|
|
|
- last_mod = dateutil.parser.parse(last_mod_date)
|
|
|
- else:
|
|
|
- # Never modified - use start of epoch
|
|
|
- last_mod = datetime.datetime.utcfromtimestamp(0)
|
|
|
- # strip any timezone out (in case they're supported in future)
|
|
|
- last_mod = last_mod.replace(tzinfo=None)
|
|
|
+ # Parse last modified date
|
|
|
+ last_mod = datestr_to_datetime(last_mod_date)
|
|
|
+
|
|
|
+ # Not Modified and still recent enough to be useful
|
|
|
+ # Onionoo / Globe used to use 6 hours, but we can afford a day
|
|
|
+ required_freshness = datetime.datetime.utcnow()
|
|
|
+ # strip any timezone out (to match dateutil.parser)
|
|
|
+ required_freshness = required_freshness.replace(tzinfo=None)
|
|
|
+ required_freshness -= datetime.timedelta(hours=24)
|
|
|
|
|
|
+ # Make the OnionOO request
|
|
|
response_code = 0
|
|
|
try:
|
|
|
response = urllib2.urlopen(request)
|
|
|
response_code = response.getcode()
|
|
|
except urllib2.HTTPError, error:
|
|
|
response_code = error.code
|
|
|
- # strip any timezone out (to match dateutil.parser)
|
|
|
- six_hours_ago = datetime.datetime.utcnow()
|
|
|
- six_hours_ago = six_hours_ago.replace(tzinfo=None)
|
|
|
- six_hours_ago -= datetime.timedelta(hours=6)
|
|
|
- # Not Modified and still recent enough to be useful (Globe uses 6 hours)
|
|
|
- if response_code == 304:
|
|
|
- if last_mod < six_hours_ago:
|
|
|
- raise Exception("Outdated data from " + url + ": "
|
|
|
- + str(error.code) + ": " + error.reason)
|
|
|
- else:
|
|
|
- pass
|
|
|
+ if response_code == 304: # not modified
|
|
|
+ pass
|
|
|
else:
|
|
|
raise Exception("Could not get " + url + ": "
|
|
|
+ str(error.code) + ": " + error.reason)
|
|
|
|
|
|
+ if response_code == 200: # OK
|
|
|
+ last_mod = datestr_to_datetime(response.info().get('Last-Modified'))
|
|
|
+
|
|
|
+ # Check for freshness
|
|
|
+ if last_mod < required_freshness:
|
|
|
+ if last_mod_date is not None:
|
|
|
+ date_message = "Outdated data: last updated " + last_mod_date
|
|
|
+ else:
|
|
|
+ date_message = "No data: never downloaded "
|
|
|
+ raise Exception(date_message + " from " + url)
|
|
|
+
|
|
|
+ # Process the data
|
|
|
if response_code == 200: # OK
|
|
|
|
|
|
response_json = load_possibly_compressed_response_json(response)
|