| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134 |
- #!/usr/bin/env python3
- import csv
- import os
# Algorithm used in https://research.torproject.org/techreports/blocking-2011-09-15.pdf
def evaluate_loesing(fingerprint):
    """Return the first day on which Loesing's algorithm flags the bridge.

    The bridge's per-day counts are read from
    ``data/bridge_data_cleaned/<FINGERPRINT>`` (the fingerprint is
    upper-cased first).  Each CSV row supplies the value that is returned
    in column 0 and an integer count in column 1.

    The bridge is flagged when the largest count anywhere in the file is
    at least 100 and some row has a count of at most 32.  The peak is taken
    over the whole file, so the flagged row may come *before* the peak.

    Returns column 0 of the first row whose count is <= 32 when the bridge
    is flagged, otherwise None.
    """
    fingerprint = fingerprint.upper()
    # Starting the peak at 0 is equivalent to "no positive count seen yet":
    # in both cases the >= 100 test below fails.
    peak = 0
    first_low_day = None
    # A single pass is enough: remember the peak and the first low day, and
    # decide once the whole file has been seen.
    with open(f"data/bridge_data_cleaned/{fingerprint}", 'r', newline='') as file:
        for row in csv.reader(file, delimiter=','):
            bridge_ips_today = int(row[1])
            if bridge_ips_today > peak:
                peak = bridge_ips_today
            if first_low_day is None and bridge_ips_today <= 32:
                first_low_day = row[0]
    if peak >= 100:
        return first_low_day
    return None
# Absolute-threshold rule: a bridge counts as blocked on a day where it
# sees at most t connections after having seen at least m connections on
# some earlier day.  Choosing t=32, m=100 does NOT reproduce Loesing's
# algorithm: here m is compared only against earlier days, whereas
# Loesing's algorithm looks at every day in the period.  t must be less
# than m.
def is_blocked_abs(t, m, bridge_ips_max, bridge_ips_today):
    """Apply the absolute-threshold rule to a single day.

    bridge_ips_max is the highest count seen so far, or None when no
    count has been recorded yet (in which case the result is False).
    """
    return (
        bridge_ips_max is not None
        and bridge_ips_max >= m
        and bridge_ips_today <= t
    )
def evaluate_abs(t, m, fingerprint):
    """Return column 0 of the first row flagged by the absolute-threshold rule.

    Rows are read in order from ``data/bridge_data_cleaned/<FINGERPRINT>``
    (fingerprint upper-cased); column 1 of each row is parsed as an integer
    count.  Each row is tested with is_blocked_abs against the highest count
    of the rows before it.  Returns None when no row is flagged.
    """
    path = f"data/bridge_data_cleaned/{fingerprint.upper()}"
    peak_so_far = None
    with open(path, 'r') as handle:
        for record in csv.reader(handle, delimiter=','):
            count = int(record[1])
            if peak_so_far is None:
                # Tracking of the peak begins with the first non-zero count;
                # that row itself is never tested.
                if count > 0:
                    peak_so_far = count
                continue
            if is_blocked_abs(t, m, peak_so_far, count):
                return record[0]
            peak_so_far = max(peak_so_far, count)
    # Every row was read without the rule firing.
    return None
# Relative-drop rule: a bridge counts as blocked on a day whose count is
# at least d lower than the count of the day before.  d must be at least 8.
def is_blocked_rel(d, bridge_ips_yesterday, bridge_ips_today):
    """Apply the relative-drop rule to a single day.

    bridge_ips_yesterday is None when there is no previous day to compare
    against; the result is then False.
    """
    if bridge_ips_yesterday is not None:
        drop_threshold = bridge_ips_yesterday - d
        return bridge_ips_today <= drop_threshold
    return False
def evaluate_rel(d, fingerprint):
    """Return column 0 of the first row flagged by the relative-drop rule.

    Rows are read in order from ``data/bridge_data_cleaned/<FINGERPRINT>``
    (fingerprint upper-cased); column 1 of each row is parsed as an integer
    count.  Each row is compared with the row immediately before it via
    is_blocked_rel, so the first row can never be flagged.

    Returns None when no row is flagged.
    """
    fingerprint = fingerprint.upper()
    bridge_ips_yesterday = None
    with open(f"data/bridge_data_cleaned/{fingerprint}", 'r', newline='') as file:
        for row in csv.reader(file, delimiter=','):
            bridge_ips_today = int(row[1])
            if is_blocked_rel(d, bridge_ips_yesterday, bridge_ips_today):
                return row[0]
            bridge_ips_yesterday = bridge_ips_today
    # If we got here, the bridge is not blocked
    return None
# Drop thresholds d tried by the relative-drop rule.
_REL_DROPS = range(8, 112, 8)


def _abs_params():
    """Yield every (t, m) pair tried by the absolute-threshold rule."""
    for t in range(8, 40, 8):
        for m in range(t + 8, 112, 8):
            yield t, m


def _remove_if_exists(path):
    """Delete the file at path; a missing file is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _append_result(path, fingerprint, blocked):
    """Append one "fingerprint,blocked" line to the result file at path."""
    with open(path, 'a') as f:
        f.write(f"{fingerprint},{blocked}\n")


# Remove any previous blocked_* files, start over.  The same parameter
# grids drive both this clean-up and the evaluation below, so every file
# that may be appended to has been cleared first.
_remove_if_exists("data/blocked_loesing")
for t, m in _abs_params():
    _remove_if_exists(f"data/blocked_abs_{t}_{m}")
for d in _REL_DROPS:
    _remove_if_exists(f"data/blocked_rel_{d}")

# data/all-bridges holds one fingerprint per line; blank lines are skipped.
with open("data/all-bridges", 'r') as all_bridges:
    for fingerprint in all_bridges:
        fingerprint = fingerprint.strip()
        if not fingerprint:
            continue

        # Evaluate with Loesing's algorithm
        blocked = evaluate_loesing(fingerprint)
        if blocked is not None:
            _append_result("data/blocked_loesing", fingerprint, blocked)

        # Absolute threshold with t and m
        for t, m in _abs_params():
            blocked = evaluate_abs(t, m, fingerprint)
            if blocked is not None:
                _append_result(f"data/blocked_abs_{t}_{m}", fingerprint, blocked)

        # Relative threshold with d
        for d in _REL_DROPS:
            blocked = evaluate_rel(d, fingerprint)
            if blocked is not None:
                _append_result(f"data/blocked_rel_{d}", fingerprint, blocked)
|