|
|
@@ -0,0 +1,217 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+
|
|
|
+import csv
|
|
|
+import numpy
|
|
|
+import os
|
|
|
+import sys
|
|
|
+
|
|
|
# Starting day of the analysis window: February Nth.
# (Set N = 1 to start from the first day of the month instead.)
N = 20

# Day number assigned to January 31st; see note in readme.
JAN_31 = 2459245

# 2021 February Nth as Julian date
FIRST_DAY = JAN_31 + N

# Population sizes used below to derive the true-negative and
# false-negative counts.
TOTAL_BRIDGES = 1890
OBFS4_EMAIL_BRIDGES = 93
|
|
|
+
|
|
|
def sigfigs(n):
    """Round ``n`` to one significant figure when its magnitude is below 1.

    The value is scaled by the smallest power of ten that brings its
    magnitude to at least 1, rounded to the nearest integer, and scaled
    back. Values whose magnitude is already at least 1 are therefore simply
    rounded to the nearest whole number.

    Args:
        n: The number to round.

    Returns:
        The int ``0`` when ``n`` equals zero, otherwise the rounded value
        as a float.
    """
    if n == 0.0:
        return 0  # as an int

    # Search on the magnitude: with a negative n the product would stay
    # below 1 for every exponent and the loop would never terminate.
    magnitude = abs(n)
    i = 0
    while magnitude * (10**i) < 1:
        i += 1

    return round(n * 10**i) / 10**i
|
|
|
+
|
|
|
# Fingerprints of the obfs4 email bridges, read one per line.
email_bridges = set()

with open("data/obfs4-email-bridges", 'r') as f:
    for line in f:
        fingerprint = line.strip()
        # Lines read from a file still carry their newline, so emptiness
        # can only be tested after stripping; otherwise a blank line would
        # put "" into the set.
        if fingerprint:
            email_bridges.add(fingerprint)
|
|
|
+
|
|
|
def _count_detections(lines):
    """Tally the entries of one blocked-bridge listing.

    Each non-blank line is read as a 40-character fingerprint, one
    separator character, and an integer date.

    Args:
        lines: Iterable of text lines from a ``data/blocked_*`` file.

    Returns:
        A ``(correct, too_soon, incorrect)`` tuple:
        correct   -- obfs4 email bridges flagged on or after FIRST_DAY
        too_soon  -- obfs4 email bridges flagged before FIRST_DAY
        incorrect -- flagged bridges that are not obfs4 email bridges

    Raises:
        ValueError: If the date part of a non-blank line is not an integer.
    """
    correct = 0
    too_soon = 0
    incorrect = 0

    for line in lines:
        line = line.strip()
        # Blank lines must be skipped after stripping; a raw line from a
        # file is never "" and would otherwise fail in int() below.
        if not line:
            continue

        fingerprint = line[:40]
        date = int(line[41:])

        if fingerprint not in email_bridges:
            incorrect += 1
        elif date >= FIRST_DAY:
            correct += 1
        else:
            too_soon += 1

    return correct, too_soon, incorrect


def _ratio(numerator, denominator):
    """Return ``numerator / denominator`` via sigfigs, or "N/A" if undefined."""
    if denominator == 0:
        return "N/A"
    return sigfigs(numerator / denominator)


# LaTeX header shared by the three result tables.
rel_table = """
\\hline
$h$ & \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
\\hline
"""
abs_table = rel_table
nomin_table = rel_table

for harshness in range(5):
    for suffix in ["", "_abs", "_nomin"]:
        with open(f"data/blocked_{harshness}{suffix}", 'r') as f:
            correct, too_soon, incorrect = _count_detections(f)

        tn = TOTAL_BRIDGES - OBFS4_EMAIL_BRIDGES - incorrect
        tp = correct
        fn = OBFS4_EMAIL_BRIDGES - correct - too_soon
        # A bridge flagged before it was actually blocked counts as a
        # false positive, alongside the bridges flagged by mistake.
        fp = too_soon + incorrect

        # Either ratio is undefined when its denominator is zero (e.g. a
        # listing that flags nothing); report that instead of crashing.
        precision = _ratio(tp, tp + fp)
        recall = _ratio(tp, tp + fn)

        newline = f"{harshness} & {tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"

        if suffix == "":
            rel_table += newline
        elif suffix == "_abs":
            abs_table += newline
        else:
            nomin_table += newline

print("Absolute threshold without a minimum:")
print(nomin_table)

print("Absolute threshold:")
print(abs_table)

print("Relative threshold:")
print(rel_table)
|
|
|
+
|
|
|
+
|
|
|
# Now let's look at stddevs

# Sort the fingerprints so that the bridge indices printed by the
# statistics below are the same on every run; the iteration order of a set
# of strings is not stable across interpreter invocations.
email_bridges = sorted(email_bridges)

# Parallel lists: for every bridge that reported at least one non-zero
# count, its {date: count} mapping and its largest count.
email_bridge_data = []
email_bridge_max = []

for fingerprint in email_bridges:
    # We're going to get all the data for each bridge
    bridge_data = dict()
    begun = False
    max_count = 0

    filename = f"data/bridge_data_cleaned/{fingerprint.upper()}"

    if os.path.isfile(filename) and os.path.getsize(filename) > 0:
        # newline='' is what the csv module expects of files it reads.
        with open(filename, 'r', newline='') as csvfile:
            for row in csv.reader(csvfile, delimiter=','):
                # Blank lines come back as rows without a count column.
                if len(row) < 2:
                    continue

                val = int(row[1])

                # Ignore 0 values until we see a non-zero value
                if not begun and val == 0:
                    continue
                begun = True

                # The first column holds the date followed by a space and
                # further text. partition() keeps the whole field when no
                # space is present, where slicing up to find() == -1 would
                # silently drop the last digit.
                date = int(row[0].partition(' ')[0])
                bridge_data[date] = val
                max_count = max(max_count, val)

    if begun:
        email_bridge_data.append(bridge_data)
        email_bridge_max.append(max_count)
|
|
|
+
|
|
|
# Per-bridge statistics over the days before FIRST_DAY
for i, bridge in enumerate(email_bridge_data):
    # Smallest key, i.e., the first date we have data for
    start_date = min(bridge)

    # Counts from before censorship started. Only days present in the
    # mapping are used: a missing day means the bridge did not report
    # stats, which is different from reporting 0. Filtering the stored
    # entries is also cheaper than walking every day in the range.
    vals = [val for date, val in bridge.items() if date < FIRST_DAY]

    # If we have no data, don't worry about it
    if not vals:
        continue

    mu = numpy.mean(vals)
    sigma = numpy.std(vals)

    # A zero spread would make the ratio below meaningless.
    if sigma > 0:
        print(f"Single: Bridge {i}: max={email_bridge_max[i]}, mean={mu}, std={sigma}")
        print(f"Single: Zero is {mu / sigma} standard deviations away from the mean ({mu})")
        print(f"Single: We are looking at data from {len(vals)} days, starting on {start_date}\n")
|
|
|
+
|
|
|
# Look at pairs of bridges
for i, bridge_i in enumerate(email_bridge_data):
    for j in range(i + 1, len(email_bridge_data)):
        bridge_j = email_bridge_data[j]

        # The first date BOTH bridges have data for
        start_date = max(min(bridge_i), min(bridge_j))

        # Days on which at least one of the two bridges reported, from
        # start_date up to but excluding FIRST_DAY. The bound is strict so
        # that only counts from before censorship started are used, the
        # same window as the single-bridge statistics.
        days = sorted(
            d for d in bridge_i.keys() | bridge_j.keys()
            if start_date <= d < FIRST_DAY
        )

        # Combined count per day; a bridge that did not report on a given
        # day contributes nothing to that day's sum.
        vals = [bridge_i.get(d, 0) + bridge_j.get(d, 0) for d in days]

        # If we have no data, don't worry about it
        if not vals:
            continue

        max_count = max(vals)
        mu = numpy.mean(vals)
        sigma = numpy.std(vals)

        # A zero spread would make the ratio below meaningless.
        if sigma > 0:
            print(f"Double: Bridges {i} and {j}: max={max_count}, mean={mu}, std={sigma}")
            print(f"Double: Zero is {mu / sigma} standard deviations away from the mean ({mu})")
            print(f"Double: We are looking at data from {len(vals)} days, starting on {start_date}\n")
|