Просмотр исходного кода

Fix Loesing's algorithm, fix error in computing max_bridge_ips

Vecna 1 месяц назад
Родитель
Коммит
665360d262
2 изменённых файла: 70 добавлено и 20 удалено
  1. 49 13
      scripts/evaluate-blockages.py
  2. 21 7
      scripts/get-stats.py

+ 49 - 13
scripts/evaluate-blockages.py

@@ -3,9 +3,41 @@
 import csv
 import os
 
+# Algorithm used in https://research.torproject.org/techreports/blocking-2011-09-15.pdf: returns row[0] of the first row with <= 32 connections when the file-wide maximum is >= 100, else None.
+def evaluate_loesing (fingerprint):
+    fingerprint = fingerprint.upper()  # the data file path below is built from the upper-cased fingerprint
+    bridge_ips_max = None
+
+    # First pass: find the largest connection count in the whole file (stays None if no count is > 0)
+    with open (f"data/bridge_data_cleaned/{fingerprint}", 'r') as file:
+        bridge_data = csv.reader(file, delimiter=',')
+
+        for row in bridge_data:
+            bridge_ips_today = int(row[1])  # second CSV column: that row's connection count
+            # Seed the maximum with the first positive count, then keep the largest value seen
+            if bridge_ips_max is None:
+                if bridge_ips_today > 0:
+                    bridge_ips_max = bridge_ips_today
+                    #print (f"{fingerprint} bridge_ips_max set to {bridge_ips_today} on {row[0]}")
+            elif bridge_ips_today > bridge_ips_max:
+                bridge_ips_max = bridge_ips_today
+                #print (f"{fingerprint} bridge_ips_max set to {bridge_ips_today} on {row[0]}")
+
+    # Second pass: re-read the file and report the first row that meets both thresholds
+    with open (f"data/bridge_data_cleaned/{fingerprint}", 'r') as file:
+        bridge_data = csv.reader(file, delimiter=',')
+
+        for row in bridge_data:
+            bridge_ips_today = int(row[1])
+            # NOTE: bridge_ips_max covers the whole file, so it may come from a row after this one
+            if not bridge_ips_max is None and bridge_ips_max >= 100 and bridge_ips_today <= 32:
+                return row[0]  # first CSV column; presumably the date of that row - confirm against the data files
+
+    return None  # no row qualified, or no positive count was ever seen
+
 # If abs is True, we need to have observed more than 32 connections on
 # some day to be considered blocked.
-def is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs, no_min):
+def is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs):
     if bridge_ips_max is None:
         return False
 
@@ -13,12 +45,12 @@ def is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs, no_min):
 
     if abs:
         # absolute threshold
-        return bridge_ips_today <= threshold and (no_min or bridge_ips_max > 32)
+        return bridge_ips_today <= threshold and bridge_ips_max > 32
     else:
         # relative threshold based on prior connection counts
-        return bridge_ips_today <= threshold and (no_min or bridge_ips_today <= bridge_ips_max - 32 + threshold)
+        return bridge_ips_today <= threshold and bridge_ips_today <= bridge_ips_max - 32 + threshold
 
-def evaluate (harshness, fingerprint, abs=True, no_min=False):
+def evaluate (harshness, fingerprint, abs=True):
     fingerprint = fingerprint.upper()
     bridge_ips_max = None
 
@@ -29,9 +61,12 @@ def evaluate (harshness, fingerprint, abs=True, no_min=False):
             bridge_ips_today = int(row[1])
 
             if not bridge_ips_max is None:
-                if is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs, no_min):
+                if is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs):
                     return row[0]
 
+                if bridge_ips_today > bridge_ips_max:
+                    bridge_ips_max = bridge_ips_today
+
             # Start bridge_ips_max only when we have a non-zero
             # connection count
             elif bridge_ips_today > 0:
@@ -46,8 +81,8 @@ for i in range(5):
         os.remove (f"data/blocked_{i}")
     if os.path.exists (f"data/blocked_{i}_abs"):
         os.remove (f"data/blocked_{i}_abs")
-    if os.path.exists (f"data/blocked_{i}_nomin"):
-        os.remove (f"data/blocked_{i}_nomin")
+if os.path.exists (f"data/blocked_loesing"):
+    os.remove (f"data/blocked_loesing")
 
 with open ("data/all-bridges", 'r') as all_bridges:
     for fingerprint in all_bridges:
@@ -55,19 +90,20 @@ with open ("data/all-bridges", 'r') as all_bridges:
         if fingerprint:
             # Go through all harshness values
             for harshness in range(5):
-                blocked = evaluate (harshness, fingerprint, False)
+                blocked = evaluate (harshness, fingerprint, abs=False)  # relative threshold; omitting abs would repeat the absolute run below
                 # If the bridge is blocked add its fingerprint and
                 # blocked date to the list for that harshness level
                 if not blocked is None:
                     with open (f"data/blocked_{harshness}", 'a') as f:
                         f.write(f"{fingerprint},{blocked}\n")
 
-                blocked = evaluate (harshness, fingerprint, True)
+                blocked = evaluate (harshness, fingerprint)  # absolute threshold (abs defaults to True)
                 if not blocked is None:
                     with open (f"data/blocked_{harshness}_abs", 'a') as f:
                         f.write(f"{fingerprint},{blocked}\n")
 
-                blocked = evaluate (harshness, fingerprint, False, True)
-                if not blocked is None:
-                    with open (f"data/blocked_{harshness}_nomin", 'a') as f:
-                        f.write(f"{fingerprint},{blocked}\n")
+            # Evaluate with Loesing's algorithm
+            blocked = evaluate_loesing (fingerprint)
+            if not blocked is None:
+                with open (f"data/blocked_loesing", 'a') as f:
+                    f.write(f"{fingerprint},{blocked}\n")

+ 21 - 7
scripts/get-stats.py

@@ -43,11 +43,22 @@ $h$ & \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Preci
 \\hline
 """
 abs_table = rel_table
-nomin_table = rel_table
+loesing_table = """
+\\hline
+\\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
+\\hline
+"""
 
 for harshness in range(5):
-    for suffix in ["", "_abs", "_nomin"]:
-        with open (f"data/blocked_{harshness}{suffix}", 'r') as f:
+    for suffix in ["_loesing", "_abs", ""]:
+        if suffix == "_loesing":
+            if harshness > 0:
+                continue
+            else:
+                filename = f"data/blocked_loesing"
+        else:
+            filename = f"data/blocked_{harshness}{suffix}"
+        with open (filename, 'r') as f:
             # obfs4 email bridges correctly identified as blocked
             correct = 0
 
@@ -80,17 +91,20 @@ for harshness in range(5):
 
             recall = sigfigs(tp / (tp + fn))
 
-            newline = f"{harshness} & {tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"
+            if suffix == "_loesing":
+                newline = f"{tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"
+            else:
+                newline = f"{harshness} & {tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"
 
             if suffix == "":
                 rel_table += newline
             elif suffix == "_abs":
                 abs_table += newline
             else:
-                nomin_table += newline
+                loesing_table += newline
 
-print ("Absolute threshold without a minimum:")
-print (nomin_table)
+print ("Loesing's algorithm:")
+print (loesing_table)
 
 print ("Absolute threshold:")
 print (abs_table)