Vecna пре 1 месец
родитељ
комит
1e6e90922f
3 измењених фајлова са 110 додато и 86 уклоњено
  1. 57 32
      scripts/evaluate-blockages.py
  2. 51 52
      scripts/get-stats.py
  3. 2 2
      scripts/get-stats.sh

+ 57 - 32
scripts/evaluate-blockages.py

@@ -35,22 +35,18 @@ def evaluate_loesing (fingerprint):
 
     return None
 
-# If abs is True, we need to have observed more than 32 connections on
-# some day to be considered blocked.
-def is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs):
+# Consider a bridge blocked if it receives <= t connections in one day,
+# when it has previously received >= m connections. Note that t=32,
+# m=100 is NOT the same as Loesing's algorithm, as we consider m only
+# for previous days, while Loesing's algorithm considers all days within
+# the period. t must be less than m.
+def is_blocked_abs (t, m, bridge_ips_max, bridge_ips_today):
     if bridge_ips_max is None:
         return False
 
-    threshold = 8 * harshness
+    return bridge_ips_max >= m and bridge_ips_today <= t
 
-    if abs:
-        # absolute threshold
-        return bridge_ips_today <= threshold and bridge_ips_max > 32
-    else:
-        # relative threshold based on prior connection counts
-        return bridge_ips_today <= threshold and bridge_ips_today <= bridge_ips_max - 32 + threshold
-
-def evaluate (harshness, fingerprint, abs=True):
+def evaluate_abs (t, m, fingerprint):
     fingerprint = fingerprint.upper()
     bridge_ips_max = None
 
@@ -61,7 +57,7 @@ def evaluate (harshness, fingerprint, abs=True):
             bridge_ips_today = int(row[1])
 
             if not bridge_ips_max is None:
-                if is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs):
+                if is_blocked_abs (t, m, bridge_ips_max, bridge_ips_today):
                     return row[0]
 
                 if bridge_ips_today > bridge_ips_max:
@@ -75,35 +71,64 @@ def evaluate (harshness, fingerprint, abs=True):
         # If we got here, the bridge is not blocked
         return None
 
+# Consider a bridge blocked if we observe a drop of at least d from the
+# previous day's count. d must be at least 8.
+def is_blocked_rel (d, bridge_ips_yesterday, bridge_ips_today):
+    if bridge_ips_yesterday is None:
+        return False
+
+    return bridge_ips_today <= bridge_ips_yesterday - d
+
+def evaluate_rel (d, fingerprint):
+    fingerprint = fingerprint.upper()
+    bridge_ips_yesterday = None
+    bridge_ips_today = None
+
+    with open (f"data/bridge_data_cleaned/{fingerprint}", 'r') as file:
+        bridge_data = csv.reader(file, delimiter=',')
+
+        for row in bridge_data:
+            bridge_ips_today = int(row[1])
+
+            if is_blocked_rel (d, bridge_ips_yesterday, bridge_ips_today):
+                return row[0]
+
+            bridge_ips_yesterday = bridge_ips_today
+
 # Remove any previous blocked_* files, start over
-for i in range(5):
-    if os.path.exists (f"data/blocked_{i}"):
-        os.remove (f"data/blocked_{i}")
-    if os.path.exists (f"data/blocked_{i}_abs"):
-        os.remove (f"data/blocked_{i}_abs")
 if os.path.exists (f"data/blocked_loesing"):
     os.remove (f"data/blocked_loesing")
 
+for t in range (8, 40, 8):
+    for m in range (t+8, 112, 8):
+        if os.path.exists (f"data/blocked_abs_{t}_{m}"):
+            os.remove (f"data/blocked_abs_{t}_{m}")
+
+for d in range (8, 112, 8):
+    if os.path.exists (f"data/blocked_rel_{d}"):
+        os.remove (f"data/blocked_rel_{d}")
+
 with open ("data/all-bridges", 'r') as all_bridges:
     for fingerprint in all_bridges:
         fingerprint = fingerprint.strip()
         if fingerprint:
-            # Go through all harshness values
-            for harshness in range(5):
-                blocked = evaluate (harshness, fingerprint)
-                # If the bridge is blocked add its fingerprint and
-                # blocked date to the list for that harshness level
-                if not blocked is None:
-                    with open (f"data/blocked_{harshness}", 'a') as f:
-                        f.write(f"{fingerprint},{blocked}\n")
-
-                blocked = evaluate (harshness, fingerprint)
-                if not blocked is None:
-                    with open (f"data/blocked_{harshness}_abs", 'a') as f:
-                        f.write(f"{fingerprint},{blocked}\n")
-
             # Evaluate with Loesing's algorithm
             blocked = evaluate_loesing (fingerprint)
             if not blocked is None:
                 with open (f"data/blocked_loesing", 'a') as f:
                     f.write(f"{fingerprint},{blocked}\n")
+
+            # Absolute threshold with t and m
+            for t in range (8, 40, 8):
+                for m in range (t+8, 112, 8):
+                    blocked = evaluate_abs (t, m, fingerprint)
+                    if not blocked is None:
+                        with open (f"data/blocked_abs_{t}_{m}", 'a') as f:
+                            f.write (f"{fingerprint},{blocked}\n")
+
+            # Relative threshold with d
+            for d in range (8, 112, 8):
+                blocked = evaluate_rel (d, fingerprint)
+                if not blocked is None:
+                    with open (f"data/blocked_rel_{d}", 'a') as f:
+                        f.write (f"{fingerprint},{blocked}\n")

+ 51 - 52
scripts/get-stats.py

@@ -37,71 +37,70 @@ with open ("data/obfs4-email-bridges", 'r') as f:
         if line != "":
             email_bridges.add(line.strip())
 
-rel_table = """
+
+loesing_table = """
 \\hline
-$h$ & \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
+\\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
 \\hline
 """
-abs_table = rel_table
-loesing_table = """
+abs_table = """
 \\hline
-\\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
+$t$ & $m$ & \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
+\\hline
+"""
+rel_table = """
+\\hline
+$d$ & \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
 \\hline
 """
 
-for harshness in range(5):
-    for suffix in ["_loesing", "_abs", ""]:
-        if suffix == "_loesing":
-            if harshness > 0:
-                continue
-            else:
-                filename = f"data/blocked_loesing"
-        else:
-            filename = f"data/blocked_{harshness}{suffix}"
-        with open (filename, 'r') as f:
-            # obfs4 email bridges correctly identified as blocked
-            correct = 0
-
-            # obfs4 email bridges identified as blocked before they actually were
-            too_soon = 0
-
-            # non-obfs4-email bridges incorrectly identified as blocked
-            incorrect = 0
-
-            for line in f:
-                if line != "":
-                    line = line.strip()
-                    fingerprint = line[:40]
-                    date = int(line[41:])
-
-                    if fingerprint in email_bridges:
-                        if date >= FIRST_DAY:
-                            correct += 1
-                        else:
-                            too_soon += 1
+def accuracy (filename):
+    with open (filename, 'r') as f:
+        # obfs4 email bridges correctly identified as blocked
+        correct = 0
+
+        # obfs4 email bridges identified as blocked before they actually were
+        too_soon = 0
+
+        # non-obfs4-email bridges incorrectly identified as blocked
+        incorrect = 0
+
+        for line in f:
+            if line != "":
+                line = line.strip()
+                fingerprint = line[:40]
+                date = int(line[41:])
+
+                if fingerprint in email_bridges:
+                    if date >= FIRST_DAY:
+                        correct += 1
                     else:
-                        incorrect += 1
+                        too_soon += 1
+                else:
+                    incorrect += 1
+
+        tn = TOTAL_BRIDGES - OBFS4_EMAIL_BRIDGES - incorrect
+        tp = correct
+        fn = OBFS4_EMAIL_BRIDGES - correct - too_soon
+        fp = too_soon + incorrect
+
+        precision = sigfigs(tp / (tp + fp))
 
-            tn = TOTAL_BRIDGES - OBFS4_EMAIL_BRIDGES - incorrect
-            tp = correct
-            fn = OBFS4_EMAIL_BRIDGES - correct - too_soon
-            fp = too_soon + incorrect
+        recall = sigfigs(tp / (tp + fn))
 
-            precision = sigfigs(tp / (tp + fp))
+        return f"{tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"
 
-            recall = sigfigs(tp / (tp + fn))
+# Loesing
+loesing_table += accuracy ("data/blocked_loesing")
 
-            if suffix == "_loesing":
-                newline = f"{tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"
-            else:
-                newline = f"{harshness} & {tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"
+# Absolute threshold
+for t in range (8, 40, 8):
+    for m in range (t+8, 112, 8):
+        abs_table += f"{t} & {m} & " + accuracy (f"data/blocked_abs_{t}_{m}")
 
-            if suffix == "":
-                rel_table += newline
-            elif suffix == "_abs":
-                abs_table += newline
-            else:
-                loesing_table += newline
+# Relative threshold
+for d in range (8, 112, 8):
+    rel_table += f"{d} & " + accuracy (f"data/blocked_rel_{d}")
 
 print ("Loesing's algorithm:")
 print (loesing_table)

+ 2 - 2
scripts/get-stats.sh

@@ -1,7 +1,7 @@
 #!/bin/sh
 
 # If the blockages have not already been evaluated, do that now
-if [ ! -f data/blocked_0 ]; then
+if [ ! -f data/blocked_loesing ]; then
     echo "Running python code to evaluate blockages..."
     ./scripts/evaluate-blockages.py
 fi
@@ -13,7 +13,7 @@ if [ ! -f output ]; then
 fi
 
 echo "Tables:"
-head -33 output
+head -74 output
 
 echo -n "Number of bridges that received more than 8 connections: "
 grep '^Single: ' output | grep 'max=' | grep -v 'max=8' | wc -l