Просмотр исходного кода

Stats about non-email and non-obfs4 bridges

Vecna 4 недель назад
Родитель
Коммит
63da3a4381
4 изменённых файла: 133 добавлено и 1 удалено
  1. 7 0
      scripts/get-email-bridges.sh
  2. 117 0
      scripts/get-stats-non-obfs4-email.py
  3. 2 1
      scripts/get-stats.py
  4. 7 0
      scripts/get-stats.sh

+ 7 - 0
scripts/get-email-bridges.sh

@@ -20,6 +20,13 @@ for i in $(seq 1 21); do
         | grep -Po '(?<=:)(.*?)(?= )'
 done | sort | uniq > data/obfs4-email-bridges
 
+# Extract non-obfs4 email bridges
+for i in $(seq 1 21); do
+    grep " email " data/bridge-pool-assignments-2021-02/$(printf %02d $i)/* \
+        | grep -Po '(?<=:)(.*?)(?= )'
+done | sort | uniq > data/all-email-bridges
+comm -2 -3 data/all-email-bridges data/obfs4-email-bridges > data/non-obfs4-email-bridges
+
 # Get list of all bridges
 for i in $(seq 1 21); do
     grep -v "bridge-pool-assignment" data/bridge-pool-assignments-2021-02/$(printf %02d $i)/* \

+ 117 - 0
scripts/get-stats-non-obfs4-email.py

@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+import csv
+import numpy
+import os
+import sys
+
+
+# Starting day: February Nth
+#N = 1
+N = 20
+
+# See note in readme
+JAN_31 = 2459245
+
+# 2021 February Nth as Julian date
+FIRST_DAY = JAN_31 + N
+
+TOTAL_BRIDGES = 1890
+
+OBFS4_EMAIL_BRIDGES = 93
+
+email_bridges = set()
+non_obfs4_email_bridges = set()
+other_bridges = set()
+
+with open ("data/obfs4-email-bridges", 'r') as f:
+    for line in f:
+        if line != "":
+            email_bridges.add(line.strip())
+
+with open ("data/non-obfs4-email-bridges", 'r') as f:
+    for line in f:
+        if line != "":
+            non_obfs4_email_bridges.add(line.strip())
+
+with open ("data/all-bridges", 'r') as f:
+    for line in f:
+        if line != "":
+            bridge = line.strip()
+            if not bridge in email_bridges and not bridge in non_obfs4_email_bridges:
+                other_bridges.add(bridge)
+
+def max_counts (bridges):
+    other_bridge_data = []
+    other_bridge_max = []
+    other_bridge_fpr = []
+
+    for fingerprint in bridges:
+        # We're going to get all the data for each bridge
+        bridge_data = dict()
+        begun = False
+        max_count = 0
+
+        filename = f"data/bridge_data_cleaned/{fingerprint.upper()}"
+
+        if os.path.isfile(filename) and os.path.getsize(filename) > 0:
+            with open(filename, 'r') as csvfile:
+                data = csv.reader(csvfile, delimiter=',')
+
+                for line in data:
+
+                    # Ignore 0 values until we see a non-zero value
+                    if not begun:
+                        if line[1] != "0":
+                            begun = True
+
+                    if begun:
+                        date = int(line[0][:line[0].find(' ')])
+                        if date > FIRST_DAY:
+                            break
+                        val = int(line[1])
+                        bridge_data[date] = val
+                        max_count = max(max_count, val)
+
+                if begun:
+                    other_bridge_data.append(bridge_data)
+                    other_bridge_max.append(max_count)
+                    other_bridge_fpr.append(fingerprint)
+
+    max_overall_count = 0
+    for count in other_bridge_max:
+        if count > max_overall_count:
+            max_overall_count = count
+
+    if max_overall_count == 0:
+        print ("No bridges received any connections.")
+        return
+
+    # We want to count how many bridges have at least 8*i connections
+    at_least_count = [0] * (max_overall_count // 8)
+
+    # Note: We ignore index 0
+    for count in other_bridge_max:
+        for i in range(1, count//8):
+            at_least_count[i] += 1
+
+    last_value = at_least_count[1]
+    for i in range(1, len(at_least_count)):
+        count = at_least_count[i]
+        if i == len(at_least_count) - 1:
+            print (f"Bridges with at least {i*8} connections: {count}")
+        elif i == 1:
+            continue
+        elif count != last_value:
+            # print last number
+            print (f"Bridges with at least {(i-1)*8} connections: {at_least_count[i-1]}")
+        last_value = count
+
+non_obfs4_email_bridges = list(non_obfs4_email_bridges)
+other_bridges = list(other_bridges)
+
+print ("Max counts for bridges that were distributed by email but did not support obfs4:")
+max_counts (non_obfs4_email_bridges)
+
+print ("\nMax counts for bridges that were not distributed by email:")
+max_counts (other_bridges)

+ 2 - 1
scripts/get-stats.py

@@ -37,7 +37,6 @@ with open ("data/obfs4-email-bridges", 'r') as f:
         if line != "":
             email_bridges.add(line.strip())
 
-
 loesing_table = """
 \\hline
 \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
@@ -139,6 +138,8 @@ for fingerprint in email_bridges:
 
                 if begun:
                     date = int(line[0][:line[0].find(' ')])
+                    if date > FIRST_DAY:
+                        break
                     val = int(line[1])
                     bridge_data[date] = val
                     max_count = max(max_count, val)

+ 7 - 0
scripts/get-stats.sh

@@ -15,6 +15,9 @@ fi
 echo "Tables:"
 head -74 output
 
+echo -n "Number of bridges that received more than 0 connections: "
+grep '^Single: ' output | grep 'max=' | wc -l
+
 echo -n "Number of bridges that received more than 8 connections: "
 grep '^Single: ' output | grep 'max=' | grep -v 'max=8' | wc -l
 
@@ -53,3 +56,7 @@ grep "^Double: Zero is " output | grep -v "^Double: Zero is 0." | wc -l
 
 echo -n "Max number of stddevs from 0: "
 grep "^Double: Zero is " output | grep -v "^Double: Zero is 0." | grep -Po '(?<=^Double: Zero is )(.*?)(?= standard deviations away from the mean)' | sort -r | head -1
+
+echo ""
+
+./scripts/get-stats-non-obfs4-email.py