Explorar el Código

Scripts and some pre-processed data

Vecna hace 1 mes
commit
8d386a5f85

+ 10 - 0
data/bridge-extra-infos.sha256

@@ -0,0 +1,10 @@
+6fa2aafdf9e3d89bc16dd1e3b74180c58df2de8c8949d7825644467d0e9efa77  data/bridge-extra-infos-2020-07.tar.xz
+7197feecdb2231a3b889ec46bb4b8172e2f1cdba4581beb571fcaa10faa27df4  data/bridge-extra-infos-2020-08.tar.xz
+ad38ac703aab6cd4f9224b72e0678eeef0b07dbd5c4d192c28e62a53871f87cd  data/bridge-extra-infos-2020-09.tar.xz
+753771d113cfdd40a698ae56d199e757c147a2a1c3556bca0a0fd78e61c0b2c7  data/bridge-extra-infos-2020-10.tar.xz
+4152439f604abb15ff5d9b878f5a5b4dc17adbab63a414cb37e9b851af6645c0  data/bridge-extra-infos-2020-11.tar.xz
+125fccc56b7ffa068b6c6de74124319ce14d401685274d1f877807d299ebb8f3  data/bridge-extra-infos-2020-12.tar.xz
+165bc89b4aed09acf08632a0aafbeb7b9c3277a6d158752ee25fdd0f370bcef6  data/bridge-extra-infos-2021-01.tar.xz
+f4f91383728afb2e4bfcf699e11e79874b5d77bfdb4d125bd0b595fd8c21181c  data/bridge-extra-infos-2021-02.tar.xz
+b0f278962fca4b9e7380e0b749948313bc7bdf169b48b58421064d3a9cee8909  data/bridge-extra-infos-2021-03.tar.xz
+be1ff1eb2828d9ddbc7a775ea8501fdfc62274a63ccf55c61b749d5a1d56a1ca  data/bridge-extra-infos-2021-04.tar.xz

+ 1 - 0
data/bridge-pool-assignments.sha256

@@ -0,0 +1 @@
+502b09078fc04567fd4a26860480d027d8816fcd904832375a38274982d4d3d6  data/bridge-pool-assignments-2021-02.tar.xz

BIN
data/bridge_data.tar.gz


+ 32 - 0
run.sh

@@ -0,0 +1,32 @@
#!/bin/bash

# Top-level driver: check dependencies, fetch (or extract) the bridge
# data, then run each processing stage in order.
#
# Usage: ./run.sh [--fast]
#   --fast  extract the bundled pre-processed data instead of
#           downloading and processing everything (~12.5 hours).

# Check that dependencies are installed before going further.
# (>/dev/null so command -v doesn't print the tool's path.)
if ! command -v curl >/dev/null; then
    echo "This script needs curl to be installed."
    exit 1
fi
if ! command -v python3 >/dev/null; then
    echo "This script needs python3 to be installed."
    exit 1
fi
# BUG FIX: the original `cmd || echo msg && exit 1` always exited 1,
# even when the check succeeded, because `A || B && C` parses as
# `(A || B) && C` in shell.
if ! ./scripts/check-python-deps.py; then
    echo "This script needs numpy to be installed."
    exit 1
fi

# Get bridge data
if [ "$1" == "--fast" ]; then
    echo "Extracting some pre-processed data..."
    cd data && tar xf bridge_data.tar.gz && cd ..
else
    echo "Downloading and processing data from step 1..."
    echo "This will take quite a long time (around 12.5 hours on my device)"
    echo "and require a few GB of free space while running."
    ./scripts/get-bridge-data.sh
fi

# Get list of email-distributed bridges
./scripts/get-email-bridges.sh

# Clean up bridge data for the format we want
./scripts/clean-bridge-data.sh

# Evaluate blockages and get stats
./scripts/get-stats.sh

+ 6 - 0
scripts/check-python-deps.py

@@ -0,0 +1,6 @@
#!/usr/bin/env python3

"""Dependency probe: performs no work of its own.

Running this module simply imports every module the other scripts in
this project depend on, so a missing dependency is caught up front:
the import fails and the interpreter exits non-zero.
"""

import csv
import numpy
import os
import sys

+ 32 - 0
scripts/clean-bridge-data.sh

@@ -0,0 +1,32 @@
#!/bin/bash

# Normalise the per-bridge data files for the bridges distributed in
# 2021 February: keep only the highest observed connection count per
# day and write the result, sorted, to data/bridge_data_cleaned/.

# If we haven't already extracted our set of bridges distributed in 2021
# Feb, do that now.
if [ ! -f data/all-bridges ]; then
    echo "Getting list of bridges distributed in 2021 February"
    ./scripts/get-email-bridges.sh
fi

# Clean bridge data (sort, remove duplicates)
if [ ! -d data/bridge_data_cleaned ]; then
    echo "Cleaning data for bridges distributed in 2021 February"
    mkdir data/bridge_data_cleaned
    # read -r: don't let backslashes in the input be interpreted
    while read -r fpr; do
        # Per-bridge files are named by upper-case fingerprint
        fpr=$(printf '%s' "$fpr" | tr '[:lower:]' '[:upper:]')
        if [ -n "$fpr" ]; then
            if [ -f "data/bridge_data/${fpr}" ]; then
                # Each line is "<julian day>,<count>". Sort so the
                # highest count for a given day comes first, then keep
                # only the first line seen per 7-char day prefix.
                pref="placeholder"
                sort -r -n -k1.1,1.7 -k1.9 "data/bridge_data/${fpr}" \
                    | while read -r line; do
                    if [[ "$line" != "$pref"* ]]; then
                        echo "$line"
                        pref="${line:0:7}"
                    fi
                done | sort > "data/bridge_data_cleaned/${fpr}"
            else
                echo "No data/bridge_data/${fpr}"
            fi
        fi
    done < data/all-bridges
fi

+ 73 - 0
scripts/evaluate-blockages.py

@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+import csv
+import os
+
# If abs is True, we need to have observed more than 32 connections on
# some day to be considered blocked.
def is_blocked (harshness, bridge_ips_max, bridge_ips_today, abs, no_min):
    """Decide whether today's connection count indicates a blockage.

    harshness scales the threshold (threshold = 8 * harshness).  With
    no prior observation (bridge_ips_max is None) nothing can be
    judged.  When `abs` is true an absolute rule applies (the bridge
    must have peaked above 32 connections, unless no_min); otherwise
    the rule is relative to the prior maximum.
    """
    if bridge_ips_max is None:
        return False

    threshold = 8 * harshness

    # In every mode, today's count must be at or below the threshold.
    if bridge_ips_today > threshold:
        return False

    if no_min:
        return True

    if abs:
        # absolute rule: the bridge must once have seen real traffic
        return bridge_ips_max > 32

    # relative rule: today's count must sit well below the prior peak
    return bridge_ips_today <= bridge_ips_max - 32 + threshold
+
def evaluate (harshness, fingerprint, abs=True, no_min=False):
    """Scan a bridge's cleaned data file and return the first date on
    which the bridge looks blocked, or None if it never does.

    Per-day counts are read from data/bridge_data_cleaned/<FPR>.
    Leading zero-count days are skipped; the first non-zero count seen
    becomes the reference maximum handed to is_blocked.
    """
    fingerprint = fingerprint.upper()
    bridge_ips_max = None

    with open (f"data/bridge_data_cleaned/{fingerprint}", 'r') as file:
        for row in csv.reader(file, delimiter=','):
            count = int(row[1])

            if bridge_ips_max is None:
                # Still waiting for the first non-zero observation
                if count > 0:
                    bridge_ips_max = count
            elif is_blocked (harshness, bridge_ips_max, count, abs, no_min):
                # Column 0 holds the (Julian) date of the blockage
                return row[0]

    # The bridge never looked blocked
    return None
+
# Remove any previous blocked_* files, start over, so the appends
# below begin from a clean slate.
for i in range(5):
    for suffix in ("", "_abs", "_nomin"):
        path = f"data/blocked_{i}{suffix}"
        if os.path.exists (path):
            os.remove (path)

def _record (path, fingerprint, blocked):
    # Append one "fingerprint,date" line when a blockage was detected
    # (blocked is the date string returned by evaluate, or None).
    if blocked is not None:
        with open (path, 'a') as f:
            f.write(f"{fingerprint},{blocked}\n")

with open ("data/all-bridges", 'r') as all_bridges:
    for fingerprint in all_bridges:
        fingerprint = fingerprint.strip()
        if fingerprint:
            # Go through all harshness values, evaluating each of the
            # three threshold modes: relative, absolute, and absolute
            # without the 32-connection minimum.
            for harshness in range(5):
                _record (f"data/blocked_{harshness}",
                         fingerprint,
                         evaluate (harshness, fingerprint, False))
                _record (f"data/blocked_{harshness}_abs",
                         fingerprint,
                         evaluate (harshness, fingerprint, True))
                _record (f"data/blocked_{harshness}_nomin",
                         fingerprint,
                         evaluate (harshness, fingerprint, False, True))

+ 74 - 0
scripts/get-bridge-data.sh

@@ -0,0 +1,74 @@
#!/bin/bash

# Download the bridge-extra-infos archives from CollecTor, verify
# their checksums, and extract per-bridge "<julian date>,<count>"
# records into data/bridge_data/<FINGERPRINT>.

# Populate array of months we care about
months=()
# 2020
for i in $(seq 7 12); do
    months+=( 2020-$(printf %02d "$i") )
done
# 2021
for i in $(seq 1 4); do
    months+=( 2021-$(printf %02d "$i") )
done

# Download the archives if we don't have them already
for i in "${months[@]}"; do
    if [ ! -f "data/bridge-extra-infos-${i}.tar.xz" ]; then
        curl -Lo "data/bridge-extra-infos-${i}.tar.xz" "https://collector.torproject.org/archive/bridge-descriptors/extra-infos/bridge-extra-infos-${i}.tar.xz" || exit 1
    fi
done

# Check that we have the right archives
sha256sum -c data/bridge-extra-infos.sha256 || exit 1

# If we haven't already extracted the bridge data, then do so. This will
# take a long time (around 12.5 hours on my device) because it needs to
# process around 3 million small files, and it will require a few GB of
# free space while running. In the end, this results in about 91 MB of
# bridge data that we care about.
if [ ! -d data/bridge_data ]; then

    cd data || exit 1

    # BUG FIX: the output directory was never created, so the appends
    # below failed on a fresh checkout.
    mkdir -p bridge_data

    # This is around 20 GB of data uncompressed, so don't extract it all
    # at once. Instead, extract and process one month at a time.
    for i in "${months[@]}"; do
        if [ ! -d "bridge-extra-infos-${i}" ]; then
            echo "Extracting bridge-extra-infos-${i}.tar.xz"
            tar xf "bridge-extra-infos-${i}.tar.xz" || exit 1
        fi

        echo "Processing bridge-extra-infos-${i}"
        for j in bridge-extra-infos-${i}/*; do
            for k in "${j}"/*; do
                for l in "${k}"/*; do
                    # BUG FIX: this used to test -s "${k}" (the parent
                    # directory, which is essentially always non-empty)
                    # rather than the descriptor file itself.
                    if [[ -s "${l}" ]]; then
                        fingerprint=$(grep -Po '(?<=^extra-info )(.*)(?=$)' "${l}" | grep -Po '(?<= )(.*)(?=$)')
                        date=$(grep -Po '(?<=^published )(.*)(?= )' "${l}")
                        # Convert to Julian date, thanks to
                        # https://stackoverflow.com/a/43318209
                        date_julian=$(( $(date +%s -d "${date}") / 86400 + 2440587 ))
                        count=$(grep -Po '(?<=^bridge-ips )(.*)(?=$)' "${l}" | grep -Po '(?<=by=)(.*?)(?=(,|$))')
                        if [ -z "$count" ]; then
                            count=0
                        fi

                        if [[ -n "${date_julian}" && -n "${fingerprint}" ]]; then
                            echo "${date_julian},${count}" >> "bridge_data/${fingerprint}"
                        else
                            echo "Error for ${l}"
                            echo "    fingerprint: ${fingerprint}"
                            echo "    date:        ${date_julian}"
                            echo "    count:       ${count}"
                        fi
                    fi
                done
            done
        done

        echo "Removing bridge-extra-infos-${i} directory to free up space"
        rm -r "bridge-extra-infos-${i}"
    done

    cd ..
fi

+ 42 - 0
scripts/get-email-bridges.sh

@@ -0,0 +1,42 @@
#!/bin/bash

# Download the 2021-02 bridge-pool-assignments archive, extract the
# per-distribution-method bridge lists, and print summary counts.

# If we don't already have the archive, download it.
# Consistency fix: abort on download failure, like get-bridge-data.sh.
if [ ! -f data/bridge-pool-assignments-2021-02.tar.xz ]; then
    curl -Lo data/bridge-pool-assignments-2021-02.tar.xz https://collector.torproject.org/archive/bridge-pool-assignments/bridge-pool-assignments-2021-02.tar.xz || exit 1
fi

# Check that we have the right archive
sha256sum -c data/bridge-pool-assignments.sha256 || exit 1

# If we haven't already extracted the archive, extract it
if [ ! -d data/bridge-pool-assignments-2021-02 ]; then
    cd data && tar xf bridge-pool-assignments-2021-02.tar.xz && cd ..
fi

# Print the sorted, de-duplicated fingerprints of every bridge whose
# assignment line matches $1, across all daily files for Feb 1-21.
# An empty pattern ("") matches every line, i.e. all bridges.
list_pool() {
    for i in $(seq 1 21); do
        grep -v "bridge-pool-assignment" data/bridge-pool-assignments-2021-02/$(printf %02d "$i")/* \
            | grep "$1" \
            | grep -Po '(?<=:)(.*?)(?= )'
    done | sort | uniq
}

# Extract obfs4 email bridges (different filter shape: match the
# " email " pool first, then require the obfs4 transport)
for i in $(seq 1 21); do
    grep " email " data/bridge-pool-assignments-2021-02/$(printf %02d "$i")/* \
        | grep "obfs4" \
        | grep -Po '(?<=:)(.*?)(?= )'
done | sort | uniq > data/obfs4-email-bridges

# Get list of all bridges
list_pool "" > data/all-bridges

# Count bridges in each category (wc -l < file avoids the useless cat)
all_bridges=$(wc -l < data/all-bridges)
obfs4_email_bridges=$(wc -l < data/obfs4-email-bridges)
email_bridges=$(list_pool "email" | wc -l)
https_bridges=$(list_pool "https" | wc -l)
moat_bridges=$(list_pool "moat" | wc -l)
unallocated_bridges=$(list_pool "unallocated" | wc -l)

echo "Total number of bridges: ${all_bridges}"
echo "Number of obfs4 email bridges: ${obfs4_email_bridges}"
echo "Number of email bridges: ${email_bridges}"
echo "Number of HTTPS bridges: ${https_bridges}"
echo "Number of moat bridges: ${moat_bridges}"
echo "Number of unallocated bridges: ${unallocated_bridges}"

+ 217 - 0
scripts/get-stats.py

@@ -0,0 +1,217 @@
+#!/usr/bin/env python3
+
+import csv
+import numpy
+import os
+import sys
+
# Starting day: February Nth
#N = 1
N = 20

# See note in readme
# Julian day number of 2021 January 31 (so FIRST_DAY below lands on
# February Nth).
JAN_31 = 2459245

# 2021 February Nth as Julian date
FIRST_DAY = JAN_31 + N

# Total number of bridges in the 2021-02 pool assignments
# (presumably the count printed by get-email-bridges.sh -- TODO confirm)
TOTAL_BRIDGES = 1890

# Number of obfs4 bridges distributed via email in 2021-02
# (presumably from get-email-bridges.sh output -- TODO confirm)
OBFS4_EMAIL_BRIDGES = 93
+
def sigfigs(n):
    """Round n to one significant figure.

    0.0 comes back as the int 0.  Values >= 1 round to the nearest
    integer; values below 1 are scaled up by powers of ten until they
    reach 1, rounded, then scaled back down.
    """
    if n == 0.0:
        return 0  # as an int

    shift = 0
    while n * (10 ** shift) < 1:
        shift += 1
    return round(n * 10 ** shift) / 10 ** shift
+
# Fingerprints of the obfs4 bridges distributed over email, as written
# by get-email-bridges.sh.
email_bridges = set()

with open ("data/obfs4-email-bridges", 'r') as f:
    for line in f:
        # NOTE(review): lines read from a file keep their trailing
        # newline, so this test is always true; the strip() below is
        # what normalises the fingerprint.
        if line != "":
            email_bridges.add(line.strip())

# Shared LaTeX table header for the three result tables.
rel_table = """
\\hline
$h$ & \\textbf{TP} & \\textbf{TN} & \\textbf{FP} & \\textbf{FN} & \\textbf{Precision} & \\textbf{Recall} \\\\
\\hline
"""
abs_table = rel_table
nomin_table = rel_table

# Build one confusion-matrix row per harshness level for each of the
# three evaluation modes (relative "", absolute "_abs", absolute
# without the 32-connection minimum "_nomin"), reading the files that
# evaluate-blockages.py produced.
for harshness in range(5):
    for suffix in ["", "_abs", "_nomin"]:
        with open (f"data/blocked_{harshness}{suffix}", 'r') as f:
            # obfs4 email bridges correctly identified as blocked
            correct = 0

            # obfs4 email bridges identified as blocked before they actually were
            too_soon = 0

            # non-obfs4-email bridges incorrectly identified as blocked
            incorrect = 0

            for line in f:
                if line != "":
                    line = line.strip()
                    # Each line is "<40-char fingerprint>,<julian date>";
                    # slice around the comma at index 40.
                    fingerprint = line[:40]
                    date = int(line[41:])

                    if fingerprint in email_bridges:
                        if date >= FIRST_DAY:
                            correct += 1
                        else:
                            too_soon += 1
                    else:
                        incorrect += 1

            # Derive the confusion matrix against the known ground truth
            # counts (TOTAL_BRIDGES / OBFS4_EMAIL_BRIDGES constants).
            tn = TOTAL_BRIDGES - OBFS4_EMAIL_BRIDGES - incorrect
            tp = correct
            fn = OBFS4_EMAIL_BRIDGES - correct - too_soon
            fp = too_soon + incorrect

            # NOTE(review): raises ZeroDivisionError if tp + fp == 0
            # (i.e. an empty blocked_* file) -- confirm that never happens.
            precision = sigfigs(tp / (tp + fp))

            recall = sigfigs(tp / (tp + fn))

            newline = f"{harshness} & {tp} & {tn} & {fp} & {fn} & {precision} & {recall} \\\\\n"

            if suffix == "":
                rel_table += newline
            elif suffix == "_abs":
                abs_table += newline
            else:
                nomin_table += newline

print ("Absolute threshold without a minimum:")
print (nomin_table)

print ("Absolute threshold:")
print (abs_table)

print ("Relative threshold:")
print (rel_table)
+
+
# Now let's look at stddevs

# Per-bridge daily counts for the email bridges:
# email_bridge_data[i] maps julian day -> count for email_bridges[i]
# (only bridges with at least one non-zero count are kept), and
# email_bridge_max[i] is the largest count seen for that bridge.
email_bridges = list(email_bridges)
email_bridge_data = []
email_bridge_max = []

for fingerprint in email_bridges:
    # We're going to get all the data for each bridge
    bridge_data = dict()
    begun = False  # flips to True at the first non-zero count
    max_count = 0

    filename = f"data/bridge_data_cleaned/{fingerprint.upper()}"

    if os.path.isfile(filename) and os.path.getsize(filename) > 0:
        with open(filename, 'r') as csvfile:
            data = csv.reader(csvfile, delimiter=',')

            for line in data:

                # Ignore 0 values until we see a non-zero value
                if not begun:
                    if line[1] != "0":
                        begun = True

                if begun:
                    # NOTE(review): if the date field contains no space,
                    # find(' ') returns -1 and the slice silently drops
                    # the last character of the date -- confirm the
                    # cleaned files really contain a space here.
                    date = int(line[0][:line[0].find(' ')])
                    val = int(line[1])
                    bridge_data[date] = val
                    max_count = max(max_count, val)

            # Only keep bridges that ever reported a non-zero count
            if begun:
                email_bridge_data.append(bridge_data)
                email_bridge_max.append(max_count)
+
# Look at bridges individually
for i in range(len(email_bridge_data)):
    bridge = email_bridge_data[i]

    # Connection counts observed before the censorship start date
    vals = []

    # Get smallest key, i.e., first date we have data for
    start_date = min(bridge)

    # Get counts before censorship started
    #for d in range(start_date, FIRST_DAY):
    #    if d in bridge:
    #        vals.append(bridge[d])
        # If this day is not represented, the bridge did not report
        # stats; this is different from 0.

    # Note: This is cheaper than the above implementation.
    for date, val in bridge.items():
        if date < FIRST_DAY:
            vals.append(val)

    # If we have no data, don't worry about it
    if len(vals) == 0:
        continue

    mu = numpy.mean(vals)
    sigma = numpy.std(vals)

    # Report how many standard deviations a count of zero lies from
    # this bridge's pre-censorship mean; skip constant series
    # (sigma == 0) to avoid dividing by zero.
    if sigma > 0:
        print (f"Single: Bridge {i}: max={email_bridge_max[i]}, mean={mu}, std={sigma}")
        print (f"Single: Zero is {mu / sigma} standard deviations away from the mean ({mu})")
        print (f"Single: We are looking at data from {len(vals)} days, starting on {start_date}\n")
+
# Look at pairs of bridges
for i in range(len(email_bridge_data)):
    for j in range(i+1, len(email_bridge_data)):
        max_count = 0

        bridge_i = email_bridge_data[i]
        bridge_j = email_bridge_data[j]

        # Combined (summed) daily counts for the pair
        vals = []

        # Get smallest key, i.e., the first date BOTH bridges have data for
        start_date = max(min(bridge_i), min(bridge_j))

        # Get counts before censorship started
        #for d in range(start_date, FIRST_DAY):

        # Get set of keys between start_date and FIRST_DAY
        # NOTE(review): these bounds include FIRST_DAY itself, whereas
        # the single-bridge loop above uses date < FIRST_DAY -- confirm
        # whether the censorship start day is meant to be included.
        keys = set()
        for d in bridge_i:
            if d >= start_date and d <= FIRST_DAY:
                keys.add(d)
        for d in bridge_j:
            if d >= start_date and d <= FIRST_DAY:
                keys.add(d)

        # Sum the two bridges' counts for each day either one reported
        for d in keys:
            val = 0
            if d in bridge_i and d in bridge_j:
                val = bridge_i[d] + bridge_j[d]
            elif d in bridge_i:
                val = bridge_i[d]
            elif d in bridge_j:
                val = bridge_j[d]

            vals.append(val)
            max_count = max(max_count, val)

        # If we have no data, don't worry about it
        if len(vals) == 0:
            continue

        mu = numpy.mean(vals)
        sigma = numpy.std(vals)

        # Same zero-distance report as the single-bridge case, but for
        # the pair's combined counts.
        if sigma > 0:
            print (f"Double: Bridges {i} and {j}: max={max_count}, mean={mu}, std={sigma}")
            print (f"Double: Zero is {mu / sigma} standard deviations away from the mean ({mu})")
            print (f"Double: We are looking at data from {len(vals)} days, starting on {start_date}\n")

+ 55 - 0
scripts/get-stats.sh

@@ -0,0 +1,55 @@
#!/bin/bash
# NOTE: shebang changed from /bin/sh -- this script relies on
# `echo -n`, which POSIX sh does not guarantee (dash prints "-n").

# Summarise the results: print the confusion-matrix tables and various
# counts extracted from the get-stats.py output file.

# If the blockages have not already been evaluated, do that now
if [ ! -f data/blocked_0 ]; then
    echo "Running python code to evaluate blockages..."
    ./scripts/evaluate-blockages.py
fi

# Do the actual math in python
if [ ! -f output ]; then
    echo "Running python code to compute stats..."
    ./scripts/get-stats.py > output
fi

echo "Tables:"
head -33 output

# BUG FIX: the exclusion patterns now include the trailing comma from
# "max={n}, mean=..."; a bare 'max=8' would also (wrongly) exclude
# max=80, max=89, etc.
# NOTE(review): counting ">8" by excluding only max=8 assumes all
# observed maxima are multiples of 8 -- TODO confirm.
echo -n "Number of bridges that received more than 8 connections: "
grep '^Single: ' output | grep 'max=' | grep -v 'max=8,' | wc -l

echo -n "Number of bridges that received more than 16 connections: "
grep '^Single: ' output | grep 'max=' | grep -v 'max=8,' | grep -v 'max=16,' | wc -l

echo -n "Number of bridges that received more than 24 connections: "
grep '^Single: ' output | grep 'max=' | grep -v 'max=8,' | grep -v 'max=16,' | grep -v 'max=24,' | wc -l

echo ""

echo -n "Number of bridges with connection count mean more than 1 stddev away from 0: "
grep "^Single: Zero is " output | grep -v "^Single: Zero is 0." | wc -l

# BUG FIX: sort numerically (-n); a plain lexical sort ranks "9.5"
# above "10.2" and can report the wrong maximum.
echo -n "Max number of stddevs from 0: "
grep "^Single: Zero is " output | grep -v "^Single: Zero is 0." | grep -Po '(?<=^Single: Zero is )(.*?)(?= standard deviations away from the mean)' | sort -rn | head -1

echo ""

echo -n "Number of pairs of bridges that received more than 8 connections: "
grep '^Double: ' output | grep 'max=' | grep -v 'max=8,' | wc -l

echo -n "Number of pairs of bridges that received more than 16 connections: "
grep '^Double: ' output | grep 'max=' | grep -v 'max=8,' | grep -v 'max=16,' | wc -l

echo -n "Number of pairs of bridges that received more than 24 connections: "
grep '^Double: ' output | grep 'max=' | grep -v 'max=8,' | grep -v 'max=16,' | grep -v 'max=24,' | wc -l

echo -n "Number of pairs of bridges that received more than 32 connections: "
grep '^Double: ' output | grep 'max=' | grep -v 'max=8,' | grep -v 'max=16,' | grep -v 'max=24,' | grep -v 'max=32,' | wc -l

echo ""

echo -n "Number of pairs of bridges with connection count mean more than 1 stddev away from 0: "
grep "^Double: Zero is " output | grep -v "^Double: Zero is 0." | wc -l

echo -n "Max number of stddevs from 0: "
grep "^Double: Zero is " output | grep -v "^Double: Zero is 0." | grep -Po '(?<=^Double: Zero is )(.*?)(?= standard deviations away from the mean)' | sort -rn | head -1