Browse Source

Model both accessible bridge and blocked bridge in analysis

Vecna 1 year ago
parent
commit
31b27a291a
2 changed files with 295 additions and 20 deletions
  1. 91 19
      src/analysis.rs
  2. 204 1
      src/tests.rs

+ 91 - 19
src/analysis.rs

@@ -1,7 +1,7 @@
 use crate::{BridgeInfo, BridgeInfoType};
 use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
 use nalgebra::DVector;
-use statrs::distribution::{Continuous, MultivariateNormal};
+use statrs::distribution::{Continuous, MultivariateNormal, Normal};
 use std::{
     cmp::min,
     collections::{BTreeMap, HashSet},
@@ -302,22 +302,52 @@ impl Analyzer for NormalAnalyzer {
         let alpha = 1.0 - confidence;
 
         let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
-        let bridge_ips_mean = mean_vec[0];
         let negative_reports_mean = mean_vec[1];
         let bridge_ips_sd = sd_vec[0];
         let negative_reports_sd = sd_vec[1];
 
+        // Artificially create data for alternative hypothesis
+        let num_days = bridge_ips.len() as usize;
+        let mut bridge_ips_blocked = vec![0; num_days];
+        let mut negative_reports_blocked = vec![0; num_days];
+        let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
+        for i in 0..num_days {
+            // Suppose bridge stats will go down by 2 SDs
+            bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
+                0
+            } else {
+                bridge_ips[i] - bridge_ips_deviation
+            };
+            // Suppose negative reports will go up by 2 SDs
+            negative_reports_blocked[i] =
+                negative_reports[i] + (2.0 * negative_reports_sd).round() as u32;
+        }
+        let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) =
+            Self::stats(&[&bridge_ips_blocked, &negative_reports_blocked]);
+
         let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
         let pdf = mvn.pdf(&DVector::from_vec(vec![
             bridge_ips_today as f64,
             negative_reports_today as f64,
         ]));
-        if pdf < alpha {
-            (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
-                || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
-        } else {
-            false
-        }
+
+        let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
+        let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
+            bridge_ips_today as f64,
+            negative_reports_today as f64,
+        ]));
+
+        // Also model negative reports in isolation
+        let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
+        let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
+        let nr_normal_blocked = Normal::new(
+            negative_reports_mean + 2.0 * negative_reports_sd,
+            negative_reports_sd,
+        )
+        .unwrap();
+        let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
+
+        (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
     }
 
     /// Evaluate invite-only bridge with lv3+ users submitting positive reports
@@ -339,25 +369,67 @@ impl Analyzer for NormalAnalyzer {
 
         let (mean_vec, sd_vec, cov_mat) =
             Self::stats(&[bridge_ips, negative_reports, positive_reports]);
-        let bridge_ips_mean = mean_vec[0];
         let negative_reports_mean = mean_vec[1];
-        let positive_reports_mean = mean_vec[2];
         let bridge_ips_sd = sd_vec[0];
         let negative_reports_sd = sd_vec[1];
         let positive_reports_sd = sd_vec[2];
 
+        // Artificially create data for alternative hypothesis
+        let num_days = bridge_ips.len() as usize;
+        let mut bridge_ips_blocked = vec![0; num_days];
+        let mut negative_reports_blocked = vec![0; num_days];
+        let mut positive_reports_blocked = vec![0; num_days];
+        let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
+        let positive_reports_deviation = (2.0 * positive_reports_sd).round() as u32;
+        for i in 0..num_days {
+            // Suppose positive reports will go down by 2 SDs
+            positive_reports_blocked[i] = if positive_reports_deviation > positive_reports[i] {
+                0
+            } else {
+                positive_reports[i] - positive_reports_deviation
+            };
+            // Suppose bridge stats will go down by 2 SDs
+            bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
+                0
+            } else {
+                bridge_ips[i] - bridge_ips_deviation
+            };
+            // Suppose each user who would have submitted a positive report but
+            // didn't submits a negative report instead.
+            negative_reports_blocked[i] =
+                negative_reports[i] + positive_reports[i] - positive_reports_blocked[i];
+        }
+        let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[
+            &bridge_ips_blocked,
+            &negative_reports_blocked,
+            &positive_reports_blocked,
+        ]);
+
         let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
-        if mvn.pdf(&DVector::from_vec(vec![
+        let pdf = mvn.pdf(&DVector::from_vec(vec![
             bridge_ips_today as f64,
             negative_reports_today as f64,
             positive_reports_today as f64,
-        ])) < alpha
-        {
-            (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
-                || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
-                || (positive_reports_today as f64) < positive_reports_mean - positive_reports_sd
-        } else {
-            false
-        }
+        ]));
+
+        let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
+        let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
+            bridge_ips_today as f64,
+            negative_reports_today as f64,
+            positive_reports_today as f64,
+        ]));
+
+        // Also model negative reports in isolation
+        let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
+        let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
+        // Note we do NOT make this a function of positive signals
+        let nr_normal_blocked = Normal::new(
+            negative_reports_mean + 2.0 * negative_reports_sd,
+            negative_reports_sd,
+        )
+        .unwrap();
+        let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
+
+        (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
     }
 }

+ 204 - 1
src/tests.rs

@@ -1119,7 +1119,210 @@ fn test_analysis() {
         );
     }
 
-    // TODO: More tests
+    {
+        let mut date = get_date();
+
+        // New bridge info
+        let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
+
+        bridge_info
+            .info_by_country
+            .insert("ru".to_string(), BridgeCountryInfo::new());
+        let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
+        let confidence = 0.95;
+
+        let mut blocking_countries = HashSet::<String>::new();
+
+        // No data today
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        for i in 1..30 {
+            // 16-32 connections (16, 24, or 32), 0-3 negative reports each day
+            date += 1;
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::BridgeIps,
+                date,
+                8 * (i % 3 + 2),
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::NegativeReports,
+                date,
+                i % 4,
+            );
+            assert_eq!(
+                blocked_in(&analyzer, &bridge_info, confidence, date),
+                blocking_countries
+            );
+        }
+
+        // Data similar to previous days:
+        // 24 connections, 2 negative reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            24,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            2,
+        );
+
+        // Should not be blocked because we have similar data.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 104 connections, 1 negative report
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            104,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            1,
+        );
+
+        // This should not be blocked even though it's very different because
+        // it's different in the good direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 800 connections, 12 negative reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            800,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            12,
+        );
+        blocking_countries.insert("ru".to_string());
+
+        // The censor artificially inflated bridge stats to prevent detection.
+        // Ensure we still detect the censorship from negative reports.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+    }
+
+    {
+        let mut date = get_date();
+
+        // New bridge info
+        let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
+
+        bridge_info
+            .info_by_country
+            .insert("ru".to_string(), BridgeCountryInfo::new());
+        let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
+        let confidence = 0.95;
+
+        let mut blocking_countries = HashSet::<String>::new();
+
+        // No data today
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        for i in 1..30 {
+            // 16-32 connections (16, 24, or 32), 0-3 negative reports each day
+            date += 1;
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::BridgeIps,
+                date,
+                8 * (i % 3 + 2),
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::NegativeReports,
+                date,
+                i % 4,
+            );
+            assert_eq!(
+                blocked_in(&analyzer, &bridge_info, confidence, date),
+                blocking_countries
+            );
+        }
+
+        // Data similar to previous days:
+        // 24 connections, 2 negative reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            24,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            2,
+        );
+
+        // Should not be blocked because we have similar data.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 104 connections, 1 negative report
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            104,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            1,
+        );
+
+        // This should not be blocked even though it's very different because
+        // it's different in the good direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 0 connections, 0 negative reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            0,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            0,
+        );
+        blocking_countries.insert("ru".to_string());
+
+        // This should be blocked because it's different in the bad direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+    }
 
     // TODO: Test stage 3 analysis
 }