소스 검색

Refactor Analyzer trait

Vecna 1 개월 전
부모
커밋
1a8b64080d
4개의 파일이 변경되었으며, 350줄이 추가되고 271줄이 삭제됨
  1. 346 0
      src/analysis.rs
  2. 0 267
      src/analyzer.rs
  3. 1 1
      src/bin/server.rs
  4. 3 3
      src/lib.rs

+ 346 - 0
src/analysis.rs

@@ -0,0 +1,346 @@
+use crate::{get_date, BridgeInfo, BridgeInfoType};
+use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
+use nalgebra::DVector;
+use statrs::distribution::{Continuous, MultivariateNormal};
+use std::{
+    cmp::min,
+    collections::{BTreeMap, HashSet},
+};
+
+/// Decision logic for judging whether a bridge is blocked in one country.
+///
+/// Implementations supply three evaluation stages; `blocked_in` chooses which
+/// stage to call for a bridge based on the bridge's age. In every stage, the
+/// slice arguments hold historical daily counts (as built by `blocked_in`:
+/// oldest day first, all slices the same length) and the `*_today` arguments
+/// hold the counts for the current day.
+pub trait Analyzer {
+    /// Evaluate open-entry bridge. Returns true if blocked, false otherwise.
+    ///
+    /// * `confidence` - threshold supplied by the caller; how it is applied
+    ///   is left to the implementation
+    /// * `bridge_ips` - historical daily `BridgeIps` counts
+    /// * `bridge_ips_today` - today's `BridgeIps` count
+    /// * `negative_reports` - historical daily negative-report counts
+    /// * `negative_reports_today` - today's negative-report count
+    fn stage_one(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool;
+
+    /// Evaluate invite-only bridge without positive reports. Return true if
+    /// blocked, false otherwise.
+    ///
+    /// Takes the same arguments as `stage_one`.
+    fn stage_two(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool;
+
+    /// Evaluate invite-only bridge with positive reports. Return true if
+    /// blocked, false otherwise.
+    ///
+    /// Takes the same arguments as `stage_two`, plus:
+    ///
+    /// * `positive_reports` - historical daily positive-report counts
+    /// * `positive_reports_today` - today's positive-report count
+    fn stage_three(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+        positive_reports: &[u32],
+        positive_reports_today: u32,
+    ) -> bool;
+}
+
+/// Accepts an analyzer, information about a bridge, and a confidence value.
+/// Returns a set of country codes where the bridge is believed to be blocked.
+///
+/// Countries already marked as blocked are always returned. For every other
+/// country, today's counts and up to `UNTRUSTED_INTERVAL` days of history are
+/// passed to one of the analyzer's stages, selected by the bridge's age in
+/// days (`today - first_seen`):
+///
+/// * age below `UNTRUSTED_INTERVAL`: `stage_one`
+/// * age below `2 * UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2]`:
+///   `stage_two`
+/// * otherwise: `stage_three`
+///
+/// Days or counts with no recorded data are treated as 0.
+pub fn blocked_in(
+    analyzer: &dyn Analyzer,
+    bridge_info: &BridgeInfo,
+    confidence: f64,
+) -> HashSet<String> {
+    // TODO: Re-evaluate past days if we have backdated reports
+    let mut blocked_in = HashSet::<String>::new();
+    let today = get_date();
+
+    // Age is today minus the first-seen date. Computing it the other way
+    // round underflows for every bridge first seen before today. Saturate so
+    // that a first-seen date in the future is treated as age 0 instead of
+    // underflowing.
+    let age = today.saturating_sub(bridge_info.first_seen);
+
+    // Number of historical days handed to the analyzer
+    let num_days = min(age, UNTRUSTED_INTERVAL);
+
+    // Age from which the bridge might have 30+ days of historical data on
+    // positive reports
+    let stage_three_age =
+        UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL;
+
+    for (country, info) in &bridge_info.info_by_country {
+        if info.blocked {
+            // Assume bridges never become unblocked
+            blocked_in.insert(country.to_string());
+            continue;
+        }
+
+        // Get today's values
+        // TODO: Evaluate on yesterday if we don't have data for today?
+        let today_info = info.info_by_day.get(&today);
+        let bridge_ips_today = count_for(today_info, BridgeInfoType::BridgeIps);
+        let negative_reports_today = count_for(today_info, BridgeInfoType::NegativeReports);
+        let positive_reports_today = count_for(today_info, BridgeInfoType::PositiveReports);
+
+        // Get time series for last num_days, oldest day first
+        let mut bridge_ips = vec![0; num_days as usize];
+        let mut negative_reports = vec![0; num_days as usize];
+        let mut positive_reports = vec![0; num_days as usize];
+
+        for i in 0..num_days {
+            // NOTE(review): the window covers `today - num_days - 1` through
+            // `today - 2`, so yesterday is not included. Confirm this offset
+            // is intended.
+            let date = today - num_days + i - 1;
+            let day_info = info.info_by_day.get(&date);
+            bridge_ips[i as usize] = count_for(day_info, BridgeInfoType::BridgeIps);
+            negative_reports[i as usize] = count_for(day_info, BridgeInfoType::NegativeReports);
+            positive_reports[i as usize] = count_for(day_info, BridgeInfoType::PositiveReports);
+        }
+
+        // Evaluate using appropriate stage based on age of the bridge
+        let blocked = if age < UNTRUSTED_INTERVAL {
+            // open-entry bridge
+            analyzer.stage_one(
+                confidence,
+                &bridge_ips,
+                bridge_ips_today,
+                &negative_reports,
+                negative_reports_today,
+            )
+        } else if age < stage_three_age {
+            // invite-only bridge without 30+ days of historical data on
+            // positive reports
+            analyzer.stage_two(
+                confidence,
+                &bridge_ips,
+                bridge_ips_today,
+                &negative_reports,
+                negative_reports_today,
+            )
+        } else {
+            // invite-only bridge that has been up long enough that it
+            // might have 30+ days of historical data on positive reports
+            analyzer.stage_three(
+                confidence,
+                &bridge_ips,
+                bridge_ips_today,
+                &negative_reports,
+                negative_reports_today,
+                &positive_reports,
+                positive_reports_today,
+            )
+        };
+
+        if blocked {
+            blocked_in.insert(country.to_string());
+        }
+    }
+    blocked_in
+}
+
+/// Returns the count stored under `kind` for one day, or 0 when either the
+/// day or the entry is missing.
+fn count_for(day: Option<&BTreeMap<BridgeInfoType, u32>>, kind: BridgeInfoType) -> u32 {
+    day.and_then(|counts| counts.get(&kind)).copied().unwrap_or(0)
+}
+
+// Analyzer implementations
+
+/// Dummy example that never thinks bridges are blocked
+///
+/// Every stage ignores its arguments and returns false, so `blocked_in` used
+/// with this analyzer only reports countries already marked as blocked.
+pub struct ExampleAnalyzer {}
+
+impl Analyzer for ExampleAnalyzer {
+    /// Always returns false; all arguments are ignored.
+    fn stage_one(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        _bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        _negative_reports_today: u32,
+    ) -> bool {
+        false
+    }
+
+    /// Always returns false; all arguments are ignored.
+    fn stage_two(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        _bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        _negative_reports_today: u32,
+    ) -> bool {
+        false
+    }
+
+    /// Always returns false; all arguments are ignored.
+    fn stage_three(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        _bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        _negative_reports_today: u32,
+        _positive_reports: &[u32],
+        _positive_reports_today: u32,
+    ) -> bool {
+        false
+    }
+}
+
+/// Analyzer that models a bridge's daily counts as a multivariate normal
+/// distribution
+pub struct NormalAnalyzer {
+    // Stage one flags a bridge whose negative reports today exceed this value
+    max_threshold: u32,
+    // Stage one flags a bridge whose negative reports today exceed this
+    // factor times today's bridge IP count
+    scaling_factor: f64,
+}
+
+impl NormalAnalyzer {
+    /// Creates an analyzer with the given stage-one parameters.
+    pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
+        NormalAnalyzer {
+            max_threshold,
+            scaling_factor,
+        }
+    }
+
+    /// Computes the mean of each series in `data` and the covariance of every
+    /// ordered pair of series.
+    ///
+    /// Returns `(means, covariances)`. `covariances` is the `n x n` matrix
+    /// (`n = data.len()`) flattened row by row, where entry `(i, j)` is the
+    /// sum of products of deviations of series `i` and `j`, divided by the
+    /// series length.
+    ///
+    /// Panics if two series differ in length.
+    fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
+        // One mean per series, in input order
+        let means: Vec<f64> = data
+            .iter()
+            .map(|series| {
+                let total = series
+                    .iter()
+                    .fold(0.0_f64, |acc, &count| acc + count as f64);
+                total / series.len() as f64
+            })
+            .collect();
+
+        // Every (row, column) pair is computed, including the symmetric
+        // duplicates
+        let mut covariances = Vec::<f64>::with_capacity(data.len() * data.len());
+        for (row, row_mean) in data.iter().zip(&means) {
+            for (col, col_mean) in data.iter().zip(&means) {
+                assert_eq!(row.len(), col.len());
+
+                let total = row
+                    .iter()
+                    .zip(col.iter())
+                    .fold(0.0_f64, |acc, (&x, &y)| {
+                        acc + (x as f64 - row_mean) * (y as f64 - col_mean)
+                    });
+                covariances.push(total / row.len() as f64);
+            }
+        }
+
+        (means, covariances)
+    }
+}
+
+impl Analyzer for NormalAnalyzer {
+    /// Evaluate open-entry bridge based on only today's data
+    ///
+    /// Returns true when today's negative reports exceed `max_threshold`, or
+    /// exceed `scaling_factor` times today's bridge IP count. The confidence
+    /// value and the historical series are ignored.
+    fn stage_one(
+        &self,
+        _confidence: f64,
+        _bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        _negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool {
+        negative_reports_today > self.max_threshold
+            || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
+    }
+
+    /// Evaluate invite-only bridge based on last 30 days
+    ///
+    /// Fits a multivariate normal distribution to the historical bridge IP
+    /// and negative-report counts. Returns true when the density at today's
+    /// counts is below `confidence` and today has more negative reports or
+    /// fewer bridge IPs than the historical mean. Returns false when the
+    /// distribution cannot be fitted.
+    ///
+    /// Panics if fewer than `UNTRUSTED_INTERVAL` days are given or the series
+    /// differ in length.
+    fn stage_two(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+    ) -> bool {
+        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
+        assert_eq!(bridge_ips.len(), negative_reports.len());
+
+        let (mean_vec, cov_mat) =
+            Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
+        let bridge_ips_mean = mean_vec[0];
+        let negative_reports_mean = mean_vec[1];
+
+        let bridge_ips_today = f64::from(bridge_ips_today);
+        let negative_reports_today = f64::from(negative_reports_today);
+
+        density_below(
+            mean_vec,
+            cov_mat,
+            vec![bridge_ips_today, negative_reports_today],
+            confidence,
+        ) && (negative_reports_today > negative_reports_mean || bridge_ips_today < bridge_ips_mean)
+    }
+
+    /// Evaluate invite-only bridge with lv3+ users submitting positive reports
+    ///
+    /// Same as `stage_two`, with positive reports as a third variable: the
+    /// bridge is also flagged when today's density is below `confidence` and
+    /// today has fewer positive reports than the historical mean.
+    ///
+    /// Panics if fewer than `UNTRUSTED_INTERVAL` days are given or the series
+    /// differ in length.
+    fn stage_three(
+        &self,
+        confidence: f64,
+        bridge_ips: &[u32],
+        bridge_ips_today: u32,
+        negative_reports: &[u32],
+        negative_reports_today: u32,
+        positive_reports: &[u32],
+        positive_reports_today: u32,
+    ) -> bool {
+        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
+        assert_eq!(bridge_ips.len(), negative_reports.len());
+        assert_eq!(bridge_ips.len(), positive_reports.len());
+
+        let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
+            bridge_ips,
+            negative_reports,
+            positive_reports,
+        ]);
+        let bridge_ips_mean = mean_vec[0];
+        let negative_reports_mean = mean_vec[1];
+        let positive_reports_mean = mean_vec[2];
+
+        let bridge_ips_today = f64::from(bridge_ips_today);
+        let negative_reports_today = f64::from(negative_reports_today);
+        let positive_reports_today = f64::from(positive_reports_today);
+
+        density_below(
+            mean_vec,
+            cov_mat,
+            vec![
+                bridge_ips_today,
+                negative_reports_today,
+                positive_reports_today,
+            ],
+            confidence,
+        ) && (negative_reports_today > negative_reports_mean
+            || bridge_ips_today < bridge_ips_mean
+            || positive_reports_today < positive_reports_mean)
+    }
+}
+
+/// Returns true when the multivariate normal density at `today` is below
+/// `confidence`.
+///
+/// Returns false when the distribution cannot be constructed from `mean_vec`
+/// and `cov_mat`. A series that is constant over the whole window (for
+/// example, no negative reports at all) gives a covariance matrix that is not
+/// positive definite, and unwrapping that error would abort the analysis of
+/// every bridge.
+/// NOTE(review): treating an unfittable history as "not blocked" is a
+/// conservative placeholder; confirm the desired fallback.
+fn density_below(
+    mean_vec: Vec<f64>,
+    cov_mat: Vec<f64>,
+    today: Vec<f64>,
+    confidence: f64,
+) -> bool {
+    match MultivariateNormal::new(mean_vec, cov_mat) {
+        Ok(mvn) => mvn.pdf(&DVector::from_vec(today)) < confidence,
+        Err(_) => false,
+    }
+}

+ 0 - 267
src/analyzer.rs

@@ -1,267 +0,0 @@
-use crate::{get_date, BridgeInfo, BridgeInfoType};
-use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
-use nalgebra::DVector;
-use statrs::distribution::{Continuous, MultivariateNormal};
-use std::collections::{BTreeMap, HashSet};
-
-/// Provides a function for predicting which countries block this bridge
-pub trait Analyzer {
-    fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String>;
-}
-
-/// Dummy example that just tells us about blockages we already know about
-pub struct ExampleAnalyzer {}
-
-impl Analyzer for ExampleAnalyzer {
-    fn blocked_in(&self, bridge_info: &BridgeInfo, _confidence: f64) -> HashSet<String> {
-        let mut blocked_in = HashSet::<String>::new();
-        for (country, info) in &bridge_info.info_by_country {
-            if info.blocked {
-                blocked_in.insert(country.to_string());
-            }
-        }
-        blocked_in
-    }
-}
-
-/// Model data as multivariate normal distribution
-pub struct NormalAnalyzer {
-    max_threshold: u32,
-    scaling_factor: f64,
-}
-
-impl NormalAnalyzer {
-    pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
-        Self {
-            max_threshold,
-            scaling_factor,
-        }
-    }
-
-    fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
-        let n = data.len();
-
-        // Compute mean vector
-        let mean_vec = {
-            let mut mean_vec = Vec::<f64>::new();
-            for var in data {
-                mean_vec.push({
-                    let mut sum = 0.0;
-                    for count in *var {
-                        sum += *count as f64;
-                    }
-                    sum / var.len() as f64
-                });
-            }
-            mean_vec
-        };
-
-        // Compute covariance matrix
-        let cov_mat = {
-            let mut cov_mat = Vec::<f64>::new();
-            // We don't need to recompute Syx, but we currently do
-            for i in 0..n {
-                for j in 0..n {
-                    cov_mat.push({
-                        let var1 = data[i];
-                        let var1_mean = mean_vec[i];
-
-                        let var2 = data[j];
-                        let var2_mean = mean_vec[j];
-
-                        assert_eq!(var1.len(), var2.len());
-
-                        let mut sum = 0.0;
-                        for index in 0..var1.len() {
-                            sum +=
-                                (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
-                        }
-                        sum / var1.len() as f64
-                    });
-                }
-            }
-            cov_mat
-        };
-
-        (mean_vec, cov_mat)
-    }
-
-    /// Evaluate open-entry bridge based on only today's data
-    fn stage_one(&self, bridge_ips_today: u32, negative_reports_today: u32) -> bool {
-        negative_reports_today > self.max_threshold
-            || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
-    }
-
-    /// Evaluate invite-only bridge based on last 30 days
-    fn stage_two(
-        &self,
-        confidence: f64,
-        bridge_ips: &[u32],
-        bridge_ips_today: u32,
-        negative_reports: &[u32],
-        negative_reports_today: u32,
-    ) -> bool {
-        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
-        assert_eq!(bridge_ips.len(), negative_reports.len());
-
-        let (mean_vec, cov_mat) =
-            Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
-        let bridge_ips_mean = mean_vec[0];
-        let negative_reports_mean = mean_vec[1];
-
-        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
-        if mvn.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-        ])) < confidence
-        {
-            (negative_reports_today as f64) > negative_reports_mean
-                || (bridge_ips_today as f64) < bridge_ips_mean
-        } else {
-            false
-        }
-    }
-
-    /// Evaluate invite-only bridge with lv3+ users submitting positive reports
-    fn stage_three(
-        &self,
-        confidence: f64,
-        bridge_ips: &[u32],
-        bridge_ips_today: u32,
-        negative_reports: &[u32],
-        negative_reports_today: u32,
-        positive_reports: &[u32],
-        positive_reports_today: u32,
-    ) -> bool {
-        assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
-        assert_eq!(bridge_ips.len(), negative_reports.len());
-        assert_eq!(bridge_ips.len(), positive_reports.len());
-
-        let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
-            bridge_ips,
-            negative_reports,
-            positive_reports,
-        ]);
-        let bridge_ips_mean = mean_vec[0];
-        let negative_reports_mean = mean_vec[1];
-        let positive_reports_mean = mean_vec[2];
-
-        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
-        if mvn.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-            positive_reports_today as f64,
-        ])) < confidence
-        {
-            (negative_reports_today as f64) > negative_reports_mean
-                || (bridge_ips_today as f64) < bridge_ips_mean
-                || (positive_reports_today as f64) < positive_reports_mean
-        } else {
-            false
-        }
-    }
-}
-
-impl Analyzer for NormalAnalyzer {
-    fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String> {
-        // TODO: Re-evaluate past days if we have backdated reports
-        let mut blocked_in = HashSet::<String>::new();
-        let today = get_date();
-        let age = bridge_info.first_seen - today;
-        for (country, info) in &bridge_info.info_by_country {
-            if info.blocked {
-                // Assume bridges never become unblocked
-                blocked_in.insert(country.to_string());
-            } else {
-                // Get today's values
-                let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
-                // TODO: Evaluate on yesterday if we don't have data for today?
-                let today_info = match info.info_by_day.get(&today) {
-                    Some(v) => v,
-                    None => &new_map_binding,
-                };
-                let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
-                    Some(v) => *v,
-                    None => 0,
-                };
-                let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports)
-                {
-                    Some(v) => *v,
-                    None => 0,
-                };
-                let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports)
-                {
-                    Some(v) => *v,
-                    None => 0,
-                };
-
-                if age < UNTRUSTED_INTERVAL {
-                    // open-entry bridge
-                    if self.stage_one(bridge_ips_today, negative_reports_today) {
-                        blocked_in.insert(country.to_string());
-                    }
-                } else {
-                    // invite-only bridge
-                    let mut bridge_ips = [0; UNTRUSTED_INTERVAL as usize];
-                    let mut negative_reports = [0; UNTRUSTED_INTERVAL as usize];
-                    let mut positive_reports = [0; UNTRUSTED_INTERVAL as usize];
-                    let mut stage_3 = false;
-
-                    // Populate time series
-                    for i in 0..UNTRUSTED_INTERVAL {
-                        let date = today - UNTRUSTED_INTERVAL + i - 1;
-                        let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
-                        let day_info = match info.info_by_day.get(&date) {
-                            Some(v) => v,
-                            None => &new_map_binding,
-                        };
-                        bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
-                            Some(v) => *v,
-                            None => 0,
-                        };
-                        negative_reports[i as usize] =
-                            match day_info.get(&BridgeInfoType::NegativeReports) {
-                                Some(v) => *v,
-                                None => 0,
-                            };
-                        positive_reports[i as usize] =
-                            match day_info.get(&BridgeInfoType::PositiveReports) {
-                                Some(v) => {
-                                    stage_3 = true;
-                                    *v
-                                }
-                                None => 0,
-                            };
-                    }
-
-                    if stage_3 {
-                        // We've seen positive reports
-                        if self.stage_three(
-                            confidence,
-                            &bridge_ips,
-                            bridge_ips_today,
-                            &negative_reports,
-                            negative_reports_today,
-                            &positive_reports,
-                            positive_reports_today,
-                        ) {
-                            blocked_in.insert(country.to_string());
-                        }
-                    } else {
-                        // We have not seen positive reports
-                        if self.stage_two(
-                            confidence,
-                            &bridge_ips,
-                            bridge_ips_today,
-                            &negative_reports,
-                            negative_reports_today,
-                        ) {
-                            blocked_in.insert(country.to_string());
-                        }
-                    }
-                }
-            }
-        }
-        blocked_in
-    }
-}

+ 1 - 1
src/bin/server.rs

@@ -86,7 +86,7 @@ async fn update_daily_info(
     update_positive_reports(&db, &distributors).await;
     let new_blockages = guess_blockages(
         &db,
-        &analyzer::NormalAnalyzer::new(max_threshold, scaling_factor),
+        &analysis::NormalAnalyzer::new(max_threshold, scaling_factor),
         confidence,
     );
     report_blockages(&distributors, new_blockages).await;

+ 3 - 3
src/lib.rs

@@ -11,14 +11,14 @@ use std::{
     fmt,
 };
 
-pub mod analyzer;
+pub mod analysis;
 pub mod bridge_verification_info;
 pub mod extra_info;
 pub mod negative_report;
 pub mod positive_report;
 pub mod request_handler;
 
-use analyzer::Analyzer;
+use analysis::Analyzer;
 use extra_info::*;
 use negative_report::*;
 use positive_report::*;
@@ -583,7 +583,7 @@ pub fn guess_blockages(
         let mut bridge_info: BridgeInfo =
             bincode::deserialize(&db.get(fingerprint).unwrap().unwrap()).unwrap();
         let mut new_blockages = HashSet::<String>::new();
-        let blocked_in = analyzer.blocked_in(&bridge_info, confidence);
+        let blocked_in = analysis::blocked_in(analyzer, &bridge_info, confidence);
         for country in blocked_in {
             let bridge_country_info = bridge_info.info_by_country.get_mut(&country).unwrap();
             if !bridge_country_info.blocked {