use crate::{BridgeInfo, BridgeInfoType}; use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL; use nalgebra::DVector; use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal}; use statrs::statistics::Statistics; use std::{ cmp::min, collections::{BTreeMap, HashSet}, }; /// Provides a function for predicting which countries block this bridge pub trait Analyzer { /// Evaluate open-entry bridge. Returns true if blocked, false otherwise. fn stage_one( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge without positive reports. Return true if /// blocked, false otherwise. fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool; /// Evaluate invite-only bridge with positive reports. Return true if /// blocked, false otherwise. fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool; } /// Accepts an analyzer, information about a bridge, and a confidence value. /// Returns a set of country codes where the bridge is believed to be blocked. pub fn blocked_in( analyzer: &dyn Analyzer, bridge_info: &BridgeInfo, confidence: f64, date: u32, min_historical_days: u32, max_historical_days: u32, ) -> HashSet { let mut blocked_in = HashSet::::new(); let today = date; for (country, info) in &bridge_info.info_by_country { let age = today - info.first_seen; if info.blocked { // Assume bridges never become unblocked blocked_in.insert(country.to_string()); } else { // Get today's values let new_map_binding = BTreeMap::::new(); // TODO: Evaluate on yesterday if we don't have data for today? let today_info = match info.info_by_day.get(&today) { Some(v) => v, None => &new_map_binding, }; let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v, None => 0, }; let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; let num_days = min(age, max_historical_days); // Get time series for last num_days let mut bridge_ips = vec![0; num_days as usize]; let mut negative_reports = vec![0; num_days as usize]; let mut positive_reports = vec![0; num_days as usize]; for i in 0..num_days { let date = today - num_days + i - 1; let new_map_binding = BTreeMap::::new(); let day_info = match info.info_by_day.get(&date) { Some(v) => v, None => &new_map_binding, }; bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) { Some(&v) => v, None => 0, }; negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports) { Some(&v) => v, None => 0, }; positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports) { Some(&v) => v, None => 0, }; } // Evaluate using appropriate stage based on age of the bridge if age < UNTRUSTED_INTERVAL || age < min_historical_days { // open-entry bridge and/or not enough days of // historical days for stages 2 and 3 if analyzer.stage_one( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else if info.first_pr.is_none() || today < info.first_pr.unwrap() + min_historical_days { // invite-only bridge without min_historical_days of // historical data on positive reports if analyzer.stage_two( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, ) { blocked_in.insert(country.to_string()); } } else { // invite-only bridge that has min_historical_days or // more of historical data since the first positive report if analyzer.stage_three( confidence, &bridge_ips, bridge_ips_today, &negative_reports, negative_reports_today, &positive_reports, positive_reports_today, ) { blocked_in.insert(country.to_string()); } } } } blocked_in } // Analyzer implementations /// Dummy example that never thinks bridges are blocked pub struct ExampleAnalyzer {} impl Analyzer for ExampleAnalyzer { fn stage_one( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_two( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, ) -> bool { false } fn stage_three( &self, _confidence: f64, _bridge_ips: &[u32], _bridge_ips_today: u32, _negative_reports: &[u32], _negative_reports_today: u32, _positive_reports: &[u32], _positive_reports_today: u32, ) -> bool { false } } /// Model data as multivariate normal distribution pub struct NormalAnalyzer { max_threshold: u32, scaling_factor: f64, } impl NormalAnalyzer { pub fn new(max_threshold: u32, scaling_factor: f64) -> Self { Self { max_threshold, scaling_factor, } } } impl Analyzer for NormalAnalyzer { /// Evaluate open-entry bridge based on only today's data fn stage_one( &self, _confidence: f64, _bridge_ips: &[u32], bridge_ips_today: u32, _negative_reports: &[u32], negative_reports_today: u32, ) -> bool { negative_reports_today > self.max_threshold || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today) } /// Evaluate invite-only bridge based on historical data fn stage_two( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); let alpha = 1.0 - confidence; // Convert to f64 for stats let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::>(); let negative_reports_f64 = &negative_reports .iter() .map(|n| *n as f64) .collect::>(); // Evaluate based on negative reports let negative_reports_mean = negative_reports_f64.mean(); let negative_reports_sd = negative_reports_f64.std_dev(); // Only use CCDF test if today's numbers are worse than average if (negative_reports_today as f64) > negative_reports_mean { let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd); if negative_reports_sd > 0.0 { // We use CCDF because more negative reports is worse. if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha { return true; } } else { // If the standard deviation is 0, we need another option. // Consider the bridge blocked negative reports increase by // more than 1 after a long static period. (Note that the // mean is the exact value because we had no deviation.) if (negative_reports_today as f64) > negative_reports_mean + 1.0 { return true; } } } // Evaluate based on bridge stats let bridge_ips_mean = bridge_ips_f64.mean(); let bridge_ips_sd = bridge_ips_f64.std_dev(); // Only use CDF test if today's numbers are worse than average if (bridge_ips_today as f64) < bridge_ips_mean { let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd); if bridge_ips_sd > 0.0 { if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha { return true; } } else { // If the standard deviation is 0, we need another option. // Consider the bridge blocked if its usage dropped by more // than 1 bin. (Note that the mean is the exact value // because we had no deviation.) if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 { return true; } } } // If none of the tests concluded that the bridge is blocked, // return false false } /// Evaluate invite-only bridge with lv3+ users submitting positive reports fn stage_three( &self, confidence: f64, bridge_ips: &[u32], bridge_ips_today: u32, negative_reports: &[u32], negative_reports_today: u32, positive_reports: &[u32], positive_reports_today: u32, ) -> bool { assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize); assert_eq!(bridge_ips.len(), negative_reports.len()); assert_eq!(bridge_ips.len(), positive_reports.len()); let alpha = 1.0 - confidence; // Convert to f64 for stats let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::>(); let negative_reports_f64 = &negative_reports .iter() .map(|n| *n as f64) .collect::>(); let positive_reports_f64 = &positive_reports .iter() .map(|n| *n as f64) .collect::>(); // Evaluate based on negative reports. It is better to compute // negative reports test first because the positive test may be // expensive. let negative_reports_mean = negative_reports_f64.mean(); let negative_reports_sd = negative_reports_f64.std_dev(); // Only use CCDF test if today's numbers are worse than average if (negative_reports_today as f64) > negative_reports_mean { let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd); if negative_reports_sd > 0.0 { // We use CCDF because more negative reports is worse. if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha { return true; } } else { // Consider the bridge blocked negative reports increase by // more than 1 after a long static period. (Note that the // mean is the exact value because we had no deviation.) if (negative_reports_today as f64) > negative_reports_mean + 1.0 { return true; } } } // Evaluate based on bridge stats and positive reports. let bridge_ips_mean = bridge_ips_f64.mean(); let positive_reports_mean = positive_reports_f64.mean(); let cov_mat = { let x = bridge_ips_f64; let y = positive_reports_f64; let xx = x.covariance(x); let xy = x.covariance(y); let yy = y.covariance(y); vec![xx, xy, xy, yy] }; // Only use CDF test if today's numbers are worse than average if (bridge_ips_today as f64) < bridge_ips_mean || (positive_reports_today as f64) < positive_reports_mean { let mvn = MultivariateNormal::new(vec![bridge_ips_mean, positive_reports_mean], cov_mat); if mvn.is_ok() { let mvn = mvn.unwrap(); // Start 3 standard deviations below the mean, based on // 68-95-99.7 rule, assuming the confidence will be high // enough that 99.7 is close enough to "the whole // distribution" to be reasonable let bip_start = (bridge_ips_mean - (3.0 * bridge_ips_f64.std_dev()).ceil()) as i32; let pr_start = (positive_reports_mean - (3.0 * positive_reports_f64.std_dev()).ceil()) as i32; // Estimate the CDF by integrating the PDF by hand with step // size 1 let mut cdf = 0.0; for bip in bip_start..bridge_ips_today as i32 { for pr in pr_start..positive_reports_today as i32 { cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64])); } } if cdf < alpha { return true; } } else { // If we have 0 standard deviation or a covariance matrix // that is not positive definite, we need another way to // evaluate each variable. Ignore positive reports and // compute as in stage 2 if self.stage_two( confidence, bridge_ips, bridge_ips_today, negative_reports, negative_reports_today, ) { return true; } } } // If none of the tests concluded that the bridge is blocked, // return false false } }