123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419 |
- use crate::{BridgeInfo, BridgeInfoType};
- use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
- use nalgebra::DVector;
- use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal};
- use statrs::statistics::Statistics;
- use std::{
- cmp::min,
- collections::{BTreeMap, HashSet},
- };
/// Strategy interface used to predict which countries block a bridge.
///
/// Every stage receives a confidence value, historical series of counts, and
/// the corresponding counts for today, and answers `true` when the bridge is
/// believed to be blocked and `false` otherwise.
pub trait Analyzer {
    /// Stage one: evaluate an open-entry bridge.
    ///
    /// Returns `true` if the bridge is considered blocked, `false` otherwise.
    fn stage_one(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
    ) -> bool;

    /// Stage two: evaluate an invite-only bridge that has no positive
    /// reports to draw on.
    ///
    /// Returns `true` if the bridge is considered blocked, `false` otherwise.
    fn stage_two(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
    ) -> bool;

    /// Stage three: evaluate an invite-only bridge using positive reports in
    /// addition to the inputs of the earlier stages.
    ///
    /// Returns `true` if the bridge is considered blocked, `false` otherwise.
    fn stage_three(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
        positive_reports: &[u32],
        positive_reports_today: u32,
    ) -> bool;
}
- /// Accepts an analyzer, information about a bridge, and a confidence value.
- /// Returns a set of country codes where the bridge is believed to be blocked.
- pub fn blocked_in(
- analyzer: &dyn Analyzer,
- bridge_info: &BridgeInfo,
- confidence: f64,
- date: u32,
- min_historical_days: u32,
- max_historical_days: u32,
- ) -> HashSet<String> {
- let mut blocked_in = HashSet::<String>::new();
- let today = date;
- for (country, info) in &bridge_info.info_by_country {
- let age = today - info.first_seen;
- if info.blocked {
- // Assume bridges never become unblocked
- blocked_in.insert(country.to_string());
- } else {
- // Get today's values
- let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
- // TODO: Evaluate on yesterday if we don't have data for today?
- let today_info = match info.info_by_day.get(&today) {
- Some(v) => v,
- None => &new_map_binding,
- };
- let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
- Some(&v) => v,
- None => 0,
- };
- let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) {
- Some(&v) => v,
- None => 0,
- };
- let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) {
- Some(&v) => v,
- None => 0,
- };
- let num_days = min(age, max_historical_days);
- // Get time series for last num_days
- let mut bridge_ips = vec![0; num_days as usize];
- let mut negative_reports = vec![0; num_days as usize];
- let mut positive_reports = vec![0; num_days as usize];
- for i in 0..num_days {
- let date = today - num_days + i - 1;
- let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
- let day_info = match info.info_by_day.get(&date) {
- Some(v) => v,
- None => &new_map_binding,
- };
- bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
- Some(&v) => v,
- None => 0,
- };
- negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports)
- {
- Some(&v) => v,
- None => 0,
- };
- positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports)
- {
- Some(&v) => v,
- None => 0,
- };
- }
- // Evaluate using appropriate stage based on age of the bridge
- if age < UNTRUSTED_INTERVAL || age < min_historical_days {
- // open-entry bridge and/or not enough days of
- // historical days for stages 2 and 3
- if analyzer.stage_one(
- confidence,
- &bridge_ips,
- bridge_ips_today,
- &negative_reports,
- negative_reports_today,
- ) {
- blocked_in.insert(country.to_string());
- }
- } else if info.first_pr.is_none()
- || today < info.first_pr.unwrap() + min_historical_days
- {
- // invite-only bridge without min_historical_days of
- // historical data on positive reports
- if analyzer.stage_two(
- confidence,
- &bridge_ips,
- bridge_ips_today,
- &negative_reports,
- negative_reports_today,
- ) {
- blocked_in.insert(country.to_string());
- }
- } else {
- // invite-only bridge that has min_historical_days or
- // more of historical data since the first positive report
- if analyzer.stage_three(
- confidence,
- &bridge_ips,
- bridge_ips_today,
- &negative_reports,
- negative_reports_today,
- &positive_reports,
- positive_reports_today,
- ) {
- blocked_in.insert(country.to_string());
- }
- }
- }
- }
- blocked_in
- }
- // Analyzer implementations
- /// Dummy example that never thinks bridges are blocked
- pub struct ExampleAnalyzer {}
- impl Analyzer for ExampleAnalyzer {
- fn stage_one(
- &self,
- _confidence: f64,
- _bridge_ips: &[u32],
- _bridge_ips_today: u32,
- _negative_reports: &[u32],
- _negative_reports_today: u32,
- ) -> bool {
- false
- }
- fn stage_two(
- &self,
- _confidence: f64,
- _bridge_ips: &[u32],
- _bridge_ips_today: u32,
- _negative_reports: &[u32],
- _negative_reports_today: u32,
- ) -> bool {
- false
- }
- fn stage_three(
- &self,
- _confidence: f64,
- _bridge_ips: &[u32],
- _bridge_ips_today: u32,
- _negative_reports: &[u32],
- _negative_reports_today: u32,
- _positive_reports: &[u32],
- _positive_reports_today: u32,
- ) -> bool {
- false
- }
- }
/// Model data as multivariate normal distribution
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct NormalAnalyzer {
    /// Stage one reports a block when today's negative reports exceed this
    /// count.
    max_threshold: u32,
    /// Stage one reports a block when today's negative reports exceed
    /// `scaling_factor` times today's bridge IP count.
    scaling_factor: f64,
}

impl NormalAnalyzer {
    /// Creates an analyzer with the given stage-one limits.
    ///
    /// Both values are stored unchanged; no validation is performed.
    #[must_use]
    pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
        Self {
            max_threshold,
            scaling_factor,
        }
    }
}
- impl Analyzer for NormalAnalyzer {
- /// Evaluate open-entry bridge based on only today's data
- fn stage_one(
- &self,
- _confidence: f64,
- _bridge_ips: &[u32],
- bridge_ips_today: u32,
- _negative_reports: &[u32],
- negative_reports_today: u32,
- ) -> bool {
- negative_reports_today > self.max_threshold
- || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
- }
- /// Evaluate invite-only bridge based on historical data
- fn stage_two(
- &self,
- confidence: f64,
- bridge_ips: &[u32],
- bridge_ips_today: u32,
- negative_reports: &[u32],
- negative_reports_today: u32,
- ) -> bool {
- assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
- assert_eq!(bridge_ips.len(), negative_reports.len());
- let alpha = 1.0 - confidence;
- // Convert to f64 for stats
- let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
- let negative_reports_f64 = &negative_reports
- .iter()
- .map(|n| *n as f64)
- .collect::<Vec<f64>>();
- // Evaluate based on negative reports
- let negative_reports_mean = negative_reports_f64.mean();
- let negative_reports_sd = negative_reports_f64.std_dev();
- // Only use CCDF test if today's numbers are worse than average
- if (negative_reports_today as f64) > negative_reports_mean {
- let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
- if negative_reports_sd > 0.0 {
- // We use CCDF because more negative reports is worse.
- if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
- return true;
- }
- } else {
- // If the standard deviation is 0, we need another option.
- // Consider the bridge blocked negative reports increase by
- // more than 1 after a long static period. (Note that the
- // mean is the exact value because we had no deviation.)
- if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
- return true;
- }
- }
- }
- // Evaluate based on bridge stats
- let bridge_ips_mean = bridge_ips_f64.mean();
- let bridge_ips_sd = bridge_ips_f64.std_dev();
- // Only use CDF test if today's numbers are worse than average
- if (bridge_ips_today as f64) < bridge_ips_mean {
- let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
- if bridge_ips_sd > 0.0 {
- if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha {
- return true;
- }
- } else {
- // If the standard deviation is 0, we need another option.
- // Consider the bridge blocked if its usage dropped by more
- // than 1 bin. (Note that the mean is the exact value
- // because we had no deviation.)
- if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 {
- return true;
- }
- }
- }
- // If none of the tests concluded that the bridge is blocked,
- // return false
- false
- }
- /// Evaluate invite-only bridge with lv3+ users submitting positive reports
- fn stage_three(
- &self,
- confidence: f64,
- bridge_ips: &[u32],
- bridge_ips_today: u32,
- negative_reports: &[u32],
- negative_reports_today: u32,
- positive_reports: &[u32],
- positive_reports_today: u32,
- ) -> bool {
- assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
- assert_eq!(bridge_ips.len(), negative_reports.len());
- assert_eq!(bridge_ips.len(), positive_reports.len());
- let alpha = 1.0 - confidence;
- // Convert to f64 for stats
- let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
- let negative_reports_f64 = &negative_reports
- .iter()
- .map(|n| *n as f64)
- .collect::<Vec<f64>>();
- let positive_reports_f64 = &positive_reports
- .iter()
- .map(|n| *n as f64)
- .collect::<Vec<f64>>();
- // Evaluate based on negative reports. It is better to compute
- // negative reports test first because the positive test may be
- // expensive.
- let negative_reports_mean = negative_reports_f64.mean();
- let negative_reports_sd = negative_reports_f64.std_dev();
- // Only use CCDF test if today's numbers are worse than average
- if (negative_reports_today as f64) > negative_reports_mean {
- let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
- if negative_reports_sd > 0.0 {
- // We use CCDF because more negative reports is worse.
- if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
- return true;
- }
- } else {
- // Consider the bridge blocked negative reports increase by
- // more than 1 after a long static period. (Note that the
- // mean is the exact value because we had no deviation.)
- if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
- return true;
- }
- }
- }
- // Evaluate based on bridge stats and positive reports.
- let bridge_ips_mean = bridge_ips_f64.mean();
- let positive_reports_mean = positive_reports_f64.mean();
- let cov_mat = {
- let x = bridge_ips_f64;
- let y = positive_reports_f64;
- let xx = x.covariance(x);
- let xy = x.covariance(y);
- let yy = y.covariance(y);
- vec![xx, xy, xy, yy]
- };
- // Only use CDF test if today's numbers are worse than average
- if (bridge_ips_today as f64) < bridge_ips_mean
- || (positive_reports_today as f64) < positive_reports_mean
- {
- let mvn =
- MultivariateNormal::new(vec![bridge_ips_mean, positive_reports_mean], cov_mat);
- if mvn.is_ok() {
- let mvn = mvn.unwrap();
- // Start 3 standard deviations below the mean, based on
- // 68-95-99.7 rule, assuming the confidence will be high
- // enough that 99.7 is close enough to "the whole
- // distribution" to be reasonable
- let bip_start = (bridge_ips_mean - (3.0 * bridge_ips_f64.std_dev()).ceil()) as i32;
- let pr_start =
- (positive_reports_mean - (3.0 * positive_reports_f64.std_dev()).ceil()) as i32;
- // Estimate the CDF by integrating the PDF by hand with step
- // size 1
- let mut cdf = 0.0;
- for bip in bip_start..bridge_ips_today as i32 {
- for pr in pr_start..positive_reports_today as i32 {
- cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64]));
- }
- }
- if cdf < alpha {
- return true;
- }
- } else {
- // If we have 0 standard deviation or a covariance matrix
- // that is not positive definite, we need another way to
- // evaluate each variable. Ignore positive reports and
- // compute as in stage 2
- if self.stage_two(
- confidence,
- bridge_ips,
- bridge_ips_today,
- negative_reports,
- negative_reports_today,
- ) {
- return true;
- }
- }
- }
- // If none of the tests concluded that the bridge is blocked,
- // return false
- false
- }
- }
|