123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267 |
- use crate::{get_date, BridgeInfo, BridgeInfoType};
- use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
- use nalgebra::DVector;
- use statrs::distribution::{Continuous, MultivariateNormal};
- use std::collections::{BTreeMap, HashSet};
- /// Provides a function for predicting which countries block this bridge
- pub trait Analyzer {
- fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String>;
- }
/// Dummy example analyzer that only reports blockages that are already
/// recorded in the bridge data.
///
/// The type carries no state, so it derives the common traits (`Debug`,
/// `Clone`, `Copy`, `Default`) to make it easy to log, copy, and construct.
#[derive(Debug, Clone, Copy, Default)]
pub struct ExampleAnalyzer {}
- impl Analyzer for ExampleAnalyzer {
- fn blocked_in(&self, bridge_info: &BridgeInfo, _confidence: f64) -> HashSet<String> {
- let mut blocked_in = HashSet::<String>::new();
- for (country, info) in &bridge_info.info_by_country {
- if info.blocked {
- blocked_in.insert(country.to_string());
- }
- }
- blocked_in
- }
- }
/// Analyzer that models a bridge's daily statistics as a multivariate normal
/// distribution.
#[derive(Debug, Clone, PartialEq)]
pub struct NormalAnalyzer {
    /// Absolute number of negative reports in a single day above which an
    /// open-entry bridge is considered blocked.
    max_threshold: u32,
    /// Multiplier applied to the day's `bridge_ips` count; an open-entry
    /// bridge is considered blocked when its negative reports exceed
    /// `scaling_factor * bridge_ips`.
    scaling_factor: f64,
}
- impl NormalAnalyzer {
- pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
- Self {
- max_threshold,
- scaling_factor,
- }
- }
- fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
- let n = data.len();
- // Compute mean vector
- let mean_vec = {
- let mut mean_vec = Vec::<f64>::new();
- for var in data {
- mean_vec.push({
- let mut sum = 0.0;
- for count in *var {
- sum += *count as f64;
- }
- sum / var.len() as f64
- });
- }
- mean_vec
- };
- // Compute covariance matrix
- let cov_mat = {
- let mut cov_mat = Vec::<f64>::new();
- // We don't need to recompute Syx, but we currently do
- for i in 0..n {
- for j in 0..n {
- cov_mat.push({
- let var1 = data[i];
- let var1_mean = mean_vec[i];
- let var2 = data[j];
- let var2_mean = mean_vec[j];
- assert_eq!(var1.len(), var2.len());
- let mut sum = 0.0;
- for index in 0..var1.len() {
- sum +=
- (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
- }
- sum / var1.len() as f64
- });
- }
- }
- cov_mat
- };
- (mean_vec, cov_mat)
- }
- /// Evaluate open-entry bridge based on only today's data
- fn stage_one(&self, bridge_ips_today: u32, negative_reports_today: u32) -> bool {
- negative_reports_today > self.max_threshold
- || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
- }
- /// Evaluate invite-only bridge based on last 30 days
- fn stage_two(
- &self,
- confidence: f64,
- bridge_ips: &[u32],
- bridge_ips_today: u32,
- negative_reports: &[u32],
- negative_reports_today: u32,
- ) -> bool {
- assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
- assert_eq!(bridge_ips.len(), negative_reports.len());
- let (mean_vec, cov_mat) =
- Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
- let bridge_ips_mean = mean_vec[0];
- let negative_reports_mean = mean_vec[1];
- let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
- if mvn.pdf(&DVector::from_vec(vec![
- bridge_ips_today as f64,
- negative_reports_today as f64,
- ])) < confidence
- {
- (negative_reports_today as f64) > negative_reports_mean
- || (bridge_ips_today as f64) < bridge_ips_mean
- } else {
- false
- }
- }
- /// Evaluate invite-only bridge with lv3+ users submitting positive reports
- fn stage_three(
- &self,
- confidence: f64,
- bridge_ips: &[u32],
- bridge_ips_today: u32,
- negative_reports: &[u32],
- negative_reports_today: u32,
- positive_reports: &[u32],
- positive_reports_today: u32,
- ) -> bool {
- assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
- assert_eq!(bridge_ips.len(), negative_reports.len());
- assert_eq!(bridge_ips.len(), positive_reports.len());
- let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
- bridge_ips,
- negative_reports,
- positive_reports,
- ]);
- let bridge_ips_mean = mean_vec[0];
- let negative_reports_mean = mean_vec[1];
- let positive_reports_mean = mean_vec[2];
- let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
- if mvn.pdf(&DVector::from_vec(vec![
- bridge_ips_today as f64,
- negative_reports_today as f64,
- positive_reports_today as f64,
- ])) < confidence
- {
- (negative_reports_today as f64) > negative_reports_mean
- || (bridge_ips_today as f64) < bridge_ips_mean
- || (positive_reports_today as f64) < positive_reports_mean
- } else {
- false
- }
- }
- }
- impl Analyzer for NormalAnalyzer {
- fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String> {
- // TODO: Re-evaluate past days if we have backdated reports
- let mut blocked_in = HashSet::<String>::new();
- let today = get_date();
- let age = bridge_info.first_seen - today;
- for (country, info) in &bridge_info.info_by_country {
- if info.blocked {
- // Assume bridges never become unblocked
- blocked_in.insert(country.to_string());
- } else {
- // Get today's values
- let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
- // TODO: Evaluate on yesterday if we don't have data for today?
- let today_info = match info.info_by_day.get(&today) {
- Some(v) => v,
- None => &new_map_binding,
- };
- let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
- Some(v) => *v,
- None => 0,
- };
- let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports)
- {
- Some(v) => *v,
- None => 0,
- };
- let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports)
- {
- Some(v) => *v,
- None => 0,
- };
- if age < UNTRUSTED_INTERVAL {
- // open-entry bridge
- if self.stage_one(bridge_ips_today, negative_reports_today) {
- blocked_in.insert(country.to_string());
- }
- } else {
- // invite-only bridge
- let mut bridge_ips = [0; UNTRUSTED_INTERVAL as usize];
- let mut negative_reports = [0; UNTRUSTED_INTERVAL as usize];
- let mut positive_reports = [0; UNTRUSTED_INTERVAL as usize];
- let mut stage_3 = false;
- // Populate time series
- for i in 0..UNTRUSTED_INTERVAL {
- let date = today - UNTRUSTED_INTERVAL + i - 1;
- let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
- let day_info = match info.info_by_day.get(&date) {
- Some(v) => v,
- None => &new_map_binding,
- };
- bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
- Some(v) => *v,
- None => 0,
- };
- negative_reports[i as usize] =
- match day_info.get(&BridgeInfoType::NegativeReports) {
- Some(v) => *v,
- None => 0,
- };
- positive_reports[i as usize] =
- match day_info.get(&BridgeInfoType::PositiveReports) {
- Some(v) => {
- stage_3 = true;
- *v
- }
- None => 0,
- };
- }
- if stage_3 {
- // We've seen positive reports
- if self.stage_three(
- confidence,
- &bridge_ips,
- bridge_ips_today,
- &negative_reports,
- negative_reports_today,
- &positive_reports,
- positive_reports_today,
- ) {
- blocked_in.insert(country.to_string());
- }
- } else {
- // We have not seen positive reports
- if self.stage_two(
- confidence,
- &bridge_ips,
- bridge_ips_today,
- &negative_reports,
- negative_reports_today,
- ) {
- blocked_in.insert(country.to_string());
- }
- }
- }
- }
- }
- blocked_in
- }
- }
|