|
@@ -1,20 +1,265 @@
|
|
|
-use crate::BridgeInfo;
|
|
|
-use std::collections::HashSet;
|
|
|
+use crate::{get_date, BridgeInfo, BridgeInfoType};
|
|
|
+use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
|
|
|
+use nalgebra::DVector;
|
|
|
+use statrs::distribution::{Continuous, MultivariateNormal};
|
|
|
+use std::collections::{BTreeMap, HashSet};
|
|
|
|
|
|
/// Provides a function for predicting which countries block this bridge
|
|
|
pub trait Analyzer {
|
|
|
fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String>;
|
|
|
}
|
|
|
|
|
|
+/// Dummy example that just tells us about blockages we already know about
|
|
|
pub struct ExampleAnalyzer {}
|
|
|
|
|
|
-/// Dummy example which just tells us about blockages we already know about
|
|
|
impl Analyzer for ExampleAnalyzer {
|
|
|
+ fn blocked_in(&self, bridge_info: &BridgeInfo, _confidence: f64) -> HashSet<String> {
|
|
|
+ let mut blocked_in = HashSet::<String>::new();
|
|
|
+ for (country, info) in &bridge_info.info_by_country {
|
|
|
+ if info.blocked {
|
|
|
+ blocked_in.insert(country.to_string());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ blocked_in
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/// Model data as multivariate normal distribution
|
|
|
+pub struct NormalAnalyzer {
|
|
|
+ max_threshold: u32,
|
|
|
+ scaling_factor: f64,
|
|
|
+}
|
|
|
+
|
|
|
+impl NormalAnalyzer {
|
|
|
+ pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
|
|
|
+ Self {
|
|
|
+ max_threshold,
|
|
|
+ scaling_factor,
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
|
|
|
+ let n = data.len();
|
|
|
+
|
|
|
+ // Compute mean vector
|
|
|
+ let mean_vec = {
|
|
|
+ let mut mean_vec = Vec::<f64>::new();
|
|
|
+ for var in data {
|
|
|
+ mean_vec.push({
|
|
|
+ let mut sum = 0.0;
|
|
|
+ for count in *var {
|
|
|
+ sum += *count as f64;
|
|
|
+ }
|
|
|
+ sum / var.len() as f64
|
|
|
+ });
|
|
|
+ }
|
|
|
+ mean_vec
|
|
|
+ };
|
|
|
+
|
|
|
+ // Compute covariance matrix
|
|
|
+ let cov_mat = {
|
|
|
+ let mut cov_mat = Vec::<f64>::new();
|
|
|
+ // We don't need to recompute Syx, but we currently do
|
|
|
+ for i in 0..n {
|
|
|
+ for j in 0..n {
|
|
|
+ cov_mat.push({
|
|
|
+ let var1 = data[i];
|
|
|
+ let var1_mean = mean_vec[i];
|
|
|
+
|
|
|
+ let var2 = data[j];
|
|
|
+ let var2_mean = mean_vec[j];
|
|
|
+
|
|
|
+ assert_eq!(var1.len(), var2.len());
|
|
|
+
|
|
|
+ let mut sum = 0.0;
|
|
|
+ for index in 0..var1.len() {
|
|
|
+ sum +=
|
|
|
+ (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
|
|
|
+ }
|
|
|
+ sum / var1.len() as f64
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+ cov_mat
|
|
|
+ };
|
|
|
+
|
|
|
+ (mean_vec, cov_mat)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Evaluate open-entry bridge based on only today's data
|
|
|
+ fn stage_one(&self, bridge_ips_today: u32, negative_reports_today: u32) -> bool {
|
|
|
+ negative_reports_today > self.max_threshold
|
|
|
+ || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Evaluate invite-only bridge based on last 30 days
|
|
|
+ fn stage_two(
|
|
|
+ &self,
|
|
|
+ confidence: f64,
|
|
|
+ bridge_ips: &[u32],
|
|
|
+ bridge_ips_today: u32,
|
|
|
+ negative_reports: &[u32],
|
|
|
+ negative_reports_today: u32,
|
|
|
+ ) -> bool {
|
|
|
+ assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
|
|
|
+ assert_eq!(bridge_ips.len(), negative_reports.len());
|
|
|
+
|
|
|
+ let (mean_vec, cov_mat) =
|
|
|
+ Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
|
|
|
+ let bridge_ips_mean = mean_vec[0];
|
|
|
+ let negative_reports_mean = mean_vec[1];
|
|
|
+
|
|
|
+ let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
|
|
+ if mvn.pdf(&DVector::from_vec(vec![
|
|
|
+ bridge_ips_today as f64,
|
|
|
+ negative_reports_today as f64,
|
|
|
+ ])) < confidence
|
|
|
+ {
|
|
|
+ (negative_reports_today as f64) > negative_reports_mean
|
|
|
+ || (bridge_ips_today as f64) < bridge_ips_mean
|
|
|
+ } else {
|
|
|
+ false
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
|
|
+ fn stage_three(
|
|
|
+ &self,
|
|
|
+ confidence: f64,
|
|
|
+ bridge_ips: &[u32],
|
|
|
+ bridge_ips_today: u32,
|
|
|
+ negative_reports: &[u32],
|
|
|
+ negative_reports_today: u32,
|
|
|
+ positive_reports: &[u32],
|
|
|
+ positive_reports_today: u32,
|
|
|
+ ) -> bool {
|
|
|
+ assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
|
|
|
+ assert_eq!(bridge_ips.len(), negative_reports.len());
|
|
|
+ assert_eq!(bridge_ips.len(), positive_reports.len());
|
|
|
+
|
|
|
+ let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
|
|
|
+ bridge_ips,
|
|
|
+ negative_reports,
|
|
|
+ positive_reports,
|
|
|
+ ]);
|
|
|
+ let bridge_ips_mean = mean_vec[0];
|
|
|
+ let negative_reports_mean = mean_vec[1];
|
|
|
+ let positive_reports_mean = mean_vec[2];
|
|
|
+
|
|
|
+ let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
|
|
+ if mvn.pdf(&DVector::from_vec(vec![
|
|
|
+ bridge_ips_today as f64,
|
|
|
+ negative_reports_today as f64,
|
|
|
+ positive_reports_today as f64,
|
|
|
+ ])) < confidence
|
|
|
+ {
|
|
|
+ (negative_reports_today as f64) > negative_reports_mean
|
|
|
+ || (bridge_ips_today as f64) < bridge_ips_mean
|
|
|
+ || (positive_reports_today as f64) < positive_reports_mean
|
|
|
+ } else {
|
|
|
+ false
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+impl Analyzer for NormalAnalyzer {
|
|
|
fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String> {
|
|
|
+ // TODO: Re-evaluate past days if we have backdated reports
|
|
|
let mut blocked_in = HashSet::<String>::new();
|
|
|
+ let today = get_date();
|
|
|
+ let age = bridge_info.first_seen - today;
|
|
|
for (country, info) in &bridge_info.info_by_country {
|
|
|
if info.blocked {
|
|
|
+ // Assume bridges never become unblocked
|
|
|
blocked_in.insert(country.to_string());
|
|
|
+ } else {
|
|
|
+ // Get today's values
|
|
|
+ let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
|
|
|
+ // TODO: Evaluate on yesterday if we don't have data for today?
|
|
|
+ let today_info = match info.info_by_day.get(&today) {
|
|
|
+ Some(v) => v,
|
|
|
+ None => &new_map_binding,
|
|
|
+ };
|
|
|
+ let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
|
|
|
+ Some(v) => *v,
|
|
|
+ None => 0,
|
|
|
+ };
|
|
|
+ let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports)
|
|
|
+ {
|
|
|
+ Some(v) => *v,
|
|
|
+ None => 0,
|
|
|
+ };
|
|
|
+ let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports)
|
|
|
+ {
|
|
|
+ Some(v) => *v,
|
|
|
+ None => 0,
|
|
|
+ };
|
|
|
+
|
|
|
+ if age < UNTRUSTED_INTERVAL {
|
|
|
+ // open-entry bridge
|
|
|
+ if self.stage_one(bridge_ips_today, negative_reports_today) {
|
|
|
+ blocked_in.insert(country.to_string());
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ // invite-only bridge
|
|
|
+ let mut bridge_ips = [0; UNTRUSTED_INTERVAL as usize];
|
|
|
+ let mut negative_reports = [0; UNTRUSTED_INTERVAL as usize];
|
|
|
+ let mut positive_reports = [0; UNTRUSTED_INTERVAL as usize];
|
|
|
+ let mut stage_3 = false;
|
|
|
+
|
|
|
+ // Populate time series
|
|
|
+ for i in 0..UNTRUSTED_INTERVAL {
|
|
|
+ let date = today - UNTRUSTED_INTERVAL + i - 1;
|
|
|
+ let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
|
|
|
+ let day_info = match info.info_by_day.get(&date) {
|
|
|
+ Some(v) => v,
|
|
|
+ None => &new_map_binding,
|
|
|
+ };
|
|
|
+ bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
|
|
|
+ Some(v) => *v,
|
|
|
+ None => 0,
|
|
|
+ };
|
|
|
+ negative_reports[i as usize] =
|
|
|
+ match day_info.get(&BridgeInfoType::NegativeReports) {
|
|
|
+ Some(v) => *v,
|
|
|
+ None => 0,
|
|
|
+ };
|
|
|
+ positive_reports[i as usize] =
|
|
|
+ match day_info.get(&BridgeInfoType::PositiveReports) {
|
|
|
+ Some(v) => {
|
|
|
+ stage_3 = true;
|
|
|
+ *v
|
|
|
+ }
|
|
|
+ None => 0,
|
|
|
+ };
|
|
|
+ }
|
|
|
+
|
|
|
+ if stage_3 {
|
|
|
+ // We've seen positive reports
|
|
|
+ if self.stage_three(
|
|
|
+ confidence,
|
|
|
+ &bridge_ips,
|
|
|
+ bridge_ips_today,
|
|
|
+ &negative_reports,
|
|
|
+ negative_reports_today,
|
|
|
+ &positive_reports,
|
|
|
+ positive_reports_today,
|
|
|
+ ) {
|
|
|
+ blocked_in.insert(country.to_string());
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ // We have not seen positive reports
|
|
|
+ if self.stage_two(
|
|
|
+ confidence,
|
|
|
+ &bridge_ips,
|
|
|
+ bridge_ips_today,
|
|
|
+ &negative_reports,
|
|
|
+ negative_reports_today,
|
|
|
+ ) {
|
|
|
+ blocked_in.insert(country.to_string());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
blocked_in
|