Selaa lähdekoodia

Use CDF, not PDF with artificial 'blocked' data

TODO: Figure out proper multivariate CDF
Vecna 9 kuukautta sitten
vanhempi
commit
97d4622cd4
1 muutettua tiedostoa jossa 41 lisäystä ja 98 poistoa
  1. 41 98
      src/analysis.rs

+ 41 - 98
src/analysis.rs

@@ -2,7 +2,7 @@ use crate::{BridgeInfo, BridgeInfoType};
 use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
 use nalgebra::{Cholesky, DMatrix, DVector};
 use rand::Rng;
-use statrs::distribution::{Continuous, MultivariateNormal, Normal};
+use statrs::distribution::{ContinuousCDF, MultivariateNormal, Normal};
 use std::{
     cmp::{max, min},
     collections::{BTreeMap, HashSet},
@@ -333,52 +333,29 @@ impl Analyzer for NormalAnalyzer {
         let alpha = 1.0 - confidence;
 
         let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
+        let bridge_ips_mean = mean_vec[0];
         let negative_reports_mean = mean_vec[1];
         let bridge_ips_sd = sd_vec[0];
         let negative_reports_sd = sd_vec[1];
 
-        // Artificially create data for alternative hypothesis
-        let num_days = bridge_ips.len() as usize;
-        let mut bridge_ips_blocked = vec![0; num_days];
-        let mut negative_reports_blocked = vec![0; num_days];
-        let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
-        for i in 0..num_days {
-            // Suppose bridge stats will go down by 2 SDs
-            bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
-                0
-            } else {
-                bridge_ips[i] - bridge_ips_deviation
-            };
-            // Suppose negative reports will go up by 2 SDs
-            negative_reports_blocked[i] =
-                negative_reports[i] + (2.0 * negative_reports_sd).round() as u32;
-        }
-        let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) =
-            Self::stats(&[&bridge_ips_blocked, &negative_reports_blocked]);
-
-        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
-        let pdf = mvn.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-        ]));
-
-        let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
-        let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-        ]));
-
-        // Also model negative reports in isolation
+        /*
+                let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
+                let pdf = mvn.pdf(&DVector::from_vec(vec![
+                    bridge_ips_today as f64,
+                    negative_reports_today as f64,
+                ]));
+        */
+
+        // Model each variable in isolation. We use 1 - the CDF for
+        // negative reports because more negative reports is worse.
+        let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
+        let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
         let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
-        let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
-        let nr_normal_blocked = Normal::new(
-            negative_reports_mean + 2.0 * negative_reports_sd,
-            negative_reports_sd,
-        )
-        .unwrap();
-        let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
-
-        (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
+        let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
+
+        // For now, just look at each variable in isolation
+        // TODO: How do we do a multivariate normal CDF?
+        bip_cdf < alpha || nr_cdf < alpha
     }
 
     /// Evaluate invite-only bridge with lv3+ users submitting positive reports
@@ -400,67 +377,33 @@ impl Analyzer for NormalAnalyzer {
 
         let (mean_vec, sd_vec, cov_mat) =
             Self::stats(&[bridge_ips, negative_reports, positive_reports]);
+        let bridge_ips_mean = mean_vec[0];
         let negative_reports_mean = mean_vec[1];
+        let positive_reports_mean = mean_vec[2];
         let bridge_ips_sd = sd_vec[0];
         let negative_reports_sd = sd_vec[1];
         let positive_reports_sd = sd_vec[2];
 
-        // Artificially create data for alternative hypothesis
-        let num_days = bridge_ips.len() as usize;
-        let mut bridge_ips_blocked = vec![0; num_days];
-        let mut negative_reports_blocked = vec![0; num_days];
-        let mut positive_reports_blocked = vec![0; num_days];
-        let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
-        let positive_reports_deviation = (2.0 * positive_reports_sd).round() as u32;
-        for i in 0..num_days {
-            // Suppose positive reports will go down by 2 SDs
-            positive_reports_blocked[i] = if positive_reports_deviation > positive_reports[i] {
-                0
-            } else {
-                positive_reports[i] - positive_reports_deviation
-            };
-            // Suppose bridge stats will go down by 2 SDs
-            bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
-                0
-            } else {
-                bridge_ips[i] - bridge_ips_deviation
-            };
-            // Suppose each user who would have submitted a positive report but
-            // didn't submits a negative report instead.
-            negative_reports_blocked[i] =
-                negative_reports[i] + positive_reports[i] - positive_reports_blocked[i];
-        }
-        let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[
-            &bridge_ips_blocked,
-            &negative_reports_blocked,
-            &positive_reports_blocked,
-        ]);
-
-        let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
-        let pdf = mvn.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-            positive_reports_today as f64,
-        ]));
-
-        let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
-        let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
-            bridge_ips_today as f64,
-            negative_reports_today as f64,
-            positive_reports_today as f64,
-        ]));
-
-        // Also model negative reports in isolation
+        /*
+                let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
+                let pdf = mvn.pdf(&DVector::from_vec(vec![
+                    bridge_ips_today as f64,
+                    negative_reports_today as f64,
+                    positive_reports_today as f64,
+                ]));
+        */
+
+        // Model each variable in isolation. We use 1 - the CDF for
+        // negative reports because more negative reports is worse.
+        let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
+        let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
         let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
-        let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
-        // Note we do NOT make this a function of positive signals
-        let nr_normal_blocked = Normal::new(
-            negative_reports_mean + 2.0 * negative_reports_sd,
-            negative_reports_sd,
-        )
-        .unwrap();
-        let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
-
-        (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
+        let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
+        let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap();
+        let pr_cdf = pr_normal.cdf(positive_reports_today as f64);
+
+        // For now, just look at each variable in isolation
+        // TODO: How do we do a multivariate normal CDF?
+        bip_cdf < alpha || nr_cdf < alpha || pr_cdf < alpha
     }
 }