Browse Source

Use statrs functions

Vecna 10 months ago
parent
commit
a8a0983f9e
1 changed files with 40 additions and 71 deletions
  1. 40 71
      src/analysis.rs

+ 40 - 71
src/analysis.rs

@@ -2,6 +2,7 @@ use crate::{BridgeInfo, BridgeInfoType};
 use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
 use nalgebra::DVector;
 use statrs::distribution::{Continuous, ContinuousCDF, MultivariateNormal, Normal};
+use statrs::statistics::Statistics;
 use std::{
     cmp::min,
     collections::{BTreeMap, HashSet},
@@ -214,71 +215,6 @@ impl NormalAnalyzer {
             scaling_factor,
         }
     }
-
-    fn mean(data: &[u32]) -> f64 {
-        let mut sum = 0.0;
-        for count in data {
-            sum += *count as f64;
-        }
-        sum / data.len() as f64
-    }
-
-    fn std_dev(data: &[u32], mean: f64) -> f64 {
-        let mut sum = 0.0;
-        for count in data {
-            sum += (*count as f64 - mean).powi(2);
-        }
-        (sum / data.len() as f64).sqrt()
-    }
-
-    fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
-        let mean = Self::mean(data);
-        let std_dev = Self::std_dev(data, mean);
-        (mean, std_dev)
-    }
-
-    // Returns the mean vector and covariance matrix
-    fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
-        let n = data.len();
-
-        // Compute mean vector
-        let mean_vec = {
-            let mut mean_vec = Vec::<f64>::new();
-            for var in data {
-                mean_vec.push(Self::mean(var));
-            }
-            mean_vec
-        };
-
-        // Compute covariance matrix
-        let cov_mat = {
-            let mut cov_mat = Vec::<f64>::new();
-            // We don't need to recompute Syx, but we currently do
-            for i in 0..n {
-                for j in 0..n {
-                    cov_mat.push({
-                        let var1 = data[i];
-                        let var1_mean = mean_vec[i];
-
-                        let var2 = data[j];
-                        let var2_mean = mean_vec[j];
-
-                        assert_eq!(var1.len(), var2.len());
-
-                        let mut sum = 0.0;
-                        for index in 0..var1.len() {
-                            sum +=
-                                (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
-                        }
-                        sum / (var1.len() - 1) as f64
-                    });
-                }
-            }
-            cov_mat
-        };
-
-        (mean_vec, cov_mat)
-    }
 }
 
 impl Analyzer for NormalAnalyzer {
@@ -309,8 +245,16 @@ impl Analyzer for NormalAnalyzer {
 
         let alpha = 1.0 - confidence;
 
+        // Convert to f64 for stats
+        let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
+        let negative_reports_f64 = &negative_reports
+            .iter()
+            .map(|n| *n as f64)
+            .collect::<Vec<f64>>();
+
         // Evaluate based on negative reports
-        let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
+        let negative_reports_mean = negative_reports_f64.mean();
+        let negative_reports_sd = negative_reports_f64.std_dev();
 
         // Only use CCDF test if today's numbers are worse than average
         if (negative_reports_today as f64) > negative_reports_mean {
@@ -332,7 +276,8 @@ impl Analyzer for NormalAnalyzer {
         }
 
         // Evaluate based on bridge stats
-        let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
+        let bridge_ips_mean = bridge_ips_f64.mean();
+        let bridge_ips_sd = bridge_ips_f64.std_dev();
 
         // Only use CDF test if today's numbers are worse than average
         if (bridge_ips_today as f64) < bridge_ips_mean {
@@ -374,10 +319,22 @@ impl Analyzer for NormalAnalyzer {
 
         let alpha = 1.0 - confidence;
 
+        // Convert to f64 for stats
+        let bridge_ips_f64 = &bridge_ips.iter().map(|n| *n as f64).collect::<Vec<f64>>();
+        let negative_reports_f64 = &negative_reports
+            .iter()
+            .map(|n| *n as f64)
+            .collect::<Vec<f64>>();
+        let positive_reports_f64 = &positive_reports
+            .iter()
+            .map(|n| *n as f64)
+            .collect::<Vec<f64>>();
+
         // Evaluate based on negative reports. It is better to compute
         // negative reports test first because the positive test may be
         // expensive.
-        let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
+        let negative_reports_mean = negative_reports_f64.mean();
+        let negative_reports_sd = negative_reports_f64.std_dev();
 
         // Only use CCDF test if today's numbers are worse than average
         if (negative_reports_today as f64) > negative_reports_mean {
@@ -398,12 +355,24 @@ impl Analyzer for NormalAnalyzer {
         }
 
         // Evaluate based on bridge stats and positive reports.
-        let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
+        let bridge_ips_mean = bridge_ips_f64.mean();
+        let positive_reports_mean = positive_reports_f64.mean();
+
+        let cov_mat = {
+            let x = bridge_ips_f64;
+            let y = positive_reports_f64;
+            let xx = x.covariance(x);
+            let xy = x.covariance(y);
+            let yy = y.covariance(y);
+            vec![xx, xy, xy, yy]
+        };
 
         // Only use CDF test if today's numbers are worse than average
-        if (bridge_ips_today as f64) < mean_vec[0] || (positive_reports_today as f64) < mean_vec[1]
+        if (bridge_ips_today as f64) < bridge_ips_mean
+            || (positive_reports_today as f64) < positive_reports_mean
         {
-            let mvn = MultivariateNormal::new(mean_vec, cov_mat);
+            let mvn =
+                MultivariateNormal::new(vec![bridge_ips_mean, positive_reports_mean], cov_mat);
             if mvn.is_ok() {
                 let mvn = mvn.unwrap();