Преглед изворни кода

Refactor analysis for efficiency

Vecna пре 9 месеци
родитељ
комит
659b8fa16c
1 измењених фајлова са 64 додато и 45 уклоњено
  1. 64 45
      src/analysis.rs

+ 64 - 45
src/analysis.rs

@@ -309,35 +309,44 @@ impl Analyzer for NormalAnalyzer {
 
         let alpha = 1.0 - confidence;
 
-        let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
+        // Evaluate based on negative reports
         let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
-
-        // Model negative reports separately
-        let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
         let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
-
-        // If we have 0 standard deviation, we need another way to
-        // evaluate each variable
-        let bip_test = if bridge_ips_sd > 0.0 {
-            bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
-        } else {
-            // Consider the bridge blocked if its usage dropped by more
-            // than 1 bin. (Note that the mean is the exact value
-            // because we had no deviation.)
-            (bridge_ips_today as f64) < bridge_ips_mean - 8.0
-        };
-        let nr_test = if negative_reports_sd > 0.0 {
+        if negative_reports_sd > 0.0 {
             // We use CCDF because more negative reports is worse.
-            (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
+            if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
+                return true;
+            }
         } else {
+            // If the standard deviation is 0, we need another option.
             // Consider the bridge blocked if negative reports increase by
             // more than 1 after a long static period. (Note that the
             // mean is the exact value because we had no deviation.)
-            (negative_reports_today as f64) > negative_reports_mean + 1.0
-        };
+            if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
+                return true;
+            }
+        }
+
+        // Evaluate based on bridge stats
+        let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
+        let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
+        if bridge_ips_sd > 0.0 {
+            if bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha {
+                return true;
+            }
+        } else {
+            // If the standard deviation is 0, we need another option.
+            // Consider the bridge blocked if its usage dropped by more
+            // than 1 bin. (Note that the mean is the exact value
+            // because we had no deviation.)
+            if (bridge_ips_today as f64) < bridge_ips_mean - 8.0 {
+                return true;
+            }
+        }
 
-        // Return true if any test concluded the bridge is blocked
-        bip_test || nr_test
+        // If none of the tests concluded that the bridge is blocked,
+        // return false
+        false
     }
 
     /// Evaluate invite-only bridge with lv3+ users submitting positive reports
@@ -357,19 +366,29 @@ impl Analyzer for NormalAnalyzer {
 
         let alpha = 1.0 - confidence;
 
-        // Model bridge IPs and positive reports with multivariate
-        // normal distribution
-        let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
-        let mvn = MultivariateNormal::new(mean_vec, cov_mat);
-
-        // Model negative reports separately
+        // Evaluate based on negative reports. It is better to compute
+        // negative reports test first because the positive test may be
+        // expensive.
         let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
         let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
+        if negative_reports_sd > 0.0 {
+            // We use CCDF because more negative reports is worse.
+            if (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha {
+                return true;
+            }
+        } else {
+            // Consider the bridge blocked if negative reports increase by
+            // more than 1 after a long static period. (Note that the
+            // mean is the exact value because we had no deviation.)
+            if (negative_reports_today as f64) > negative_reports_mean + 1.0 {
+                return true;
+            }
+        }
 
-        // If we have 0 standard deviation or a covariance matrix that
-        // is not positive definite, we need another way to evaluate
-        // each variable
-        let positive_test = if mvn.is_ok() {
+        // Evaluate based on bridge stats and positive reports.
+        let (mean_vec, cov_mat) = Self::stats(&[bridge_ips, positive_reports]);
+        let mvn = MultivariateNormal::new(mean_vec, cov_mat);
+        if mvn.is_ok() {
             let mvn = mvn.unwrap();
 
             // Estimate the CDF by integrating the PDF by hand with step
@@ -380,27 +399,27 @@ impl Analyzer for NormalAnalyzer {
                     cdf += mvn.pdf(&DVector::from_vec(vec![bip as f64, pr as f64]));
                 }
             }
-            cdf < alpha
+            if cdf < alpha {
+                return true;
+            }
         } else {
-            // Ignore positive reports and compute as in stage 2
-            self.stage_two(
+            // If we have 0 standard deviation or a covariance matrix
+            // that is not positive definite, we need another way to
+            // evaluate each variable. Ignore positive reports and
+            // compute as in stage 2
+            if self.stage_two(
                 confidence,
                 bridge_ips,
                 bridge_ips_today,
                 negative_reports,
                 negative_reports_today,
-            )
-        };
-        let nr_test = if negative_reports_sd > 0.0 {
-            // We use CCDF because more negative reports is worse.
-            (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
-        } else {
-            // Consider the bridge blocked negative reports increase by
-            // more than 1 after a long static period. (Note that the
-            // mean is the exact value because we had no deviation.)
-            (negative_reports_today as f64) > negative_reports_mean + 1.0
+            ) {
+                return true;
+            }
         };
 
-        positive_test || nr_test
+        // If none of the tests concluded that the bridge is blocked,
+        // return false
+        false
     }
 }