|
@@ -216,6 +216,28 @@ impl NormalAnalyzer {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/// Returns the arithmetic mean of `data` as an `f64`.
///
/// An empty slice has no mean, so `NaN` is returned in that case. Callers
/// that compare against the result will see every comparison evaluate to
/// `false`.
fn mean(data: &[u32]) -> f64 {
    if data.is_empty() {
        // Make the 0/0 case explicit instead of relying on float division.
        return f64::NAN;
    }

    // `f64::from` is a lossless widening of each `u32` count.
    let total: f64 = data.iter().copied().map(f64::from).sum();
    total / data.len() as f64
}
|
|
|
+
|
|
|
/// Returns the population standard deviation of `data` around `mean`.
///
/// The sum of squared deviations is divided by the number of samples
/// (not `n - 1`) before taking the square root. `mean` is supplied by the
/// caller rather than recomputed here.
///
/// An empty slice yields `NaN`.
fn std_dev(data: &[u32], mean: f64) -> f64 {
    if data.is_empty() {
        // Make the 0/0 case explicit instead of relying on float division.
        return f64::NAN;
    }

    let squared_deviations: f64 = data
        .iter()
        .map(|&count| (f64::from(count) - mean).powi(2))
        .sum();
    (squared_deviations / data.len() as f64).sqrt()
}
|
|
|
+
|
|
|
+ fn mean_and_std_dev(data: &[u32]) -> (f64, f64) {
|
|
|
+ let mean = Self::mean(data);
|
|
|
+ let std = Self::std_dev(data, mean);
|
|
|
+ (mean, std)
|
|
|
+ }
|
|
|
+
|
|
|
// Returns the mean vector, vector of individual standard deviations, and
|
|
|
// covariance matrix. If the standard deviation for a variable is 0 and/or
|
|
|
// the covariance matrix is not positive definite, add some noise to the
|
|
@@ -318,7 +340,7 @@ impl Analyzer for NormalAnalyzer {
|
|
|
|| f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
|
|
|
}
|
|
|
|
|
|
- /// Evaluate invite-only bridge based on last 30 days
|
|
|
+ /// Evaluate invite-only bridge based on historical data
|
|
|
fn stage_two(
|
|
|
&self,
|
|
|
confidence: f64,
|
|
@@ -332,30 +354,35 @@ impl Analyzer for NormalAnalyzer {
|
|
|
|
|
|
let alpha = 1.0 - confidence;
|
|
|
|
|
|
- let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
|
|
|
- let bridge_ips_mean = mean_vec[0];
|
|
|
- let negative_reports_mean = mean_vec[1];
|
|
|
- let bridge_ips_sd = sd_vec[0];
|
|
|
- let negative_reports_sd = sd_vec[1];
|
|
|
+ let (bridge_ips_mean, bridge_ips_sd) = Self::mean_and_std_dev(bridge_ips);
|
|
|
+ let (negative_reports_mean, negative_reports_sd) = Self::mean_and_std_dev(negative_reports);
|
|
|
|
|
|
- /*
|
|
|
- let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
|
|
- let pdf = mvn.pdf(&DVector::from_vec(vec![
|
|
|
- bridge_ips_today as f64,
|
|
|
- negative_reports_today as f64,
|
|
|
- ]));
|
|
|
- */
|
|
|
+ // Model each variable with a normal distribution.
|
|
|
+ let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd);
|
|
|
+ let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd);
|
|
|
|
|
|
- // Model each variable in isolation. We use 1 - the CDF for
|
|
|
- // negative reports because more negative reports is worse.
|
|
|
- let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
|
|
- let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
|
|
- let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
|
|
- let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
|
|
+ // If we have 0 standard deviation, we need another way to
|
|
|
+ // evaluate each variable
|
|
|
+ let bip_test = if bridge_ips_sd > 0.0 {
|
|
|
+ bip_normal.unwrap().cdf(bridge_ips_today as f64) < alpha
|
|
|
+ } else {
|
|
|
+ // Consider the bridge blocked if its usage dropped by more
|
|
|
+ // than 1 bin. (Note that the mean is the exact value
|
|
|
+ // because we had no deviation.)
|
|
|
+ (bridge_ips_today as f64) < bridge_ips_mean - 8.0
|
|
|
+ };
|
|
|
+ let nr_test = if negative_reports_sd > 0.0 {
|
|
|
+ // We use CCDF because more negative reports is worse.
|
|
|
+ (1.0 - nr_normal.unwrap().cdf(negative_reports_today as f64)) < alpha
|
|
|
+ } else {
|
|
|
+ // Consider the bridge blocked if negative reports increase by
|
|
|
+ // more than 1 after a long static period. (Note that the
|
|
|
+ // mean is the exact value because we had no deviation.)
|
|
|
+ (negative_reports_today as f64) > negative_reports_mean + 1.0
|
|
|
+ };
|
|
|
|
|
|
- // For now, just look at each variable in isolation
|
|
|
- // TODO: How do we do a multivariate normal CDF?
|
|
|
- bip_cdf < alpha || nr_cdf < alpha
|
|
|
+ // Return true if any test concluded the bridge is blocked
|
|
|
+ bip_test || nr_test
|
|
|
}
|
|
|
|
|
|
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
|
@@ -393,17 +420,17 @@ impl Analyzer for NormalAnalyzer {
|
|
|
]));
|
|
|
*/
|
|
|
|
|
|
- // Model each variable in isolation. We use 1 - the CDF for
|
|
|
+ // Model each variable in isolation. We use the CCDF for
|
|
|
// negative reports because more negative reports is worse.
|
|
|
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
|
|
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
|
|
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
|
|
- let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
|
|
+ let nr_ccdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
|
|
let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap();
|
|
|
let pr_cdf = pr_normal.cdf(positive_reports_today as f64);
|
|
|
|
|
|
// For now, just look at each variable in isolation
|
|
|
// TODO: How do we do a multivariate normal CDF?
|
|
|
- bip_cdf < alpha || nr_cdf < alpha || pr_cdf < alpha
|
|
|
+ bip_cdf < alpha || nr_ccdf < alpha || pr_cdf < alpha
|
|
|
}
|
|
|
}
|