analyzer.rs 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. use crate::{get_date, BridgeInfo, BridgeInfoType};
  2. use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
  3. use nalgebra::DVector;
  4. use statrs::distribution::{Continuous, MultivariateNormal};
  5. use std::collections::{BTreeMap, HashSet};
/// Provides a function for predicting which countries block this bridge
pub trait Analyzer {
    /// Returns the set of countries in which the bridge described by
    /// `bridge_info` is predicted to be blocked.
    ///
    /// `confidence` is a tuning parameter whose interpretation is left to the
    /// implementor; an implementation is free to ignore it.
    fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String>;
}
  10. /// Dummy example that just tells us about blockages we already know about
  11. pub struct ExampleAnalyzer {}
  12. impl Analyzer for ExampleAnalyzer {
  13. fn blocked_in(&self, bridge_info: &BridgeInfo, _confidence: f64) -> HashSet<String> {
  14. let mut blocked_in = HashSet::<String>::new();
  15. for (country, info) in &bridge_info.info_by_country {
  16. if info.blocked {
  17. blocked_in.insert(country.to_string());
  18. }
  19. }
  20. blocked_in
  21. }
  22. }
  23. /// Model data as multivariate normal distribution
  24. pub struct NormalAnalyzer {
  25. max_threshold: u32,
  26. scaling_factor: f64,
  27. }
  28. impl NormalAnalyzer {
  29. pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
  30. Self {
  31. max_threshold,
  32. scaling_factor,
  33. }
  34. }
  35. fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
  36. let n = data.len();
  37. // Compute mean vector
  38. let mean_vec = {
  39. let mut mean_vec = Vec::<f64>::new();
  40. for var in data {
  41. mean_vec.push({
  42. let mut sum = 0.0;
  43. for count in *var {
  44. sum += *count as f64;
  45. }
  46. sum / var.len() as f64
  47. });
  48. }
  49. mean_vec
  50. };
  51. // Compute covariance matrix
  52. let cov_mat = {
  53. let mut cov_mat = Vec::<f64>::new();
  54. // We don't need to recompute Syx, but we currently do
  55. for i in 0..n {
  56. for j in 0..n {
  57. cov_mat.push({
  58. let var1 = data[i];
  59. let var1_mean = mean_vec[i];
  60. let var2 = data[j];
  61. let var2_mean = mean_vec[j];
  62. assert_eq!(var1.len(), var2.len());
  63. let mut sum = 0.0;
  64. for index in 0..var1.len() {
  65. sum +=
  66. (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
  67. }
  68. sum / var1.len() as f64
  69. });
  70. }
  71. }
  72. cov_mat
  73. };
  74. (mean_vec, cov_mat)
  75. }
  76. /// Evaluate open-entry bridge based on only today's data
  77. fn stage_one(&self, bridge_ips_today: u32, negative_reports_today: u32) -> bool {
  78. negative_reports_today > self.max_threshold
  79. || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
  80. }
  81. /// Evaluate invite-only bridge based on last 30 days
  82. fn stage_two(
  83. &self,
  84. confidence: f64,
  85. bridge_ips: &[u32],
  86. bridge_ips_today: u32,
  87. negative_reports: &[u32],
  88. negative_reports_today: u32,
  89. ) -> bool {
  90. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  91. assert_eq!(bridge_ips.len(), negative_reports.len());
  92. let (mean_vec, cov_mat) =
  93. Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
  94. let bridge_ips_mean = mean_vec[0];
  95. let negative_reports_mean = mean_vec[1];
  96. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  97. if mvn.pdf(&DVector::from_vec(vec![
  98. bridge_ips_today as f64,
  99. negative_reports_today as f64,
  100. ])) < confidence
  101. {
  102. (negative_reports_today as f64) > negative_reports_mean
  103. || (bridge_ips_today as f64) < bridge_ips_mean
  104. } else {
  105. false
  106. }
  107. }
  108. /// Evaluate invite-only bridge with lv3+ users submitting positive reports
  109. fn stage_three(
  110. &self,
  111. confidence: f64,
  112. bridge_ips: &[u32],
  113. bridge_ips_today: u32,
  114. negative_reports: &[u32],
  115. negative_reports_today: u32,
  116. positive_reports: &[u32],
  117. positive_reports_today: u32,
  118. ) -> bool {
  119. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  120. assert_eq!(bridge_ips.len(), negative_reports.len());
  121. assert_eq!(bridge_ips.len(), positive_reports.len());
  122. let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
  123. bridge_ips,
  124. negative_reports,
  125. positive_reports,
  126. ]);
  127. let bridge_ips_mean = mean_vec[0];
  128. let negative_reports_mean = mean_vec[1];
  129. let positive_reports_mean = mean_vec[2];
  130. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  131. if mvn.pdf(&DVector::from_vec(vec![
  132. bridge_ips_today as f64,
  133. negative_reports_today as f64,
  134. positive_reports_today as f64,
  135. ])) < confidence
  136. {
  137. (negative_reports_today as f64) > negative_reports_mean
  138. || (bridge_ips_today as f64) < bridge_ips_mean
  139. || (positive_reports_today as f64) < positive_reports_mean
  140. } else {
  141. false
  142. }
  143. }
  144. }
  145. impl Analyzer for NormalAnalyzer {
  146. fn blocked_in(&self, bridge_info: &BridgeInfo, confidence: f64) -> HashSet<String> {
  147. // TODO: Re-evaluate past days if we have backdated reports
  148. let mut blocked_in = HashSet::<String>::new();
  149. let today = get_date();
  150. let age = bridge_info.first_seen - today;
  151. for (country, info) in &bridge_info.info_by_country {
  152. if info.blocked {
  153. // Assume bridges never become unblocked
  154. blocked_in.insert(country.to_string());
  155. } else {
  156. // Get today's values
  157. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  158. // TODO: Evaluate on yesterday if we don't have data for today?
  159. let today_info = match info.info_by_day.get(&today) {
  160. Some(v) => v,
  161. None => &new_map_binding,
  162. };
  163. let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
  164. Some(v) => *v,
  165. None => 0,
  166. };
  167. let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports)
  168. {
  169. Some(v) => *v,
  170. None => 0,
  171. };
  172. let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports)
  173. {
  174. Some(v) => *v,
  175. None => 0,
  176. };
  177. if age < UNTRUSTED_INTERVAL {
  178. // open-entry bridge
  179. if self.stage_one(bridge_ips_today, negative_reports_today) {
  180. blocked_in.insert(country.to_string());
  181. }
  182. } else {
  183. // invite-only bridge
  184. let mut bridge_ips = [0; UNTRUSTED_INTERVAL as usize];
  185. let mut negative_reports = [0; UNTRUSTED_INTERVAL as usize];
  186. let mut positive_reports = [0; UNTRUSTED_INTERVAL as usize];
  187. let mut stage_3 = false;
  188. // Populate time series
  189. for i in 0..UNTRUSTED_INTERVAL {
  190. let date = today - UNTRUSTED_INTERVAL + i - 1;
  191. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  192. let day_info = match info.info_by_day.get(&date) {
  193. Some(v) => v,
  194. None => &new_map_binding,
  195. };
  196. bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
  197. Some(v) => *v,
  198. None => 0,
  199. };
  200. negative_reports[i as usize] =
  201. match day_info.get(&BridgeInfoType::NegativeReports) {
  202. Some(v) => *v,
  203. None => 0,
  204. };
  205. positive_reports[i as usize] =
  206. match day_info.get(&BridgeInfoType::PositiveReports) {
  207. Some(v) => {
  208. stage_3 = true;
  209. *v
  210. }
  211. None => 0,
  212. };
  213. }
  214. if stage_3 {
  215. // We've seen positive reports
  216. if self.stage_three(
  217. confidence,
  218. &bridge_ips,
  219. bridge_ips_today,
  220. &negative_reports,
  221. negative_reports_today,
  222. &positive_reports,
  223. positive_reports_today,
  224. ) {
  225. blocked_in.insert(country.to_string());
  226. }
  227. } else {
  228. // We have not seen positive reports
  229. if self.stage_two(
  230. confidence,
  231. &bridge_ips,
  232. bridge_ips_today,
  233. &negative_reports,
  234. negative_reports_today,
  235. ) {
  236. blocked_in.insert(country.to_string());
  237. }
  238. }
  239. }
  240. }
  241. }
  242. blocked_in
  243. }
  244. }