analysis.rs 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. use crate::{BridgeInfo, BridgeInfoType};
  2. use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
  3. use nalgebra::DVector;
  4. use statrs::distribution::{Continuous, MultivariateNormal, Normal};
  5. use std::{
  6. cmp::min,
  7. collections::{BTreeMap, HashSet},
  8. };
/// Provides a function for predicting which countries block this bridge
///
/// Every stage takes the same core inputs: a caller-supplied `confidence`
/// value, a slice of historical daily counts for each signal, and the count
/// of that same signal for the day being evaluated. Each stage returns `true`
/// when the bridge is judged to be blocked and `false` otherwise.
pub trait Analyzer {
    /// Evaluate open-entry bridge. Returns true if blocked, false otherwise.
    ///
    /// `bridge_ips` / `negative_reports` are the historical series, and
    /// `bridge_ips_today` / `negative_reports_today` are the values for the
    /// day being evaluated.
    fn stage_one(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
    ) -> bool;

    /// Evaluate invite-only bridge without positive reports. Return true if
    /// blocked, false otherwise.
    ///
    /// Takes the same inputs as [`Analyzer::stage_one`].
    fn stage_two(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
    ) -> bool;

    /// Evaluate invite-only bridge with positive reports. Return true if
    /// blocked, false otherwise.
    ///
    /// In addition to the inputs of the other stages, this stage receives the
    /// historical `positive_reports` series and `positive_reports_today`.
    fn stage_three(
        &self,
        confidence: f64,
        bridge_ips: &[u32],
        bridge_ips_today: u32,
        negative_reports: &[u32],
        negative_reports_today: u32,
        positive_reports: &[u32],
        positive_reports_today: u32,
    ) -> bool;
}
  43. /// Accepts an analyzer, information about a bridge, and a confidence value.
  44. /// Returns a set of country codes where the bridge is believed to be blocked.
  45. pub fn blocked_in(
  46. analyzer: &dyn Analyzer,
  47. bridge_info: &BridgeInfo,
  48. confidence: f64,
  49. date: u32,
  50. ) -> HashSet<String> {
  51. let mut blocked_in = HashSet::<String>::new();
  52. let today = date;
  53. for (country, info) in &bridge_info.info_by_country {
  54. let age = today - info.first_seen;
  55. if info.blocked {
  56. // Assume bridges never become unblocked
  57. blocked_in.insert(country.to_string());
  58. } else {
  59. // Get today's values
  60. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  61. // TODO: Evaluate on yesterday if we don't have data for today?
  62. let today_info = match info.info_by_day.get(&today) {
  63. Some(v) => v,
  64. None => &new_map_binding,
  65. };
  66. let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
  67. Some(&v) => v,
  68. None => 0,
  69. };
  70. let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) {
  71. Some(&v) => v,
  72. None => 0,
  73. };
  74. let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) {
  75. Some(&v) => v,
  76. None => 0,
  77. };
  78. let num_days = min(age, UNTRUSTED_INTERVAL);
  79. // Get time series for last num_days
  80. let mut bridge_ips = vec![0; num_days as usize];
  81. let mut negative_reports = vec![0; num_days as usize];
  82. let mut positive_reports = vec![0; num_days as usize];
  83. for i in 0..num_days {
  84. let date = today - num_days + i - 1;
  85. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  86. let day_info = match info.info_by_day.get(&date) {
  87. Some(v) => v,
  88. None => &new_map_binding,
  89. };
  90. bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
  91. Some(&v) => v,
  92. None => 0,
  93. };
  94. negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports)
  95. {
  96. Some(&v) => v,
  97. None => 0,
  98. };
  99. positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports)
  100. {
  101. Some(&v) => v,
  102. None => 0,
  103. };
  104. }
  105. // Evaluate using appropriate stage based on age of the bridge
  106. if age < UNTRUSTED_INTERVAL {
  107. // open-entry bridge
  108. if analyzer.stage_one(
  109. confidence,
  110. &bridge_ips,
  111. bridge_ips_today,
  112. &negative_reports,
  113. negative_reports_today,
  114. ) {
  115. blocked_in.insert(country.to_string());
  116. }
  117. } else if info.first_pr.is_none() || today < info.first_pr.unwrap() + UNTRUSTED_INTERVAL
  118. {
  119. // invite-only bridge without 30+ days of historical data on
  120. // positive reports
  121. if analyzer.stage_two(
  122. confidence,
  123. &bridge_ips,
  124. bridge_ips_today,
  125. &negative_reports,
  126. negative_reports_today,
  127. ) {
  128. blocked_in.insert(country.to_string());
  129. }
  130. } else {
  131. // invite-only bridge that has been up long enough that it
  132. // might have 30+ days of historical data on positive reports
  133. if analyzer.stage_three(
  134. confidence,
  135. &bridge_ips,
  136. bridge_ips_today,
  137. &negative_reports,
  138. negative_reports_today,
  139. &positive_reports,
  140. positive_reports_today,
  141. ) {
  142. blocked_in.insert(country.to_string());
  143. }
  144. }
  145. }
  146. }
  147. blocked_in
  148. }
  149. // Analyzer implementations
  150. /// Dummy example that never thinks bridges are blocked
  151. pub struct ExampleAnalyzer {}
  152. impl Analyzer for ExampleAnalyzer {
  153. fn stage_one(
  154. &self,
  155. _confidence: f64,
  156. _bridge_ips: &[u32],
  157. _bridge_ips_today: u32,
  158. _negative_reports: &[u32],
  159. _negative_reports_today: u32,
  160. ) -> bool {
  161. false
  162. }
  163. fn stage_two(
  164. &self,
  165. _confidence: f64,
  166. _bridge_ips: &[u32],
  167. _bridge_ips_today: u32,
  168. _negative_reports: &[u32],
  169. _negative_reports_today: u32,
  170. ) -> bool {
  171. false
  172. }
  173. fn stage_three(
  174. &self,
  175. _confidence: f64,
  176. _bridge_ips: &[u32],
  177. _bridge_ips_today: u32,
  178. _negative_reports: &[u32],
  179. _negative_reports_today: u32,
  180. _positive_reports: &[u32],
  181. _positive_reports_today: u32,
  182. ) -> bool {
  183. false
  184. }
  185. }
  186. /// Model data as multivariate normal distribution
  187. pub struct NormalAnalyzer {
  188. max_threshold: u32,
  189. scaling_factor: f64,
  190. }
  191. impl NormalAnalyzer {
  192. pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
  193. Self {
  194. max_threshold,
  195. scaling_factor,
  196. }
  197. }
  198. // Returns the mean vector, vector of individual standard deviations, and
  199. // covariance matrix
  200. fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>, Vec<f64>) {
  201. let n = data.len();
  202. // Compute mean and standard deviation vectors
  203. let (mean_vec, sd_vec) = {
  204. let mut mean_vec = Vec::<f64>::new();
  205. let mut sd_vec = Vec::<f64>::new();
  206. for var in data {
  207. // Compute mean
  208. let mut sum = 0.0;
  209. for count in *var {
  210. sum += *count as f64;
  211. }
  212. let mean = sum / var.len() as f64;
  213. // Compute standard deviation
  214. let mut sum = 0.0;
  215. for count in *var {
  216. sum += (*count as f64 - mean).powi(2);
  217. }
  218. let sd = (sum / var.len() as f64).sqrt();
  219. mean_vec.push(mean);
  220. sd_vec.push(sd);
  221. }
  222. (mean_vec, sd_vec)
  223. };
  224. // Compute covariance matrix
  225. let cov_mat = {
  226. let mut cov_mat = Vec::<f64>::new();
  227. // We don't need to recompute Syx, but we currently do
  228. for i in 0..n {
  229. for j in 0..n {
  230. cov_mat.push({
  231. let var1 = data[i];
  232. let var1_mean = mean_vec[i];
  233. let var2 = data[j];
  234. let var2_mean = mean_vec[j];
  235. assert_eq!(var1.len(), var2.len());
  236. let mut sum = 0.0;
  237. for index in 0..var1.len() {
  238. sum +=
  239. (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
  240. }
  241. sum / (var1.len() - 1) as f64
  242. });
  243. }
  244. }
  245. cov_mat
  246. };
  247. (mean_vec, sd_vec, cov_mat)
  248. }
  249. }
  250. impl Analyzer for NormalAnalyzer {
  251. /// Evaluate open-entry bridge based on only today's data
  252. fn stage_one(
  253. &self,
  254. _confidence: f64,
  255. _bridge_ips: &[u32],
  256. bridge_ips_today: u32,
  257. _negative_reports: &[u32],
  258. negative_reports_today: u32,
  259. ) -> bool {
  260. negative_reports_today > self.max_threshold
  261. || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
  262. }
  263. /// Evaluate invite-only bridge based on last 30 days
  264. fn stage_two(
  265. &self,
  266. confidence: f64,
  267. bridge_ips: &[u32],
  268. bridge_ips_today: u32,
  269. negative_reports: &[u32],
  270. negative_reports_today: u32,
  271. ) -> bool {
  272. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  273. assert_eq!(bridge_ips.len(), negative_reports.len());
  274. let alpha = 1.0 - confidence;
  275. let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
  276. let negative_reports_mean = mean_vec[1];
  277. let bridge_ips_sd = sd_vec[0];
  278. let negative_reports_sd = sd_vec[1];
  279. // Artificially create data for alternative hypothesis
  280. let num_days = bridge_ips.len() as usize;
  281. let mut bridge_ips_blocked = vec![0; num_days];
  282. let mut negative_reports_blocked = vec![0; num_days];
  283. let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
  284. for i in 0..num_days {
  285. // Suppose bridge stats will go down by 2 SDs
  286. bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
  287. 0
  288. } else {
  289. bridge_ips[i] - bridge_ips_deviation
  290. };
  291. // Suppose negative reports will go up by 2 SDs
  292. negative_reports_blocked[i] =
  293. negative_reports[i] + (2.0 * negative_reports_sd).round() as u32;
  294. }
  295. let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) =
  296. Self::stats(&[&bridge_ips_blocked, &negative_reports_blocked]);
  297. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  298. let pdf = mvn.pdf(&DVector::from_vec(vec![
  299. bridge_ips_today as f64,
  300. negative_reports_today as f64,
  301. ]));
  302. let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
  303. let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
  304. bridge_ips_today as f64,
  305. negative_reports_today as f64,
  306. ]));
  307. // Also model negative reports in isolation
  308. let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
  309. let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
  310. let nr_normal_blocked = Normal::new(
  311. negative_reports_mean + 2.0 * negative_reports_sd,
  312. negative_reports_sd,
  313. )
  314. .unwrap();
  315. let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
  316. (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
  317. }
  318. /// Evaluate invite-only bridge with lv3+ users submitting positive reports
  319. fn stage_three(
  320. &self,
  321. confidence: f64,
  322. bridge_ips: &[u32],
  323. bridge_ips_today: u32,
  324. negative_reports: &[u32],
  325. negative_reports_today: u32,
  326. positive_reports: &[u32],
  327. positive_reports_today: u32,
  328. ) -> bool {
  329. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  330. assert_eq!(bridge_ips.len(), negative_reports.len());
  331. assert_eq!(bridge_ips.len(), positive_reports.len());
  332. let alpha = 1.0 - confidence;
  333. let (mean_vec, sd_vec, cov_mat) =
  334. Self::stats(&[bridge_ips, negative_reports, positive_reports]);
  335. let negative_reports_mean = mean_vec[1];
  336. let bridge_ips_sd = sd_vec[0];
  337. let negative_reports_sd = sd_vec[1];
  338. let positive_reports_sd = sd_vec[2];
  339. // Artificially create data for alternative hypothesis
  340. let num_days = bridge_ips.len() as usize;
  341. let mut bridge_ips_blocked = vec![0; num_days];
  342. let mut negative_reports_blocked = vec![0; num_days];
  343. let mut positive_reports_blocked = vec![0; num_days];
  344. let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
  345. let positive_reports_deviation = (2.0 * positive_reports_sd).round() as u32;
  346. for i in 0..num_days {
  347. // Suppose positive reports will go down by 2 SDs
  348. positive_reports_blocked[i] = if positive_reports_deviation > positive_reports[i] {
  349. 0
  350. } else {
  351. positive_reports[i] - positive_reports_deviation
  352. };
  353. // Suppose bridge stats will go down by 2 SDs
  354. bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
  355. 0
  356. } else {
  357. bridge_ips[i] - bridge_ips_deviation
  358. };
  359. // Suppose each user who would have submitted a positive report but
  360. // didn't submits a negative report instead.
  361. negative_reports_blocked[i] =
  362. negative_reports[i] + positive_reports[i] - positive_reports_blocked[i];
  363. }
  364. let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[
  365. &bridge_ips_blocked,
  366. &negative_reports_blocked,
  367. &positive_reports_blocked,
  368. ]);
  369. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  370. let pdf = mvn.pdf(&DVector::from_vec(vec![
  371. bridge_ips_today as f64,
  372. negative_reports_today as f64,
  373. positive_reports_today as f64,
  374. ]));
  375. let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
  376. let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
  377. bridge_ips_today as f64,
  378. negative_reports_today as f64,
  379. positive_reports_today as f64,
  380. ]));
  381. // Also model negative reports in isolation
  382. let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
  383. let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
  384. // Note we do NOT make this a function of positive signals
  385. let nr_normal_blocked = Normal::new(
  386. negative_reports_mean + 2.0 * negative_reports_sd,
  387. negative_reports_sd,
  388. )
  389. .unwrap();
  390. let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
  391. (pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
  392. }
  393. }