analysis.rs 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. use crate::{BridgeInfo, BridgeInfoType};
  2. use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
  3. use nalgebra::DVector;
  4. use statrs::distribution::{Continuous, MultivariateNormal};
  5. use std::{
  6. cmp::min,
  7. collections::{BTreeMap, HashSet},
  8. };
  9. const SCALE_BRIDGE_IPS: u32 = 8;
  10. /// Provides a function for predicting which countries block this bridge
  11. pub trait Analyzer {
  12. /// Evaluate open-entry bridge. Returns true if blocked, false otherwise.
  13. fn stage_one(
  14. &self,
  15. confidence: f64,
  16. bridge_ips: &[u32],
  17. bridge_ips_today: u32,
  18. negative_reports: &[u32],
  19. negative_reports_today: u32,
  20. ) -> bool;
  21. /// Evaluate invite-only bridge without positive reports. Return true if
  22. /// blocked, false otherwise.
  23. fn stage_two(
  24. &self,
  25. confidence: f64,
  26. bridge_ips: &[u32],
  27. bridge_ips_today: u32,
  28. negative_reports: &[u32],
  29. negative_reports_today: u32,
  30. ) -> bool;
  31. /// Evaluate invite-only bridge with positive reports. Return true if
  32. /// blocked, false otherwise.
  33. fn stage_three(
  34. &self,
  35. confidence: f64,
  36. bridge_ips: &[u32],
  37. bridge_ips_today: u32,
  38. negative_reports: &[u32],
  39. negative_reports_today: u32,
  40. positive_reports: &[u32],
  41. positive_reports_today: u32,
  42. ) -> bool;
  43. }
  44. /// Accepts an analyzer, information about a bridge, and a confidence value.
  45. /// Returns a set of country codes where the bridge is believed to be blocked.
  46. pub fn blocked_in(
  47. analyzer: &dyn Analyzer,
  48. bridge_info: &BridgeInfo,
  49. confidence: f64,
  50. date: u32,
  51. ) -> HashSet<String> {
  52. // TODO: Re-evaluate past days if we have backdated reports
  53. let mut blocked_in = HashSet::<String>::new();
  54. let today = date;
  55. let age = today - bridge_info.first_seen;
  56. for (country, info) in &bridge_info.info_by_country {
  57. if info.blocked {
  58. // Assume bridges never become unblocked
  59. blocked_in.insert(country.to_string());
  60. } else {
  61. // Get today's values
  62. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  63. // TODO: Evaluate on yesterday if we don't have data for today?
  64. let today_info = match info.info_by_day.get(&today) {
  65. Some(v) => v,
  66. None => &new_map_binding,
  67. };
  68. let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
  69. Some(&v) => v / SCALE_BRIDGE_IPS,
  70. None => 0,
  71. };
  72. let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) {
  73. Some(&v) => v,
  74. None => 0,
  75. };
  76. let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) {
  77. Some(&v) => v,
  78. None => 0,
  79. };
  80. let num_days = min(age, UNTRUSTED_INTERVAL);
  81. // Get time series for last num_days
  82. let mut bridge_ips = vec![0; num_days as usize];
  83. let mut negative_reports = vec![0; num_days as usize];
  84. let mut positive_reports = vec![0; num_days as usize];
  85. for i in 0..num_days {
  86. let date = today - num_days + i - 1;
  87. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  88. let day_info = match info.info_by_day.get(&date) {
  89. Some(v) => v,
  90. None => &new_map_binding,
  91. };
  92. bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
  93. Some(&v) => v / SCALE_BRIDGE_IPS,
  94. None => 0,
  95. };
  96. negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports)
  97. {
  98. Some(&v) => v,
  99. None => 0,
  100. };
  101. positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports)
  102. {
  103. Some(&v) => v,
  104. None => 0,
  105. };
  106. }
  107. // Evaluate using appropriate stage based on age of the bridge
  108. if age < UNTRUSTED_INTERVAL {
  109. // open-entry bridge
  110. if analyzer.stage_one(
  111. confidence,
  112. &bridge_ips,
  113. bridge_ips_today,
  114. &negative_reports,
  115. negative_reports_today,
  116. ) {
  117. blocked_in.insert(country.to_string());
  118. }
  119. } else if age
  120. < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL
  121. {
  122. // invite-only bridge without 30+ days of historical data on
  123. // positive reports
  124. if analyzer.stage_two(
  125. confidence,
  126. &bridge_ips,
  127. bridge_ips_today,
  128. &negative_reports,
  129. negative_reports_today,
  130. ) {
  131. blocked_in.insert(country.to_string());
  132. }
  133. } else {
  134. // invite-only bridge that has been up long enough that it
  135. // might have 30+ days of historical data on positive reports
  136. if analyzer.stage_three(
  137. confidence,
  138. &bridge_ips,
  139. bridge_ips_today,
  140. &negative_reports,
  141. negative_reports_today,
  142. &positive_reports,
  143. positive_reports_today,
  144. ) {
  145. blocked_in.insert(country.to_string());
  146. }
  147. }
  148. }
  149. }
  150. blocked_in
  151. }
  152. // Analyzer implementations
  153. /// Dummy example that never thinks bridges are blocked
  154. pub struct ExampleAnalyzer {}
  155. impl Analyzer for ExampleAnalyzer {
  156. fn stage_one(
  157. &self,
  158. _confidence: f64,
  159. _bridge_ips: &[u32],
  160. _bridge_ips_today: u32,
  161. _negative_reports: &[u32],
  162. _negative_reports_today: u32,
  163. ) -> bool {
  164. false
  165. }
  166. fn stage_two(
  167. &self,
  168. _confidence: f64,
  169. _bridge_ips: &[u32],
  170. _bridge_ips_today: u32,
  171. _negative_reports: &[u32],
  172. _negative_reports_today: u32,
  173. ) -> bool {
  174. false
  175. }
  176. fn stage_three(
  177. &self,
  178. _confidence: f64,
  179. _bridge_ips: &[u32],
  180. _bridge_ips_today: u32,
  181. _negative_reports: &[u32],
  182. _negative_reports_today: u32,
  183. _positive_reports: &[u32],
  184. _positive_reports_today: u32,
  185. ) -> bool {
  186. false
  187. }
  188. }
  189. /// Model data as multivariate normal distribution
  190. pub struct NormalAnalyzer {
  191. max_threshold: u32,
  192. scaling_factor: f64,
  193. }
  194. impl NormalAnalyzer {
  195. pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
  196. Self {
  197. max_threshold,
  198. scaling_factor,
  199. }
  200. }
  201. // Returns the mean vector, vector of individual standard deviations, and
  202. // covariance matrix
  203. fn stats(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>, Vec<f64>) {
  204. let n = data.len();
  205. // Compute mean and standard deviation vectors
  206. let (mean_vec, sd_vec) = {
  207. let mut mean_vec = Vec::<f64>::new();
  208. let mut sd_vec = Vec::<f64>::new();
  209. for var in data {
  210. // Compute mean
  211. let mut sum = 0.0;
  212. for count in *var {
  213. sum += *count as f64;
  214. }
  215. let mean = sum / var.len() as f64;
  216. // Compute standard deviation
  217. let mut sum = 0.0;
  218. for count in *var {
  219. sum += (*count as f64 - mean).powi(2);
  220. }
  221. let sd = (sum / var.len() as f64).sqrt();
  222. mean_vec.push(mean);
  223. sd_vec.push(sd);
  224. }
  225. (mean_vec, sd_vec)
  226. };
  227. // Compute covariance matrix
  228. let cov_mat = {
  229. let mut cov_mat = Vec::<f64>::new();
  230. // We don't need to recompute Syx, but we currently do
  231. for i in 0..n {
  232. for j in 0..n {
  233. cov_mat.push({
  234. let var1 = data[i];
  235. let var1_mean = mean_vec[i];
  236. let var2 = data[j];
  237. let var2_mean = mean_vec[j];
  238. assert_eq!(var1.len(), var2.len());
  239. let mut sum = 0.0;
  240. for index in 0..var1.len() {
  241. sum +=
  242. (var1[index] as f64 - var1_mean) * (var2[index] as f64 - var2_mean);
  243. }
  244. sum / var1.len() as f64
  245. });
  246. }
  247. }
  248. cov_mat
  249. };
  250. (mean_vec, sd_vec, cov_mat)
  251. }
  252. }
  253. impl Analyzer for NormalAnalyzer {
  254. /// Evaluate open-entry bridge based on only today's data
  255. fn stage_one(
  256. &self,
  257. _confidence: f64,
  258. _bridge_ips: &[u32],
  259. bridge_ips_today: u32,
  260. _negative_reports: &[u32],
  261. negative_reports_today: u32,
  262. ) -> bool {
  263. negative_reports_today > self.max_threshold
  264. || f64::from(negative_reports_today)
  265. > self.scaling_factor * f64::from(bridge_ips_today) * SCALE_BRIDGE_IPS as f64
  266. }
  267. /// Evaluate invite-only bridge based on last 30 days
  268. fn stage_two(
  269. &self,
  270. confidence: f64,
  271. bridge_ips: &[u32],
  272. bridge_ips_today: u32,
  273. negative_reports: &[u32],
  274. negative_reports_today: u32,
  275. ) -> bool {
  276. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  277. assert_eq!(bridge_ips.len(), negative_reports.len());
  278. let alpha = 1.0 - confidence;
  279. let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
  280. let bridge_ips_mean = mean_vec[0];
  281. let negative_reports_mean = mean_vec[1];
  282. let bridge_ips_sd = sd_vec[0];
  283. let negative_reports_sd = sd_vec[1];
  284. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  285. if mvn.pdf(&DVector::from_vec(vec![
  286. bridge_ips_today as f64,
  287. negative_reports_today as f64,
  288. ])) < alpha
  289. {
  290. (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
  291. || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
  292. } else {
  293. false
  294. }
  295. }
  296. /// Evaluate invite-only bridge with lv3+ users submitting positive reports
  297. fn stage_three(
  298. &self,
  299. confidence: f64,
  300. bridge_ips: &[u32],
  301. bridge_ips_today: u32,
  302. negative_reports: &[u32],
  303. negative_reports_today: u32,
  304. positive_reports: &[u32],
  305. positive_reports_today: u32,
  306. ) -> bool {
  307. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  308. assert_eq!(bridge_ips.len(), negative_reports.len());
  309. assert_eq!(bridge_ips.len(), positive_reports.len());
  310. let alpha = 1.0 - confidence;
  311. let (mean_vec, sd_vec, cov_mat) =
  312. Self::stats(&[bridge_ips, negative_reports, positive_reports]);
  313. let bridge_ips_mean = mean_vec[0];
  314. let negative_reports_mean = mean_vec[1];
  315. let positive_reports_mean = mean_vec[2];
  316. let bridge_ips_sd = sd_vec[0];
  317. let negative_reports_sd = sd_vec[1];
  318. let positive_reports_sd = sd_vec[2];
  319. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  320. if mvn.pdf(&DVector::from_vec(vec![
  321. bridge_ips_today as f64,
  322. negative_reports_today as f64,
  323. positive_reports_today as f64,
  324. ])) < alpha
  325. {
  326. (negative_reports_today as f64) > negative_reports_mean + negative_reports_sd
  327. || (bridge_ips_today as f64) < bridge_ips_mean - bridge_ips_sd
  328. || (positive_reports_today as f64) < positive_reports_mean - positive_reports_sd
  329. } else {
  330. false
  331. }
  332. }
  333. }