analysis.rs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. use crate::{BridgeInfo, BridgeInfoType};
  2. use lox_library::proto::{level_up::LEVEL_INTERVAL, trust_promotion::UNTRUSTED_INTERVAL};
  3. use nalgebra::DVector;
  4. use statrs::distribution::{Continuous, MultivariateNormal};
  5. use std::{
  6. cmp::min,
  7. collections::{BTreeMap, HashSet},
  8. };
  /// Provides functions for predicting which countries block this bridge.
  ///
  /// [`blocked_in`] calls exactly one of the three stages per country, chosen
  /// by the age of the bridge. All stages share the same argument conventions:
  ///
  /// * `confidence` is forwarded unchanged from the caller of [`blocked_in`];
  ///   how it is interpreted is up to the implementation.
  /// * `bridge_ips`, `negative_reports` and `positive_reports` are per-day
  ///   counts for the days preceding the evaluated date, oldest day first.
  ///   They all have the same length; days without data are reported as `0`.
  /// * the `*_today` values are the corresponding counts for the evaluated
  ///   date itself and are not part of the historical slices.
  pub trait Analyzer {
      /// Evaluate open-entry bridge. Returns true if blocked, false otherwise.
      fn stage_one(
          &self,
          confidence: f64,
          bridge_ips: &[u32],
          bridge_ips_today: u32,
          negative_reports: &[u32],
          negative_reports_today: u32,
      ) -> bool;

      /// Evaluate invite-only bridge without positive reports. Returns true if
      /// blocked, false otherwise.
      fn stage_two(
          &self,
          confidence: f64,
          bridge_ips: &[u32],
          bridge_ips_today: u32,
          negative_reports: &[u32],
          negative_reports_today: u32,
      ) -> bool;

      /// Evaluate invite-only bridge with positive reports. Returns true if
      /// blocked, false otherwise.
      // The three time series plus their "today" values are passed separately
      // on purpose, so the argument count is inherent to the interface.
      #[allow(clippy::too_many_arguments)]
      fn stage_three(
          &self,
          confidence: f64,
          bridge_ips: &[u32],
          bridge_ips_today: u32,
          negative_reports: &[u32],
          negative_reports_today: u32,
          positive_reports: &[u32],
          positive_reports_today: u32,
      ) -> bool;
  }
  43. /// Accepts an analyzer, information about a bridge, and a confidence value.
  44. /// Returns a set of country codes where the bridge is believed to be blocked.
  45. pub fn blocked_in(
  46. analyzer: &dyn Analyzer,
  47. bridge_info: &BridgeInfo,
  48. confidence: f64,
  49. date: u32,
  50. ) -> HashSet<String> {
  51. // TODO: Re-evaluate past days if we have backdated reports
  52. let mut blocked_in = HashSet::<String>::new();
  53. let today = date;
  54. let age = today - bridge_info.first_seen;
  55. for (country, info) in &bridge_info.info_by_country {
  56. if info.blocked {
  57. // Assume bridges never become unblocked
  58. blocked_in.insert(country.to_string());
  59. } else {
  60. // Get today's values
  61. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  62. // TODO: Evaluate on yesterday if we don't have data for today?
  63. let today_info = match info.info_by_day.get(&today) {
  64. Some(v) => v,
  65. None => &new_map_binding,
  66. };
  67. let bridge_ips_today = match today_info.get(&BridgeInfoType::BridgeIps) {
  68. Some(v) => *v,
  69. None => 0,
  70. };
  71. let negative_reports_today = match today_info.get(&BridgeInfoType::NegativeReports) {
  72. Some(v) => *v,
  73. None => 0,
  74. };
  75. let positive_reports_today = match today_info.get(&BridgeInfoType::PositiveReports) {
  76. Some(v) => *v,
  77. None => 0,
  78. };
  79. let num_days = min(age, UNTRUSTED_INTERVAL);
  80. // Get time series for last num_days
  81. let mut bridge_ips = vec![0; num_days as usize];
  82. let mut negative_reports = vec![0; num_days as usize];
  83. let mut positive_reports = vec![0; num_days as usize];
  84. for i in 0..num_days {
  85. let date = today - num_days + i - 1;
  86. let new_map_binding = BTreeMap::<BridgeInfoType, u32>::new();
  87. let day_info = match info.info_by_day.get(&date) {
  88. Some(v) => v,
  89. None => &new_map_binding,
  90. };
  91. bridge_ips[i as usize] = match day_info.get(&BridgeInfoType::BridgeIps) {
  92. Some(&v) => v,
  93. None => 0,
  94. };
  95. negative_reports[i as usize] = match day_info.get(&BridgeInfoType::NegativeReports)
  96. {
  97. Some(&v) => v,
  98. None => 0,
  99. };
  100. positive_reports[i as usize] = match day_info.get(&BridgeInfoType::PositiveReports)
  101. {
  102. Some(&v) => v,
  103. None => 0,
  104. };
  105. }
  106. // Evaluate using appropriate stage based on age of the bridge
  107. if age < UNTRUSTED_INTERVAL {
  108. // open-entry bridge
  109. if analyzer.stage_one(
  110. confidence,
  111. &bridge_ips,
  112. bridge_ips_today,
  113. &negative_reports,
  114. negative_reports_today,
  115. ) {
  116. blocked_in.insert(country.to_string());
  117. }
  118. } else if age
  119. < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL
  120. {
  121. // invite-only bridge without 30+ days of historical data on
  122. // positive reports
  123. if analyzer.stage_two(
  124. confidence,
  125. &bridge_ips,
  126. bridge_ips_today,
  127. &negative_reports,
  128. negative_reports_today,
  129. ) {
  130. blocked_in.insert(country.to_string());
  131. }
  132. } else {
  133. // invite-only bridge that has been up long enough that it
  134. // might have 30+ days of historical data on positive reports
  135. if analyzer.stage_three(
  136. confidence,
  137. &bridge_ips,
  138. bridge_ips_today,
  139. &negative_reports,
  140. negative_reports_today,
  141. &positive_reports,
  142. positive_reports_today,
  143. ) {
  144. blocked_in.insert(country.to_string());
  145. }
  146. }
  147. }
  148. }
  149. blocked_in
  150. }
  151. // Analyzer implementations
  152. /// Dummy example that never thinks bridges are blocked
  153. pub struct ExampleAnalyzer {}
  154. impl Analyzer for ExampleAnalyzer {
  155. fn stage_one(
  156. &self,
  157. _confidence: f64,
  158. _bridge_ips: &[u32],
  159. _bridge_ips_today: u32,
  160. _negative_reports: &[u32],
  161. _negative_reports_today: u32,
  162. ) -> bool {
  163. false
  164. }
  165. fn stage_two(
  166. &self,
  167. _confidence: f64,
  168. _bridge_ips: &[u32],
  169. _bridge_ips_today: u32,
  170. _negative_reports: &[u32],
  171. _negative_reports_today: u32,
  172. ) -> bool {
  173. false
  174. }
  175. fn stage_three(
  176. &self,
  177. _confidence: f64,
  178. _bridge_ips: &[u32],
  179. _bridge_ips_today: u32,
  180. _negative_reports: &[u32],
  181. _negative_reports_today: u32,
  182. _positive_reports: &[u32],
  183. _positive_reports_today: u32,
  184. ) -> bool {
  185. false
  186. }
  187. }
  /// Model data as multivariate normal distribution
  #[derive(Debug, Clone, Copy, PartialEq)]
  pub struct NormalAnalyzer {
      /// Absolute number of negative reports above which a bridge is
      /// considered blocked.
      max_threshold: u32,
      /// Factor applied to the bridge-IP count when comparing it against the
      /// number of negative reports.
      scaling_factor: f64,
  }

  impl NormalAnalyzer {
      /// Creates an analyzer with the given absolute negative-report threshold
      /// and negative-reports-per-bridge-IP scaling factor.
      pub fn new(max_threshold: u32, scaling_factor: f64) -> Self {
          Self {
              max_threshold,
              scaling_factor,
          }
      }

      /// Computes the mean vector and the covariance matrix of the given
      /// variables.
      ///
      /// Each element of `data` is one variable's series of observations. The
      /// first return value holds one mean per variable. The second is the
      /// `n * n` covariance matrix (for `n = data.len()`) flattened row by
      /// row; it is symmetric, so the layout is also valid column by column.
      /// The population covariance is used, i.e. sums are divided by the
      /// number of observations, not by that number minus one.
      ///
      /// Empty series yield `NaN` entries, and an empty `data` yields two
      /// empty vectors.
      ///
      /// # Panics
      ///
      /// Panics if the series do not all have the same length.
      fn mean_vector_and_covariance_matrix(data: &[&[u32]]) -> (Vec<f64>, Vec<f64>) {
          let n = data.len();

          // Check the shared-length precondition once instead of per pair.
          if let Some((first, rest)) = data.split_first() {
              for var in rest {
                  assert_eq!(first.len(), var.len());
              }
          }

          // Compute mean vector
          let mean_vec: Vec<f64> = data
              .iter()
              .map(|var| {
                  let sum = var.iter().fold(0.0, |acc, &count| acc + f64::from(count));
                  sum / var.len() as f64
              })
              .collect();

          // Deviations from the mean, computed once per variable rather than
          // once per matrix entry.
          let centered: Vec<Vec<f64>> = data
              .iter()
              .zip(&mean_vec)
              .map(|(var, &mean)| var.iter().map(|&count| f64::from(count) - mean).collect())
              .collect();

          // Compute covariance matrix. Sxy == Syx, so each pair is computed
          // once and written to both positions.
          let mut cov_mat = vec![0.0; n * n];
          for i in 0..n {
              for j in i..n {
                  let sum = centered[i]
                      .iter()
                      .zip(&centered[j])
                      .fold(0.0, |acc, (x, y)| acc + x * y);
                  let cov = sum / centered[i].len() as f64;
                  cov_mat[i * n + j] = cov;
                  cov_mat[j * n + i] = cov;
              }
          }

          (mean_vec, cov_mat)
      }
  }
  242. impl Analyzer for NormalAnalyzer {
  243. /// Evaluate open-entry bridge based on only today's data
  244. fn stage_one(
  245. &self,
  246. _confidence: f64,
  247. _bridge_ips: &[u32],
  248. bridge_ips_today: u32,
  249. _negative_reports: &[u32],
  250. negative_reports_today: u32,
  251. ) -> bool {
  252. negative_reports_today > self.max_threshold
  253. || f64::from(negative_reports_today) > self.scaling_factor * f64::from(bridge_ips_today)
  254. }
  255. /// Evaluate invite-only bridge based on last 30 days
  256. fn stage_two(
  257. &self,
  258. confidence: f64,
  259. bridge_ips: &[u32],
  260. bridge_ips_today: u32,
  261. negative_reports: &[u32],
  262. negative_reports_today: u32,
  263. ) -> bool {
  264. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  265. assert_eq!(bridge_ips.len(), negative_reports.len());
  266. let (mean_vec, cov_mat) =
  267. Self::mean_vector_and_covariance_matrix(&[bridge_ips, negative_reports]);
  268. let bridge_ips_mean = mean_vec[0];
  269. let negative_reports_mean = mean_vec[1];
  270. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  271. if mvn.pdf(&DVector::from_vec(vec![
  272. bridge_ips_today as f64,
  273. negative_reports_today as f64,
  274. ])) < confidence
  275. {
  276. (negative_reports_today as f64) > negative_reports_mean
  277. || (bridge_ips_today as f64) < bridge_ips_mean
  278. } else {
  279. false
  280. }
  281. }
  282. /// Evaluate invite-only bridge with lv3+ users submitting positive reports
  283. fn stage_three(
  284. &self,
  285. confidence: f64,
  286. bridge_ips: &[u32],
  287. bridge_ips_today: u32,
  288. negative_reports: &[u32],
  289. negative_reports_today: u32,
  290. positive_reports: &[u32],
  291. positive_reports_today: u32,
  292. ) -> bool {
  293. assert!(bridge_ips.len() >= UNTRUSTED_INTERVAL as usize);
  294. assert_eq!(bridge_ips.len(), negative_reports.len());
  295. assert_eq!(bridge_ips.len(), positive_reports.len());
  296. let (mean_vec, cov_mat) = Self::mean_vector_and_covariance_matrix(&[
  297. bridge_ips,
  298. negative_reports,
  299. positive_reports,
  300. ]);
  301. let bridge_ips_mean = mean_vec[0];
  302. let negative_reports_mean = mean_vec[1];
  303. let positive_reports_mean = mean_vec[2];
  304. let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
  305. if mvn.pdf(&DVector::from_vec(vec![
  306. bridge_ips_today as f64,
  307. negative_reports_today as f64,
  308. positive_reports_today as f64,
  309. ])) < confidence
  310. {
  311. (negative_reports_today as f64) > negative_reports_mean
  312. || (bridge_ips_today as f64) < bridge_ips_mean
  313. || (positive_reports_today as f64) < positive_reports_mean
  314. } else {
  315. false
  316. }
  317. }
  318. }