Ver código fonte

Start stage 3 based on actual observation of a positive report

Also track how much historical data we have on a per-country basis
Vecna 3 semanas atrás
pai
commit
e2b1f9aa7d
3 arquivos alterados com 407 adições e 22 exclusões
  1. 2 3
      src/analysis.rs
  2. 29 12
      src/lib.rs
  3. 376 7
      src/tests.rs

+ 2 - 3
src/analysis.rs

@@ -54,8 +54,8 @@ pub fn blocked_in(
 ) -> HashSet<String> {
     let mut blocked_in = HashSet::<String>::new();
     let today = date;
-    let age = today - bridge_info.first_seen;
     for (country, info) in &bridge_info.info_by_country {
+        let age = today - info.first_seen;
         if info.blocked {
             // Assume bridges never become unblocked
             blocked_in.insert(country.to_string());
@@ -122,8 +122,7 @@ pub fn blocked_in(
                 ) {
                     blocked_in.insert(country.to_string());
                 }
-            } else if age
-                < UNTRUSTED_INTERVAL + LEVEL_INTERVAL[1] + LEVEL_INTERVAL[2] + UNTRUSTED_INTERVAL
+            } else if info.first_pr.is_none() || today < info.first_pr.unwrap() + UNTRUSTED_INTERVAL
             {
                 // invite-only bridge without 30+ days of historical data on
                 // positive reports

+ 29 - 12
src/lib.rs

@@ -57,9 +57,6 @@ pub struct BridgeInfo {
     /// nickname of bridge (probably not necessary)
     pub nickname: String,
 
-    /// first Julian date we started collecting data on this bridge
-    pub first_seen: u32,
-
     /// map of countries to data for this bridge in that country
     pub info_by_country: HashMap<String, BridgeCountryInfo>,
 }
@@ -69,7 +66,6 @@ impl BridgeInfo {
         Self {
             fingerprint: fingerprint,
             nickname: nickname.to_string(),
-            first_seen: get_date(),
             info_by_country: HashMap::<String, BridgeCountryInfo>::new(),
         }
     }
@@ -82,7 +78,7 @@ impl fmt::Display for BridgeInfo {
             array_bytes::bytes2hex("", self.fingerprint).as_str()
         );
         str.push_str(format!("nickname: {}\n", self.nickname).as_str());
-        str.push_str(format!("first_seen: {}\n", self.first_seen).as_str());
+        //str.push_str(format!("first_seen: {}\n", self.first_seen).as_str());
         str.push_str("info_by_country:");
         for country in self.info_by_country.keys() {
             str.push_str(format!("\n  country: {}", country).as_str());
@@ -107,13 +103,21 @@ pub enum BridgeInfoType {
 pub struct BridgeCountryInfo {
     pub info_by_day: BTreeMap<u32, BTreeMap<BridgeInfoType, u32>>,
     pub blocked: bool,
+
+    /// first Julian date we saw data from this country for this bridge
+    pub first_seen: u32,
+
+    /// first Julian date we saw a positive report from this country for this bridge
+    pub first_pr: Option<u32>,
 }
 
 impl BridgeCountryInfo {
-    pub fn new() -> Self {
+    pub fn new(first_seen: u32) -> Self {
         Self {
             info_by_day: BTreeMap::<u32, BTreeMap<BridgeInfoType, u32>>::new(),
             blocked: false,
+            first_seen: first_seen,
+            first_pr: None,
         }
     }
 
@@ -137,12 +141,25 @@ impl BridgeCountryInfo {
             info.insert(info_type, count);
             self.info_by_day.insert(date, info);
         }
+
+        // If this is the first instance of positive reports, save the date
+        if self.first_pr.is_none() && info_type == BridgeInfoType::PositiveReports && count > 0 {
+            self.first_pr = Some(date);
+        }
     }
 }
 
 impl fmt::Display for BridgeCountryInfo {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let mut str = String::from("info:");
+        let mut str = format!("blocked: {}\n", self.blocked);
+        str.push_str(format!("first seen: {}\n", self.first_seen).as_str());
+        let first_pr = if self.first_pr.is_none() {
+            "never".to_string()
+        } else {
+            self.first_pr.unwrap().to_string()
+        };
+        str.push_str(format!("first positive report observed: {}\n", first_pr).as_str());
+        str.push_str("info:");
         for date in self.info_by_day.keys() {
             let info = self.info_by_day.get(date).unwrap();
             let ip_count = match info.get(&BridgeInfoType::BridgeIps) {
@@ -239,7 +256,7 @@ pub fn add_extra_info_to_db(db: &Db, extra_info: ExtraInfo) {
                 );
         } else {
             // No existing entry; make a new one.
-            let mut bridge_country_info = BridgeCountryInfo::new();
+            let mut bridge_country_info = BridgeCountryInfo::new(extra_info.date);
             bridge_country_info.add_info(
                 BridgeInfoType::BridgeIps,
                 extra_info.date,
@@ -444,7 +461,7 @@ pub async fn verify_negative_reports(
 /// Process today's negative reports and store the count of verified reports in
 /// the database.
 pub async fn update_negative_reports(db: &Db, distributors: &BTreeMap<BridgeDistributor, String>) {
-    let mut all_negative_reports = match db.get("nrs-to-process").unwrap() {
+    let all_negative_reports = match db.get("nrs-to-process").unwrap() {
         Some(v) => bincode::deserialize(&v).unwrap(),
         None => BTreeMap::<String, Vec<SerializableNegativeReport>>::new(),
     };
@@ -475,7 +492,7 @@ pub async fn update_negative_reports(db: &Db, distributors: &BTreeMap<BridgeDist
                 bridge_country_info.add_info(BridgeInfoType::NegativeReports, date, count_valid);
             } else {
                 // No existing entry; make a new one.
-                let mut bridge_country_info = BridgeCountryInfo::new();
+                let mut bridge_country_info = BridgeCountryInfo::new(date);
                 bridge_country_info.add_info(BridgeInfoType::NegativeReports, date, count_valid);
                 bridge_info
                     .info_by_country
@@ -557,7 +574,7 @@ pub async fn verify_positive_reports(
 /// Process today's positive reports and store the count of verified reports in
 /// the database.
 pub async fn update_positive_reports(db: &Db, distributors: &BTreeMap<BridgeDistributor, String>) {
-    let mut all_positive_reports = match db.get("prs-to-process").unwrap() {
+    let all_positive_reports = match db.get("prs-to-process").unwrap() {
         Some(v) => bincode::deserialize(&v).unwrap(),
         None => BTreeMap::<String, Vec<SerializablePositiveReport>>::new(),
     };
@@ -588,7 +605,7 @@ pub async fn update_positive_reports(db: &Db, distributors: &BTreeMap<BridgeDist
                 bridge_country_info.add_info(BridgeInfoType::PositiveReports, date, count_valid);
             } else {
                 // No existing entry; make a new one.
-                let mut bridge_country_info = BridgeCountryInfo::new();
+                let mut bridge_country_info = BridgeCountryInfo::new(date);
                 bridge_country_info.add_info(BridgeInfoType::PositiveReports, date, count_valid);
                 bridge_info
                     .info_by_country

+ 376 - 7
src/tests.rs

@@ -840,7 +840,7 @@ fn test_analysis() {
 
         bridge_info
             .info_by_country
-            .insert("ru".to_string(), BridgeCountryInfo::new());
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
         let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
         let confidence = 0.95;
 
@@ -899,7 +899,7 @@ fn test_analysis() {
 
         bridge_info
             .info_by_country
-            .insert("ru".to_string(), BridgeCountryInfo::new());
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
         let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
         let confidence = 0.95;
 
@@ -973,7 +973,7 @@ fn test_analysis() {
 
         bridge_info
             .info_by_country
-            .insert("ru".to_string(), BridgeCountryInfo::new());
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
         let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
         let confidence = 0.95;
 
@@ -1025,7 +1025,7 @@ fn test_analysis() {
 
         bridge_info
             .info_by_country
-            .insert("ru".to_string(), BridgeCountryInfo::new());
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
         let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
         let confidence = 0.95;
 
@@ -1127,7 +1127,7 @@ fn test_analysis() {
 
         bridge_info
             .info_by_country
-            .insert("ru".to_string(), BridgeCountryInfo::new());
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
         let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
         let confidence = 0.95;
 
@@ -1230,7 +1230,7 @@ fn test_analysis() {
 
         bridge_info
             .info_by_country
-            .insert("ru".to_string(), BridgeCountryInfo::new());
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
         let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
         let confidence = 0.95;
 
@@ -1324,5 +1324,374 @@ fn test_analysis() {
         );
     }
 
-    // TODO: Test stage 3 analysis
+    // Test stage 3 analysis
+
+    {
+        let mut date = get_date();
+
+        // New bridge info
+        let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
+
+        bridge_info
+            .info_by_country
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
+        let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
+        let confidence = 0.95;
+
+        let mut blocking_countries = HashSet::<String>::new();
+
+        // No data today
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        for i in 1..30 {
+            // 16-32 connections, 0-3 negative reports, 16-20 positive reports each day
+            date += 1;
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::BridgeIps,
+                date,
+                8 * (i % 3 + 2),
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::NegativeReports,
+                date,
+                i % 4,
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::PositiveReports,
+                date,
+                16 + i % 5,
+            );
+            assert_eq!(
+                blocked_in(&analyzer, &bridge_info, confidence, date),
+                blocking_countries
+            );
+        }
+
+        // Data similar to previous days:
+        // 24 connections, 2 negative reports, 17 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            24,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            2,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            17,
+        );
+
+        // Should not be blocked because we have similar data.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 104 connections, 1 negative report, 100 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            104,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            1,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            100,
+        );
+
+        // This should not be blocked even though it's very different because
+        // it's different in the good direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 40 connections, 12 negative reports, 40 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            40,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            12,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            40,
+        );
+        blocking_countries.insert("ru".to_string());
+
+        // This should be blocked because it's different in the bad direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+    }
+
+    {
+        let mut date = get_date();
+
+        // New bridge info
+        let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
+
+        bridge_info
+            .info_by_country
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
+        let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
+        let confidence = 0.95;
+
+        let mut blocking_countries = HashSet::<String>::new();
+
+        // No data today
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        for i in 1..30 {
+            // 16-32 connections, 0-3 negative reports, 16-20 positive reports each day
+            date += 1;
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::BridgeIps,
+                date,
+                8 * (i % 3 + 2),
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::NegativeReports,
+                date,
+                i % 4,
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::PositiveReports,
+                date,
+                16 + i % 5,
+            );
+            assert_eq!(
+                blocked_in(&analyzer, &bridge_info, confidence, date),
+                blocking_countries
+            );
+        }
+
+        // Data similar to previous days:
+        // 24 connections, 2 negative reports, 17 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            24,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            2,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            17,
+        );
+
+        // Should not be blocked because we have similar data.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 104 connections, 1 negative report, 85 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            104,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            1,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            85,
+        );
+
+        // This should not be blocked even though it's very different because
+        // it's different in the good direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 800 connections, 12 negative reports, 750 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            800,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            12,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            750,
+        );
+
+        blocking_countries.insert("ru".to_string());
+
+        // The censor artificially inflated bridge stats to prevent detection.
+        // Ensure we still detect the censorship from negative reports.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+    }
+
+    {
+        let mut date = get_date();
+
+        // New bridge info
+        let mut bridge_info = BridgeInfo::new([0; 20], &String::default());
+
+        bridge_info
+            .info_by_country
+            .insert("ru".to_string(), BridgeCountryInfo::new(date));
+        let analyzer = analysis::NormalAnalyzer::new(5, 0.25);
+        let confidence = 0.95;
+
+        let mut blocking_countries = HashSet::<String>::new();
+
+        // No data today
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        for i in 1..30 {
+            // 16-32 connections, 0-3 negative reports, 16-20 positive reports each day
+            date += 1;
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::BridgeIps,
+                date,
+                8 * (i % 3 + 2),
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::NegativeReports,
+                date,
+                i % 4,
+            );
+            bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+                BridgeInfoType::PositiveReports,
+                date,
+                16 + i % 5,
+            );
+            assert_eq!(
+                blocked_in(&analyzer, &bridge_info, confidence, date),
+                blocking_countries
+            );
+        }
+
+        // Data similar to previous days:
+        // 24 connections, 2 negative reports, 17 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            24,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            2,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            17,
+        );
+
+        // Should not be blocked because we have similar data.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 104 connections, 1 negative report, 100 positive reports
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            104,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            1,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            100,
+        );
+
+        // This should not be blocked even though it's very different because
+        // it's different in the good direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+
+        // Data different from previous days:
+        // 24 connections, 1 negative report, 1 positive report
+        date += 1;
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::BridgeIps,
+            date,
+            24,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::NegativeReports,
+            date,
+            1,
+        );
+        bridge_info.info_by_country.get_mut("ru").unwrap().add_info(
+            BridgeInfoType::PositiveReports,
+            date,
+            1,
+        );
+
+        blocking_countries.insert("ru".to_string());
+
+        // This should be blocked because it's different in the bad direction.
+        assert_eq!(
+            blocked_in(&analyzer, &bridge_info, confidence, date),
+            blocking_countries
+        );
+    }
 }