Browse Source

Use bridge-stats-end if available, refactor db to counts of reports

Vecna 1 year ago
parent
commit
cde22fef67
2 changed files with 93 additions and 72 deletions
  1. 25 14
      src/extra_info.rs
  2. 68 58
      src/lib.rs

+ 25 - 14
src/extra_info.rs

@@ -26,9 +26,11 @@ pub struct ExtraInfo {
     pub nickname: String,
     /// Bridge fingerprint, a SHA-1 hash of the bridge ID
     pub fingerprint: [u8; 20],
-    /// Date (in UTC) that this document was published, stored as a Julian
-    /// date because we don't need to know more precisely than the day.
-    pub published: u32,
+    /// Date (in UTC) that this document covers (taken from bridge-stats-end
+    /// if available) or, failing that, the date the document was published
+    /// (taken from published), stored as a Julian date because we don't need
+    /// to know more precisely than the day.
+    pub date: u32,
     /// Map of country codes and how many users (rounded up to a multiple of
     /// 8) have connected to that bridge during the day.
     /// Uses BTreeMap instead of HashMap so ExtraInfo can implement Hash.
@@ -40,7 +42,7 @@ fn get_extra_info_or_error(entry: &HashMap<String, String>) -> Result<ExtraInfo,
         // How did we get here??
         return Err("Cannot parse extra-info: Missing nickname or fingerprint".to_string());
     }
-    if !entry.contains_key("published") || !entry.contains_key("bridge-ips") {
+    if !(entry.contains_key("bridge-stats-end") || entry.contains_key("published")) || !entry.contains_key("bridge-ips") {
         // Some extra-infos are missing data on connecting IPs...
         // But we can't do anything in that case.
         return Err(format!(
@@ -55,17 +57,26 @@ fn get_extra_info_or_error(entry: &HashMap<String, String>) -> Result<ExtraInfo,
         return Err("Fingerprint must be 20 bytes".to_string());
     }
     let fingerprint = array_bytes::hex2array(fingerprint_str).unwrap();
-    let published: u32 = JulianDay::from(
-        DateTime::parse_from_str(
-            &(entry.get("published").unwrap().to_owned() + " +0000"),
-            "%F %T %z",
+    let date: u32 = {
+        let date_str = if entry.contains_key("bridge-stats-end") {
+            let line = entry.get("bridge-stats-end").unwrap();
+            // Strip the trailing " (86400 s)" interval from the end of the line
+            &line[..line.find("(").unwrap()-1]
+        } else {
+            entry.get("published").unwrap().as_str()
+        };
+        JulianDay::from(
+            DateTime::parse_from_str(
+                &(date_str.to_owned() + " +0000"),
+                "%F %T %z",
+            )
+            .unwrap()
+            .date_naive(),
         )
+        .inner()
+        .try_into()
         .unwrap()
-        .date_naive(),
-    )
-    .inner()
-    .try_into()
-    .unwrap();
+    };
     let bridge_ips_str = entry.get("bridge-ips").unwrap();
     let mut bridge_ips: BTreeMap<String, u32> = BTreeMap::new();
     let countries: Vec<&str> = bridge_ips_str.split(',').collect();
@@ -80,7 +91,7 @@ fn get_extra_info_or_error(entry: &HashMap<String, String>) -> Result<ExtraInfo,
     Ok(ExtraInfo {
         nickname,
         fingerprint,
-        published,
+        date,
         bridge_ips,
     })
 }

+ 68 - 58
src/lib.rs

@@ -74,10 +74,10 @@ pub struct BridgeInfo {
 }
 
 impl BridgeInfo {
-    pub fn new(fingerprint: [u8; 20], nickname: String) -> Self {
+    pub fn new(fingerprint: [u8; 20], nickname: &String) -> Self {
         Self {
             fingerprint: fingerprint,
-            nickname: nickname,
+            nickname: nickname.to_string(),
             first_seen: get_date(),
             is_blocked: false,
             info_by_day: HashMap::<u32, DailyBridgeInfo>::new(),
@@ -106,49 +106,79 @@ impl fmt::Display for BridgeInfo {
     }
 }
 
-// TODO: Should this be an enum to make it easier to implement different
-// versions for plugins?
+#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Serialize, Deserialize)]
+pub enum BridgeInfoType {
+    BridgeIps,
+    NegativeReports,
+    PositiveReports,
+}
 
 /// Information about bridge reachability, gathered daily
 #[derive(Serialize, Deserialize)]
 pub struct DailyBridgeInfo {
-    /// Map of country codes and how many users (rounded up to a multiple of
-    /// 8) have connected to that bridge during the day.
-    pub bridge_ips: BTreeMap<String, u32>,
-    /// Map of negative reports to count of negative reports received
-    pub negative_reports: BTreeMap<SerializableNegativeReport, u32>,
-    /// Set of positive reports received during this day
-    pub positive_reports: Vec<SerializablePositiveReport>,
-    // We don't care about ordering of the reports, but I'm using vectors for
-    // reports because we don't want a set to deduplicate our reports, and
-    // I don't want to implement Hash or Ord. Another possibility might be a
-    // map of the report to the number of that exact report we received.
-    // Positive reports include a Lox proof and should be unique, but negative
-    // reports could be deduplicated.
+    pub info_by_country: BTreeMap<String, BTreeMap<BridgeInfoType, u32>>,
 }
 
 impl DailyBridgeInfo {
     pub fn new() -> Self {
         Self {
-            bridge_ips: BTreeMap::<String, u32>::new(),
-            negative_reports: BTreeMap::<SerializableNegativeReport, u32>::new(),
-            positive_reports: Vec::<SerializablePositiveReport>::new(),
+            info_by_country: BTreeMap::<String, BTreeMap::<BridgeInfoType, u32>>::new(),
+        }
+    }
+
+    pub fn add_info(&mut self, info_type: BridgeInfoType, count_per_country: &BTreeMap::<String, u32>) {
+        for country in count_per_country.keys() {
+            if self.info_by_country.contains_key(country) {
+                let info = self.info_by_country.get_mut(country).unwrap();
+                if !info.contains_key(&info_type) {
+                    info.insert(info_type, *count_per_country.get(&country.to_string()).unwrap());
+                } else if info_type == BridgeInfoType::BridgeIps {
+                    // Use greatest value we've seen today
+                    if info.get(&info_type).unwrap() < count_per_country.get(country).unwrap() {
+                        info.insert(BridgeInfoType::BridgeIps, *count_per_country.get(&country.to_string()).unwrap());
+                    }
+                } else {
+                    let new_count = info.get(&info_type).unwrap() + *count_per_country.get(&country.to_string()).unwrap();
+                    info.insert(info_type, new_count);
+                }
+            } else {
+                let mut info = BTreeMap::<BridgeInfoType, u32>::new();
+                info.insert(info_type, *count_per_country.get(&country.to_string()).unwrap());
+                self.info_by_country.insert(country.to_string(), info);
+            }
         }
     }
 }
 
 impl fmt::Display for DailyBridgeInfo {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let mut str = String::from("bridge_ips:");
-        for country in self.bridge_ips.keys() {
-            str.push_str(
-                format!(
-                    "\n  cc: {}, connections: {}",
-                    country,
-                    self.bridge_ips.get(country).unwrap()
-                )
-                .as_str(),
-            );
+        let mut str = String::from("info:");
+        for country in self.info_by_country.keys() {
+            let info = self.info_by_country.get(country).unwrap();
+            let ip_count = match info.get(&BridgeInfoType::BridgeIps) {
+                Some(v) => v,
+                None => &0,
+            };
+            let nr_count = match info.get(&BridgeInfoType::NegativeReports) {
+                Some(v) => v,
+                None => &0,
+            };
+            let pr_count = match info.get(&BridgeInfoType::PositiveReports) {
+                Some(v) => v,
+                None => &0,
+            };
+            if ip_count > &0 || nr_count > &0 || pr_count > &0 {
+                str.push_str(
+                    format!(
+                        "\n  cc: {}\n    connections: {}\n    negative reports: {}\n    positive reports: {}",
+                        country,
+                        ip_count,
+                        nr_count,
+                        pr_count,
+                    )
+                    .as_str(),
+                );
+            }
         }
         write!(f, "{}", str)
     }
@@ -162,45 +192,25 @@ pub fn add_extra_info_to_db(db: &Db, extra_info: ExtraInfo) {
     let fingerprint = extra_info.fingerprint;
     let mut bridge_info = match db.get(&fingerprint).unwrap() {
         Some(v) => bincode::deserialize(&v).unwrap(),
-        None => BridgeInfo::new(fingerprint, extra_info.nickname),
+        None => BridgeInfo::new(fingerprint, &extra_info.nickname),
     };
     // If we already have an entry, compare it with the new one. For each
     // country:count mapping, use the greater of the two counts.
-    if bridge_info.info_by_day.contains_key(&extra_info.published) {
+    if bridge_info.info_by_day.contains_key(&extra_info.date) {
         let daily_bridge_info = bridge_info
             .info_by_day
-            .get_mut(&extra_info.published)
+            .get_mut(&extra_info.date)
             .unwrap();
-        if extra_info.bridge_ips != daily_bridge_info.bridge_ips {
-            for country in extra_info.bridge_ips.keys() {
-                if daily_bridge_info.bridge_ips.contains_key(country) {
-                    // Use greatest value we've seen today
-                    if daily_bridge_info.bridge_ips.get(country).unwrap()
-                        < extra_info.bridge_ips.get(country).unwrap()
-                    {
-                        daily_bridge_info.bridge_ips.insert(
-                            country.to_string(),
-                            *extra_info.bridge_ips.get(country).unwrap(),
-                        );
-                    }
-                } else {
-                    daily_bridge_info.bridge_ips.insert(
-                        country.to_string(),
-                        *extra_info.bridge_ips.get(country).unwrap(),
-                    );
-                }
-            }
-        }
+        daily_bridge_info.add_info(BridgeInfoType::BridgeIps, &extra_info.bridge_ips);
     } else {
         // No existing entry; make a new one.
-        let daily_bridge_info = DailyBridgeInfo {
-            bridge_ips: extra_info.bridge_ips,
-            negative_reports: BTreeMap::<SerializableNegativeReport, u32>::new(),
-            positive_reports: Vec::<SerializablePositiveReport>::new(),
+        let mut daily_bridge_info = DailyBridgeInfo {
+            info_by_country: BTreeMap::<String, BTreeMap::<BridgeInfoType, u32>>::new(),
         };
+        daily_bridge_info.add_info(BridgeInfoType::BridgeIps, &extra_info.bridge_ips);
         bridge_info
             .info_by_day
-            .insert(extra_info.published, daily_bridge_info);
+            .insert(extra_info.date, daily_bridge_info);
     }
     // Commit changes to database
     db.insert(fingerprint, bincode::serialize(&bridge_info).unwrap())