|
@@ -3,21 +3,9 @@ Note, this is NOT a complete implementation of the document format.
|
|
|
(https://spec.torproject.org/dir-spec/extra-info-document-format.html) */
|
|
|
|
|
|
use chrono::DateTime;
|
|
|
-use http::status::StatusCode;
|
|
|
-use http_body_util::{BodyExt, Empty};
|
|
|
-use hyper::body::Bytes;
|
|
|
-use hyper_util::{client::legacy::Client, rt::TokioExecutor};
|
|
|
use julianday::JulianDay;
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
-use std::{
|
|
|
- collections::{BTreeMap, HashMap, HashSet},
|
|
|
- fs::File,
|
|
|
- io::{prelude::*, BufReader, Write},
|
|
|
- path::Path,
|
|
|
-};
|
|
|
-
|
|
|
-/// Directory where we store these files
|
|
|
-pub const DIRECTORY: &str = "extra_infos";
|
|
|
+use std::collections::{BTreeMap, HashMap, HashSet};
|
|
|
|
|
|
/// Fields we need from extra-info document
|
|
|
#[derive(Eq, PartialEq, Hash, Serialize, Deserialize)]
|
|
@@ -40,182 +28,112 @@ pub struct ExtraInfo {
|
|
|
pub bridge_ips: BTreeMap<String, u32>, // TODO: What size for count?
|
|
|
}
|
|
|
|
|
|
-fn get_extra_info_or_error(entry: &HashMap<String, String>) -> Result<ExtraInfo, String> {
|
|
|
- if !entry.contains_key("nickname") || !entry.contains_key("fingerprint") {
|
|
|
- // How did we get here??
|
|
|
- return Err("Cannot parse extra-info: Missing nickname or fingerprint".to_string());
|
|
|
- }
|
|
|
- if !(entry.contains_key("bridge-stats-end") || entry.contains_key("published"))
|
|
|
- || !entry.contains_key("bridge-ips")
|
|
|
- {
|
|
|
- // Some extra-infos are missing data on connecting IPs...
|
|
|
- // But we can't do anything in that case.
|
|
|
- return Err(format!(
|
|
|
- "Failed to parse extra-info for {} {}",
|
|
|
- entry.get("nickname").unwrap(),
|
|
|
- entry.get("fingerprint").unwrap()
|
|
|
- ));
|
|
|
- }
|
|
|
- let nickname = entry.get("nickname").unwrap().to_string();
|
|
|
- let fingerprint_str = entry.get("fingerprint").unwrap();
|
|
|
- if fingerprint_str.len() != 40 {
|
|
|
- return Err("Fingerprint must be 20 bytes".to_string());
|
|
|
- }
|
|
|
- let fingerprint = array_bytes::hex2array(fingerprint_str).unwrap();
|
|
|
- let date: u32 = {
|
|
|
- let date_str = if entry.contains_key("bridge-stats-end") {
|
|
|
- let line = entry.get("bridge-stats-end").unwrap();
|
|
|
- // Parse out (86400 s) from end of line
|
|
|
- &line[..line.find("(").unwrap() - 1]
|
|
|
- } else {
|
|
|
- entry.get("published").unwrap().as_str()
|
|
|
- };
|
|
|
- JulianDay::from(
|
|
|
- DateTime::parse_from_str(&(date_str.to_owned() + " +0000"), "%F %T %z")
|
|
|
- .unwrap()
|
|
|
- .date_naive(),
|
|
|
- )
|
|
|
- .inner()
|
|
|
- .try_into()
|
|
|
- .unwrap()
|
|
|
- };
|
|
|
- let bridge_ips_str = entry.get("bridge-ips").unwrap();
|
|
|
- let mut bridge_ips: BTreeMap<String, u32> = BTreeMap::new();
|
|
|
- let countries: Vec<&str> = bridge_ips_str.split(',').collect();
|
|
|
- for country in countries {
|
|
|
- if country != "" {
|
|
|
- // bridge-ips may be empty
|
|
|
- let (cc, count) = country.split_once('=').unwrap();
|
|
|
- bridge_ips.insert(cc.to_string(), count.parse::<u32>().unwrap());
|
|
|
+impl ExtraInfo {
|
|
|
+ /// Converts a map of keys and values into an ExtraInfo if all necessary fields
|
|
|
+ /// are represented.
|
|
|
+ fn from_map(entry: &HashMap<String, String>) -> Result<Self, String> {
|
|
|
+ if !entry.contains_key("nickname") || !entry.contains_key("fingerprint") {
|
|
|
+ // How did we get here??
|
|
|
+ return Err("Cannot parse extra-info: Missing nickname or fingerprint".to_string());
|
|
|
}
|
|
|
- }
|
|
|
-
|
|
|
- Ok(ExtraInfo {
|
|
|
- nickname,
|
|
|
- fingerprint,
|
|
|
- date,
|
|
|
- bridge_ips,
|
|
|
- })
|
|
|
-}
|
|
|
-
|
|
|
-pub fn add_extra_infos<'a>(filename: &str, set: &mut HashSet<ExtraInfo>) {
|
|
|
- let infile = File::open(format!("{}/{}", DIRECTORY, filename)).unwrap();
|
|
|
- let reader = BufReader::new(infile);
|
|
|
-
|
|
|
- let mut entry = HashMap::<String, String>::new();
|
|
|
- for line in reader.lines() {
|
|
|
- let line = line.unwrap();
|
|
|
- if line.starts_with("@type bridge-extra-info ") {
|
|
|
- if !entry.is_empty() {
|
|
|
- let extra_info = get_extra_info_or_error(&entry);
|
|
|
- if extra_info.is_ok() {
|
|
|
- set.insert(extra_info.unwrap());
|
|
|
- } else {
|
|
|
- // Just print the error and continue.
|
|
|
- println!("{}", extra_info.err().unwrap());
|
|
|
- }
|
|
|
- entry = HashMap::<String, String>::new();
|
|
|
- }
|
|
|
- } else {
|
|
|
- if line.starts_with("extra-info ") {
|
|
|
- // extra-info line has format:
|
|
|
- // extra-info <nickname> <fingerprint>
|
|
|
- let line_split: Vec<&str> = line.split(' ').collect();
|
|
|
- if line_split.len() != 3 {
|
|
|
- println!("Misformed extra-info line");
|
|
|
- } else {
|
|
|
- entry.insert("nickname".to_string(), line_split[1].to_string());
|
|
|
- entry.insert("fingerprint".to_string(), line_split[2].to_string());
|
|
|
- }
|
|
|
- } else {
|
|
|
- let (key, value) = match line.split_once(' ') {
|
|
|
- Some((k, v)) => (k, v),
|
|
|
- None => (line.as_str(), ""),
|
|
|
- };
|
|
|
- entry.insert(key.to_string(), value.to_string());
|
|
|
- }
|
|
|
+ if !(entry.contains_key("bridge-stats-end") || entry.contains_key("published"))
|
|
|
+ || !entry.contains_key("bridge-ips")
|
|
|
+ {
|
|
|
+ // Some extra-infos are missing data on connecting IPs...
|
|
|
+ // But we can't do anything in that case.
|
|
|
+ return Err(format!(
|
|
|
+ "Failed to parse extra-info for {} {}",
|
|
|
+ entry.get("nickname").unwrap(),
|
|
|
+ entry.get("fingerprint").unwrap()
|
|
|
+ ));
|
|
|
}
|
|
|
- }
|
|
|
- // Do for the last one
|
|
|
- let extra_info = get_extra_info_or_error(&entry);
|
|
|
- if extra_info.is_ok() {
|
|
|
- set.insert(extra_info.unwrap());
|
|
|
- } else {
|
|
|
- println!("{}", extra_info.err().unwrap());
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-/// Download new extra-infos files and save them in DIRECTORY. This function
|
|
|
-/// returns the set of newly downloaded filenames.
|
|
|
-pub async fn download_extra_infos(
|
|
|
- base_url: &str,
|
|
|
-) -> Result<HashSet<String>, Box<dyn std::error::Error + Send + Sync>> {
|
|
|
- // Download directory of recent extra-infos
|
|
|
- let url = base_url.parse().unwrap();
|
|
|
- let https = hyper_rustls::HttpsConnectorBuilder::new()
|
|
|
- .with_native_roots() // TODO: Pin certificate? Is this data signed/verifiable?
|
|
|
- .expect("no native root CA certificates found")
|
|
|
- .https_only()
|
|
|
- .enable_http1()
|
|
|
- .build();
|
|
|
-
|
|
|
- let client: Client<_, Empty<Bytes>> = Client::builder(TokioExecutor::new()).build(https);
|
|
|
-
|
|
|
- println!("Downloading {}", base_url);
|
|
|
- let mut res = client.get(url).await?;
|
|
|
-
|
|
|
- assert_eq!(res.status(), StatusCode::OK);
|
|
|
- let mut body_str = String::from("");
|
|
|
- while let Some(next) = res.frame().await {
|
|
|
- let frame = next?;
|
|
|
- if let Some(chunk) = frame.data_ref() {
|
|
|
- body_str.push_str(&String::from_utf8(chunk.to_vec())?);
|
|
|
+ let nickname = entry.get("nickname").unwrap().to_string();
|
|
|
+ let fingerprint_str = entry.get("fingerprint").unwrap();
|
|
|
+ if fingerprint_str.len() != 40 {
|
|
|
+ return Err("Fingerprint must be 20 bytes".to_string());
|
|
|
}
|
|
|
- }
|
|
|
-
|
|
|
- // Removed because it caused some problem...
|
|
|
- //let doc = Document::from(body_str.clone().as_str());
|
|
|
- // Instead, do this
|
|
|
- let mut links = HashSet::<String>::new();
|
|
|
- for line in body_str.lines() {
|
|
|
- let begin_match = "<a href=\"";
|
|
|
- let end_match = "\">";
|
|
|
- if line.contains(begin_match) {
|
|
|
- let link = &line[line.find(begin_match).unwrap() + begin_match.len()..];
|
|
|
- if link.contains(end_match) {
|
|
|
- let link = &link[0..link.find(end_match).unwrap()];
|
|
|
- links.insert(link.to_string());
|
|
|
+ let fingerprint = array_bytes::hex2array(fingerprint_str).unwrap();
|
|
|
+ let date: u32 = {
|
|
|
+ let date_str = if entry.contains_key("bridge-stats-end") {
|
|
|
+ let line = entry.get("bridge-stats-end").unwrap();
|
|
|
+ // Parse out (86400 s) from end of line
|
|
|
+ &line[..line.find("(").unwrap() - 1]
|
|
|
+ } else {
|
|
|
+ entry.get("published").unwrap().as_str()
|
|
|
+ };
|
|
|
+ JulianDay::from(
|
|
|
+ DateTime::parse_from_str(&(date_str.to_owned() + " +0000"), "%F %T %z")
|
|
|
+ .unwrap()
|
|
|
+ .date_naive(),
|
|
|
+ )
|
|
|
+ .inner()
|
|
|
+ .try_into()
|
|
|
+ .unwrap()
|
|
|
+ };
|
|
|
+ let bridge_ips_str = entry.get("bridge-ips").unwrap();
|
|
|
+ let mut bridge_ips: BTreeMap<String, u32> = BTreeMap::new();
|
|
|
+ let countries: Vec<&str> = bridge_ips_str.split(',').collect();
|
|
|
+ for country in countries {
|
|
|
+ if country != "" {
|
|
|
+ // bridge-ips may be empty
|
|
|
+ let (cc, count) = country.split_once('=').unwrap();
|
|
|
+ bridge_ips.insert(cc.to_string(), count.parse::<u32>().unwrap());
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- // Create extra-infos directory if it doesn't exist
|
|
|
- std::fs::create_dir_all(&DIRECTORY)?;
|
|
|
-
|
|
|
- let mut new_files = HashSet::<String>::new();
|
|
|
-
|
|
|
- // Go through all the links in the page and download new files
|
|
|
- for link in links {
|
|
|
- if link.ends_with("-extra-infos") {
|
|
|
- let filename = format!("{}/{}", DIRECTORY, link);
|
|
|
+ Ok(Self {
|
|
|
+ nickname,
|
|
|
+ fingerprint,
|
|
|
+ date,
|
|
|
+ bridge_ips,
|
|
|
+ })
|
|
|
+ }
|
|
|
|
|
|
- // Download file if it's not already downloaded
|
|
|
- if !Path::new(&filename).exists() {
|
|
|
- let extra_infos_url = format!("{}{}", base_url, link);
|
|
|
- println!("Downloading {}", extra_infos_url);
|
|
|
- let mut res = client.get(extra_infos_url.parse().unwrap()).await.unwrap();
|
|
|
- assert_eq!(res.status(), StatusCode::OK);
|
|
|
- let mut file = std::fs::File::create(filename).unwrap();
|
|
|
- while let Some(next) = res.frame().await {
|
|
|
- let frame = next?;
|
|
|
- if let Some(chunk) = frame.data_ref() {
|
|
|
- file.write_all(&chunk)?;
|
|
|
+ /// Accepts a downloaded extra-infos file as a big string, returns a set of
|
|
|
+ /// the ExtraInfos represented by the file.
|
|
|
+ pub fn parse_file<'a>(extra_info_str: &str) -> HashSet<Self> {
|
|
|
+ let mut set = HashSet::<Self>::new();
|
|
|
+ let mut entry = HashMap::<String, String>::new();
|
|
|
+ for line in extra_info_str.lines() {
|
|
|
+ let line = line;
|
|
|
+ if line.starts_with("@type bridge-extra-info ") {
|
|
|
+ if !entry.is_empty() {
|
|
|
+ let extra_info = Self::from_map(&entry);
|
|
|
+ if extra_info.is_ok() {
|
|
|
+ set.insert(extra_info.unwrap());
|
|
|
+ } else {
|
|
|
+ // Just print the error and continue.
|
|
|
+ println!("{}", extra_info.err().unwrap());
|
|
|
+ }
|
|
|
+ entry = HashMap::<String, String>::new();
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if line.starts_with("extra-info ") {
|
|
|
+ // extra-info line has format:
|
|
|
+ // extra-info <nickname> <fingerprint>
|
|
|
+ let line_split: Vec<&str> = line.split(' ').collect();
|
|
|
+ if line_split.len() != 3 {
|
|
|
+ println!("Misformed extra-info line");
|
|
|
+ } else {
|
|
|
+ entry.insert("nickname".to_string(), line_split[1].to_string());
|
|
|
+ entry.insert("fingerprint".to_string(), line_split[2].to_string());
|
|
|
}
|
|
|
+ } else {
|
|
|
+ let (key, value) = match line.split_once(' ') {
|
|
|
+ Some((k, v)) => (k, v),
|
|
|
+ None => (line, ""),
|
|
|
+ };
|
|
|
+ entry.insert(key.to_string(), value.to_string());
|
|
|
}
|
|
|
- new_files.insert(link.to_string());
|
|
|
}
|
|
|
}
|
|
|
+ // Do for the last one
|
|
|
+ let extra_info = Self::from_map(&entry);
|
|
|
+ if extra_info.is_ok() {
|
|
|
+ set.insert(extra_info.unwrap());
|
|
|
+ } else {
|
|
|
+ println!("{}", extra_info.err().unwrap());
|
|
|
+ }
|
|
|
+ set
|
|
|
}
|
|
|
-
|
|
|
- Ok(new_files)
|
|
|
}
|