|
@@ -1,3 +1,4 @@
|
|
|
+use rand_distr::Distribution;
|
|
|
use rayon::prelude::*;
|
|
|
use sam_extractor::*;
|
|
|
use std::collections::HashMap;
|
|
@@ -15,10 +16,21 @@ fn main() {
|
|
|
let this_program = args.next().unwrap();
|
|
|
|
|
|
if args.len() < 2 {
|
|
|
- panic!("Usage: {} stats_directory chat.json...", this_program);
|
|
|
+ panic!(
|
|
|
+ "Usage: {} [-s file_sizes.dat] stats_directory chat.json...",
|
|
|
+ this_program
|
|
|
+ );
|
|
|
}
|
|
|
|
|
|
- let dists_dir = args.next().unwrap();
|
|
|
+ let first_arg = args.next().unwrap();
|
|
|
+ let (file_sizes, dists_dir) = if first_arg != "-s" {
|
|
|
+ (None, first_arg)
|
|
|
+ } else {
|
|
|
+ (
|
|
|
+ Some(parse_weights_file(args.next().unwrap()).unwrap()),
|
|
|
+ args.next().unwrap(),
|
|
|
+ )
|
|
|
+ };
|
|
|
|
|
|
let conversations = args
|
|
|
.flat_map(|a| glob::glob(a.as_str()).unwrap())
|
|
@@ -33,9 +45,16 @@ fn main() {
|
|
|
.collect::<Vec<_>>();
|
|
|
|
|
|
let mut users: HashMap<UserId, Vec<usize>> = HashMap::new();
|
|
|
+ let mut rng = rand::thread_rng();
|
|
|
for conversation in conversations {
|
|
|
for message in conversation.messages {
|
|
|
- let message_len = bytes_to_blocks(message.char_count + message.emoji_count as i32 * 4);
|
|
|
+ let file_size = if let Some((ref dist, ref sizes)) = file_sizes {
|
|
|
+ sizes[dist.sample(&mut rng)]
|
|
|
+ } else {
|
|
|
+ 0
|
|
|
+ };
|
|
|
+ let message_len =
|
|
|
+ bytes_to_blocks(message.char_count + message.emoji_count as i32 * 4 + file_size);
|
|
|
if let Some(lens) = users.get_mut(&message.user) {
|
|
|
lens.push(message_len);
|
|
|
} else {
|