#!/bin/bash
#
# Prepare per-bridge data for the bridges distributed in 2021 February.
#
# Steps (each one is skipped when its output already exists):
#   1. If data/all-bridges is missing, run ./scripts/get-email-bridges.sh
#      (which is expected to create it).
#   2. If data/bridge_data_cleaned/ is missing, create it and, for every
#      fingerprint listed in data/all-bridges, write a cleaned copy of
#      data/bridge_data/<FPR> to data/bridge_data_cleaned/<FPR>.
#
# All paths are relative, so run this from the directory that contains
# data/ and scripts/.

#######################################
# Filter stdin to stdout, printing a line only when it does not start with
# the first seven characters of the most recently printed line. On input
# that is grouped by that 7-character key, this keeps the first line of
# every group.
# Blank lines are skipped: an empty remembered prefix would match every
# following line and suppress the rest of the input.
# Arguments: none
# Outputs:   the retained lines, one per line, on stdout
#######################################
keep_first_per_prefix() {
  local line
  local prefix=""
  while read -r line; do
    [[ -n "$line" ]] || continue
    if [[ -z "$prefix" || "$line" != "$prefix"* ]]; then
      # printf rather than echo so a line such as "-n" is printed verbatim.
      printf '%s\n' "$line"
      prefix="${line:0:7}"
    fi
  done
}

# If we haven't already extracted our set of bridges distributed in 2021
# Feb, do that now.
if [[ ! -f data/all-bridges ]]; then
  echo "Getting list of bridges distributed in 2021 February"
  # The helper's exit-status contract is not visible from here, so a
  # non-zero status is only reported; whether we can continue is decided
  # below by checking that data/all-bridges actually exists.
  ./scripts/get-email-bridges.sh \
    || echo "warning: ./scripts/get-email-bridges.sh exited with status $?" >&2
fi

# Clean bridge data (sort, remove duplicates)
if [[ ! -d data/bridge_data_cleaned ]]; then
  # Check the input BEFORE creating the output directory. Otherwise a failed
  # first step would leave an empty data/bridge_data_cleaned/ behind, and
  # every later run would skip the cleaning step because the directory
  # already exists.
  if [[ ! -f data/all-bridges ]]; then
    echo "data/all-bridges not found; cannot clean bridge data" >&2
    exit 1
  fi

  echo "Cleaning data for bridges distributed in 2021 February"
  mkdir data/bridge_data_cleaned || exit 1

  # Fingerprints are upper-cased once for the whole list instead of once per
  # line. `|| [[ -n "$fpr" ]]` keeps a final line that lacks a trailing
  # newline; plain `read` (default IFS) still trims surrounding whitespace.
  while read -r fpr || [[ -n "$fpr" ]]; do
    [[ -n "$fpr" ]] || continue

    if [[ -f "data/bridge_data/${fpr}" ]]; then
      # Get only highest number of observed connections for each day.
      # The first sort orders lines in reverse numeric order, first by
      # characters 1-7 and then by the text from character 9 on, so the
      # largest value comes first within each 7-character key. (Presumably
      # the key is the day and the rest is the connection count -- confirm
      # against the data files.) The filter keeps that first line per key,
      # and the final sort puts the survivors back in ascending order.
      sort -r -n -k1.1,1.7 -k1.9 "data/bridge_data/${fpr}" \
        | keep_first_per_prefix \
        | sort > "data/bridge_data_cleaned/${fpr}"
    else
      echo "No data/bridge_data/${fpr}"
    fi
  done < <(tr '[:lower:]' '[:upper:]' < data/all-bridges)
fi