| 123456789101112131415161718192021222324252627282930313233343536373839404142 |
#!/bin/bash
# Prepare cleaned per-bridge data files for the bridges in data/all-bridges.
#
# Steps:
#   1. If data/all-bridges does not exist, run scripts/get-email-bridges.sh
#      (expected to create that file).
#   2. If data/bridge_data_cleaned does not exist, unpack every
#      *_processed.tar.xz archive found in data/bridge_data, then write one
#      cleaned file per fingerprint to data/bridge_data_cleaned/<FINGERPRINT>.
#
# All paths are relative, so run this from the directory that contains
# data/ and scripts/.
#
# Exit status: 1 if an archive cannot be extracted, if data/bridge_data
# cannot be entered, if data/all-bridges is missing when cleaning starts,
# or if the output directory cannot be created.

# Filter stdin to stdout, printing a line only when it does not start with
# the 7-character prefix of the most recently printed line.
# Fed with input sorted so that the preferred line of each prefix group
# comes first, this keeps exactly one line per group.
keep_first_per_prefix() {
  local line
  local prefix="placeholder"
  # IFS= and -r keep each data line byte-for-byte (no trimming, no
  # backslash processing).
  while IFS= read -r line; do
    if [[ "$line" != "$prefix"* ]]; then
      printf '%s\n' "$line"
      prefix="${line:0:7}"
    fi
  done
}

# If we haven't already extracted our set of bridges distributed in
# February 2021, do that now.
if [ ! -f data/all-bridges ]; then
  echo "Getting list of bridges distributed in 2021 February"
  ./scripts/get-email-bridges.sh
fi

# Clean bridge data (sort, remove duplicates)
if [ ! -d data/bridge_data_cleaned ]; then
  echo "Extracting checkpoints"
  # Extract inside a subshell so the caller's working directory is never
  # changed, and so a failed cd cannot cause extraction in the wrong place.
  (
    cd data/bridge_data || exit 1
    for archive in *_processed.tar.xz; do
      echo "$archive"
      tar xf "$archive" || exit 1
    done
  ) || exit 1

  echo "Cleaning data for bridges distributed in 2021 February"
  # Refuse to create the output directory without an input list: an empty
  # data/bridge_data_cleaned would make every later run skip this section.
  if [ ! -f data/all-bridges ]; then
    echo "data/all-bridges not found" >&2
    exit 1
  fi
  mkdir data/bridge_data_cleaned || exit 1

  # The "|| [ -n ... ]" clause also processes a final line that lacks a
  # trailing newline.
  while read -r fpr || [ -n "$fpr" ]; do
    fpr=$(printf '%s' "$fpr" | tr '[:lower:]' '[:upper:]')
    [ -n "$fpr" ] || continue

    # Test for data with the same glob that is read below. When nothing
    # matches, the pattern stays literal and the -e test fails.
    sources=(data/bridge_data/*/bridge_data/"$fpr")
    if [ -e "${sources[0]}" ]; then
      # Get only highest number of observed connections for each day:
      # reverse-numeric sort on characters 1-7 (presumably the day), then on
      # characters 9 onward (presumably the count), puts each day's largest
      # value first; the filter keeps that first line, and the final sort
      # restores ascending order.
      sort -r -n -k1.1,1.7 -k1.9 "${sources[@]}" \
        | keep_first_per_prefix \
        | sort > "data/bridge_data_cleaned/${fpr}"
    else
      echo "No data/bridge_data/${fpr}"
    fi
  done < data/all-bridges
fi
|