#!/bin/bash
#
# Extract one archive of bridge descriptors and, for every descriptor file in
# it, append a "<julian_day>,<count>" line to a file named after the
# descriptor's fingerprint. The per-fingerprint files are then packed into
# data/bridge_data/NAME_processed.tar.xz and the working directory is removed.
#
# Usage:   <this script> NAME[.tar.xz]
# Reads:   data/NAME.tar.xz                 (relative to the current directory)
# Writes:  data/bridge_data/NAME_processed.tar.xz
# Exits:   1 on a missing argument or when setup/extraction/archiving fails.
#
# The archive is expected to unpack to NAME/<dir>/<dir>/<descriptor file>.
# From each descriptor the script reads:
#   extra-info <nickname> <fingerprint>
#   published  <date> <time>
#   bridge-ips ...,by=<count>,...           (count defaults to 0 when absent)

filename="${1%.tar.xz}"

# With an empty name the cleanup below would expand to
# `rm -rf data/bridge_data/` and destroy every earlier result.
if [[ -z "${filename}" ]]; then
  echo "Usage: $0 NAME[.tar.xz]" >&2
  exit 1
fi

if [[ -f "data/bridge_data/${filename}_processed.tar.xz" ]]; then
  echo "Already processed ${filename}.tar.xz"
else
  work_dir="data/bridge_data/${filename}"

  # Clean up any files from past runs
  rm -rf -- "${work_dir:?}"

  echo "Extracting ${filename}.tar.xz"
  # Stop if any setup step fails; otherwise the tar/rm/cd commands further
  # down would run relative to the wrong directory.
  if ! { mkdir -p -- "${work_dir}/bridge_data" \
      && cp -- "data/${filename}.tar.xz" "${work_dir}/" \
      && cd -- "${work_dir}"; }; then
    echo "Could not prepare ${work_dir}" >&2
    exit 1
  fi
  tar xf "${filename}.tar.xz" || exit 1

  echo "Processing ${filename}"
  for descriptor in "${filename}"/*/*/*; do
    # Skip empty files, directories, and the literal pattern that the glob
    # leaves behind when nothing matches.
    [[ -f "${descriptor}" && -s "${descriptor}" ]] || continue

    # "extra-info <nickname> <fingerprint>": keep what follows the nickname.
    fingerprint=$(grep -Po '(?<=^extra-info )(.*)(?=$)' "${descriptor}" \
      | grep -Po '(?<= )(.*)(?=$)')

    # "published <date> <time>": the greedy match up to the last space keeps
    # only the date part.
    published=$(grep -Po '(?<=^published )(.*)(?= )' "${descriptor}")

    # Convert to Julian date, thanks to
    # https://stackoverflow.com/a/43318209
    # The value is reset on every iteration and only set when `date` accepts
    # the input: GNU `date -d ""` would silently return today's midnight, and
    # a failed conversion must not reuse the previous descriptor's day.
    # -u keeps the day number independent of the machine's timezone.
    date_julian=""
    if [[ -n "${published}" ]] \
        && epoch=$(date -u -d "${published}" +%s 2>/dev/null); then
      date_julian=$(( epoch / 86400 + 2440587 ))
    fi

    count=$(grep -Po '(?<=^bridge-ips )(.*)(?=$)' "${descriptor}" \
      | grep -Po '(?<=by=)(.*?)(?=(,|$))')
    count=${count:-0}

    # The fingerprint becomes a file name, so reject values that are empty or
    # would escape bridge_data/ or span several lines.
    if [[ -n "${date_julian}" && -n "${fingerprint}" \
        && "${fingerprint}" != */* && "${fingerprint}" != *$'\n'* ]]; then
      echo "${date_julian},${count}" >> bridge_data/"${fingerprint}"
    else
      {
        echo "Error for ${descriptor}"
        echo " fingerprint: ${fingerprint}"
        echo " date: ${date_julian}"
        echo " count: ${count}"
      } >&2
    fi
  done

  echo "Finished processing ${filename}; saving progress"
  rm -- "${filename}.tar.xz"

  # Return to data/bridge_data/
  cd .. || exit 1

  # Archive the directory rather than a glob of its contents: member paths
  # stay NAME/bridge_data/<fingerprint>, but an empty directory no longer
  # makes tar fail and a large number of files cannot overflow the argument
  # list.
  # NOTE(review): `z` writes gzip data although the name ends in .tar.xz.
  # Kept as-is because existing consumers may rely on it; `tar xf`
  # auto-detects the format either way.
  tar czf "${filename}_processed.tar.xz" "${filename}/bridge_data" || exit 1

  echo "Removing ${filename} directory to free up space"
  rm -r -- "${filename}"

  # Return to original directory
  cd ../.. || exit 1
fi