| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657 |
#!/bin/bash
#
# Unpack one descriptor archive and reduce it to one file per fingerprint,
# each holding "<julian day>,<count>" lines, then repack the result.
#
# Usage: run from the directory that contains data/, with one argument:
#   <name> or <name>.tar.xz
#
# Reads:   data/<name>.tar.xz
# Writes:  data/bridge_data/<name>_processed.tar.xz
# Scratch: data/bridge_data/<name>/ (removed again on success)
#
# The archive is expected to unpack into a directory called <name> whose
# files sit three levels down (<name>/*/*/<file>). Relies on GNU grep (-P)
# and GNU date (-d).

readonly SECONDS_PER_DAY=86400
# Offset added to "days since the Unix epoch" to obtain the Julian day
# number; see https://stackoverflow.com/a/43318209
readonly JULIAN_EPOCH_OFFSET=2440587

#######################################
# Extract fingerprint, publication day and count from one descriptor file
# and append "<julian day>,<count>" to <out_dir>/<fingerprint>.
# Arguments:
#   $1 - path of the descriptor file
#   $2 - directory that receives the per-fingerprint files
# Outputs:
#   A diagnostic on stderr when fingerprint or date cannot be determined.
#######################################
process_descriptor() {
  local descriptor=$1
  local out_dir=$2
  local fingerprint published epoch count
  # Reset per file so a failed conversion can never reuse an earlier value.
  local date_julian=''

  # Text after the last space of the "extra-info " line.
  fingerprint=$(grep -Po '(?<=^extra-info )(.*)(?=$)' "${descriptor}" \
    | grep -Po '(?<= )(.*)(?=$)')

  # Everything before the last space of the "published " line
  # (presumably the date part of a "DATE TIME" value -- confirm).
  published=$(grep -Po '(?<=^published )(.*)(?= )' "${descriptor}")

  # An empty string would make `date -d` silently return today's date, so
  # only convert when something was actually parsed. -u keeps the day
  # number independent of the machine's timezone.
  if [[ -n "${published}" ]] && epoch=$(date -u +%s -d "${published}"); then
    # Convert to Julian date
    date_julian=$(( epoch / SECONDS_PER_DAY + JULIAN_EPOCH_OFFSET ))
  fi

  # Value of the "by=" entry on the "bridge-ips " line; 0 when absent.
  count=$(grep -Po '(?<=^bridge-ips )(.*)(?=$)' "${descriptor}" \
    | grep -Po '(?<=by=)(.*?)(?=(,|$))')
  if [[ -z "${count}" ]]; then
    count=0
  fi

  if [[ -n "${date_julian}" && -n "${fingerprint}" ]]; then
    echo "${date_julian},${count}" >> "${out_dir}/${fingerprint}"
  else
    {
      echo "Error for ${descriptor}"
      echo " fingerprint: ${fingerprint}"
      echo " date: ${date_julian}"
      echo " count: ${count}"
    } >&2
  fi
}

#######################################
# Entry point: process the archive named by $1 unless its processed
# tarball already exists.
# Arguments:
#   $1 - archive name, with or without the .tar.xz suffix
# Returns:
#   Exits 2 on missing argument, 1 when a required step fails.
#######################################
main() {
  local filename="${1%.tar.xz}"

  # An empty name would turn the clean-up below into
  # `rm -rf data/bridge_data/`, wiping every earlier result.
  if [[ -z "${filename}" ]]; then
    printf 'Usage: %s <name>[.tar.xz]\n' "${0##*/}" >&2
    exit 2
  fi

  local work_dir="data/bridge_data/${filename}"

  if [[ -f "data/bridge_data/${filename}_processed.tar.xz" ]]; then
    echo "Already processed ${filename}.tar.xz"
    return 0
  fi

  # Clean up any files from past runs
  rm -rf -- "${work_dir}"

  echo "Extracting ${filename}.tar.xz"
  # Stop at the first failing step: everything below uses relative paths
  # and must not run from the wrong directory.
  mkdir -p -- "${work_dir}/bridge_data" || exit 1
  cp -- "data/${filename}.tar.xz" "${work_dir}/" || exit 1
  cd -- "${work_dir}" || exit 1
  tar xf "${filename}.tar.xz" || exit 1

  echo "Processing ${filename}"
  local level1 level2 descriptor
  for level1 in "${filename}"/*; do
    for level2 in "${level1}"/*; do
      for descriptor in "${level2}"/*; do
        # Skip directories, empty files and the literal pattern that an
        # unmatched glob leaves behind.
        if [[ -f "${descriptor}" && -s "${descriptor}" ]]; then
          process_descriptor "${descriptor}" bridge_data
        fi
      done
    done
  done

  echo "Finished processing ${filename}; saving progress"
  rm -- "${filename}.tar.xz"
  # Return to data/bridge_data/
  cd .. || exit 1
  # NOTE(review): `z` selects gzip although the output is named .tar.xz.
  # Kept as-is because readers elsewhere may rely on the current format --
  # confirm before switching to `J` (xz).
  tar czf "${filename}_processed.tar.xz" "${filename}"/bridge_data/* || exit 1
  echo "Removing ${filename} directory to free up space"
  rm -r -- "${filename}"
  # Return to original directory
  cd ../.. || exit 1
}

main "$@"
|