extract-extra-infos-archive.sh 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  #!/bin/bash
  #
  # Condense one archive of descriptor files into per-fingerprint data files.
  #
  # Usage: extract-extra-infos-archive.sh <name>[.tar.xz]
  #
  #   Reads:  data/<name>.tar.xz
  #   Writes: data/bridge_data/<name>_processed.tar.xz
  #
  # The archive is expected to unpack to <name>/<dir>/<dir>/<file>. Every
  # non-empty file is scanned for its "extra-info", "published" and
  # "bridge-ips" lines, and a "<julian_day>,<count>" line is appended to
  # bridge_data/<fingerprint>. Run from the directory that contains data/.
  # Requires GNU grep (-P) and GNU date (-d).

  # Print a message to stderr and abort the current (sub)shell with status 1.
  die() {
    printf '%s\n' "$*" >&2
    exit 1
  }

  #######################################
  # Convert a calendar date to its Julian day number, thanks to
  # https://stackoverflow.com/a/43318209
  # The date is parsed as UTC so the result does not shift by one day
  # depending on the timezone of the machine running the script.
  # Arguments: $1 - date string understood by `date -d` (e.g. 2016-01-31)
  # Outputs:   the Julian day number on stdout
  # Returns:   non-zero if the date is empty or cannot be parsed
  #######################################
  julian_day() {
    local epoch
    # `date -d ""` silently means "today", so reject empty input explicitly.
    [[ -n "${1:-}" ]] || return 1
    epoch=$(date -u +%s -d "$1") || return 1
    printf '%s\n' "$(( epoch / 86400 + 2440587 ))"
  }

  #######################################
  # Extract fingerprint, publication date and "by=" count from one file and
  # append "<julian_day>,<count>" to bridge_data/<fingerprint> (relative to
  # the current directory). Files missing a fingerprint or a usable date are
  # reported on stderr and skipped.
  # Arguments: $1 - path of the descriptor file
  #######################################
  process_descriptor() {
    local file=$1
    local fingerprint published day count

    # Remainder of the "extra-info " line after its first space.
    fingerprint=$(grep -Po '(?<=^extra-info )(.*)(?=$)' "${file}" \
      | grep -Po '(?<= )(.*)(?=$)')
    # Greedy match up to the last space: keeps the date, drops the time.
    published=$(grep -Po '(?<=^published )(.*)(?= )' "${file}")
    day=$(julian_day "${published}")
    # Value of the "by=" entry on the "bridge-ips" line; 0 when absent.
    count=$(grep -Po '(?<=^bridge-ips )(.*)(?=$)' "${file}" \
      | grep -Po '(?<=by=)(.*?)(?=(,|$))')
    count=${count:-0}

    if [[ -n "${day}" && -n "${fingerprint}" ]]; then
      echo "${day},${count}" >> bridge_data/"${fingerprint}"
    else
      {
        echo "Error for ${file}"
        echo " fingerprint: ${fingerprint}"
        echo " date: ${day}"
        echo " count: ${count}"
      } >&2
    fi
  }

  #######################################
  # Entry point. The body is a subshell, so the directory changes below can
  # never leak out and every `exit` only sets this function's status.
  # Arguments: $1 - archive name, with or without the .tar.xz suffix
  # Returns:   0 on success or if already processed, 2 on bad usage,
  #            1 on any processing failure
  #######################################
  main() (
    filename="${1:-}"
    filename="${filename%.tar.xz}"
    # An empty name would turn the cleanup below into
    # `rm -rf data/bridge_data/`, destroying all earlier results.
    if [[ -z "${filename}" ]]; then
      printf 'Usage: %s <name>[.tar.xz]\n' "${0##*/}" >&2
      exit 2
    fi

    processed="${filename}_processed.tar.xz"
    if [[ -f "data/bridge_data/${processed}" ]]; then
      echo "Already processed ${filename}.tar.xz"
      exit 0
    fi

    # Clean up any files from past runs
    rm -rf -- "data/bridge_data/${filename:?}"
    echo "Extracting ${filename}.tar.xz"
    workdir="data/bridge_data/${filename}"
    mkdir -p -- "${workdir}/bridge_data" || die "Cannot create ${workdir}"
    cp -- "data/${filename}.tar.xz" "${workdir}/" \
      || die "Cannot copy data/${filename}.tar.xz"
    cd -- "${workdir}" || die "Cannot enter ${workdir}"
    tar xf "${filename}.tar.xz" || exit 1

    echo "Processing ${filename}"
    # Nested loops (rather than one */*/* glob) keep the original visiting
    # order, which determines the order of lines in the output files.
    for level1 in "${filename}"/*; do
      for level2 in "${level1}"/*; do
        for descriptor in "${level2}"/*; do
          # Only non-empty regular files; this also skips unmatched globs.
          if [[ -f "${descriptor}" && -s "${descriptor}" ]]; then
            process_descriptor "${descriptor}"
          fi
        done
      done
    done

    echo "Finished processing ${filename}; saving progress"
    rm -- "${filename}.tar.xz"
    # Return to data/bridge_data/
    cd .. || die "Cannot return to data/bridge_data"
    # NOTE(review): `z` writes a gzip-compressed archive even though the name
    # ends in .tar.xz. Left as is because existing readers may rely on it;
    # confirm before switching to `J`.
    if ! tar czf "${processed}" "${filename}"/bridge_data/*; then
      # Do not leave a partial archive that a later run would mistake for a
      # completed one.
      rm -f -- "${processed}"
      exit 1
    fi
    echo "Removing ${filename} directory to free up space"
    rm -r -- "${filename}"
  )

  main "$@"