Przeglądaj źródła

Use -s for sequential; clean up

Vecna 1 tydzień temu
rodzic
commit
c305a1c822

+ 12 - 19
run.sh

@@ -16,13 +16,13 @@ if [ "$?" != 0 ]; then
 fi
 
 fast=false
-parallel=false
+sequential=false
 
 while [ -n "$1" ]; do
     if [ "$1" == "--fast" ]; then
         fast=true
-    elif [ "$1" == "-p" ]; then
-        parallel=true
+    elif [ "$1" == "-s" ]; then
+        sequential=true
     fi
     shift
 done
@@ -30,28 +30,21 @@ done
 # Get bridge data
 if [ "$fast" == "true" ]; then
     echo "Repacking some pre-processed data for the next step..."
-    # We want data/bridge_data/pre_processed/bridge_data/
-    cd data && \
-        rm -rf bridge_data && \
-        tar xzf bridge_data.tar.gz && \
-        mkdir processed && \
-        mv bridge_data processed && \
-        tar czf pre_processed.tar.xz processed/* && \
-        rm -r processed && \
-        mkdir -p bridge_data && \
-        mv pre_processed.tar.xz bridge_data && \
-        cd ..
+    ./scripts/repack-preprocessed-data.sh
 else
-    if [ "$parallel" == "true" ]; then
-        echo "Downloading and processing data from step 1 in 10 parallel steps..."
+    echo "Downloading data from the Tor Project..."
+    ./scripts/download-bridge-data.sh
+
+    if [ "$sequential" != "true" ]; then
+        echo "Processing data from step 1 in 10 parallel steps..."
         echo "This will take a while (around an hour on my device)"
         echo "and require around 20 GB of free space while running."
-        ./scripts/get-bridge-data.sh -p
+        ./scripts/get-bridge-data.sh
     else
-        echo "Downloading and processing data from step 1 sequentially..."
+        echo "Processing data from step 1 sequentially..."
         echo "This will take quite a long time (around 12.5 hours on my device)"
         echo "and require a few GB of free space while running."
-        ./scripts/get-bridge-data.sh
+        ./scripts/get-bridge-data.sh -s
     fi
 fi
 

+ 22 - 0
scripts/download-bridge-data.sh

@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Populate array of months we care about
+months=()
+# 2020
+for i in {7..12}; do
+    months+=( 2020-$(printf %02d $i) )
+done
+# 2021
+for i in {1..4}; do
+    months+=( 2021-$(printf %02d $i) )
+done
+
+# Download the archives if we don't have them already
+for i in ${months[@]}; do
+    if [ ! -f data/bridge-extra-infos-${i}.tar.xz ]; then
+        curl -Lo data/bridge-extra-infos-${i}.tar.xz https://collector.torproject.org/archive/bridge-descriptors/extra-infos/bridge-extra-infos-${i}.tar.xz || exit 1
+    fi
+done
+
+# Check that we have the right archives
+sha256sum -c data/bridge-extra-infos.sha256 || exit 1

+ 6 - 16
scripts/get-bridge-data.sh

@@ -1,33 +1,23 @@
 #!/bin/bash
 
-parallel=false
-if [ "$1" == "-p" ]; then
-    parallel=true
+sequential=false
+if [ "$1" == "-s" ]; then
+    sequential=true
 fi
 
 # Populate array of months we care about
 months=()
 # 2020
-for i in $(seq 7 12); do
+for i in {7..12}; do
     months+=( 2020-$(printf %02d $i) )
 done
 # 2021
-for i in $(seq 1 4); do
+for i in {1..4}; do
     months+=( 2021-$(printf %02d $i) )
 done
 
-# Download the archives if we don't have them already
-for i in ${months[@]}; do
-    if [ ! -f data/bridge-extra-infos-${i}.tar.xz ]; then
-        curl -Lo data/bridge-extra-infos-${i}.tar.xz https://collector.torproject.org/archive/bridge-descriptors/extra-infos/bridge-extra-infos-${i}.tar.xz || exit 1
-    fi
-done
-
-# Check that we have the right archives
-sha256sum -c data/bridge-extra-infos.sha256 || exit 1
-
 # Extract the data for each month
-if [ "$parallel" == "true" ]; then
+if [ "$sequential" != "true" ]; then
     # Do it in parallel
     for i in ${months[@]}; do
         ./scripts/extract-extra-infos-archive.sh bridge-extra-infos-${i}.tar.xz &

+ 14 - 0
scripts/repack-preprocessed-data.sh

@@ -0,0 +1,14 @@
+#!/bin/bash
+
+# We want our data in the directory:
+# data/bridge_data/pre_processed/bridge_data/
+cd data && \
+    rm -rf bridge_data && \
+    tar xzf bridge_data.tar.gz && \
+    mkdir processed && \
+    mv bridge_data processed && \
+    tar czf pre_processed.tar.xz processed/* && \
+    rm -r processed && \
+    mkdir -p bridge_data && \
+    mv pre_processed.tar.xz bridge_data && \
+    cd ..