From fb8c9f2104676c82ff2ddb876b5176fa7b2c4182 Mon Sep 17 00:00:00 2001 From: Guillaume Rischard Date: Mon, 3 Apr 2023 01:16:38 -0400 Subject: [PATCH 1/3] Rewrite download_changesets.sh Use curl instead of wget to fetch the list of changesets. Use mktemp instead of a hardcoded filename for the temporary file. Allow the output directory to be specified via a command-line argument. Add logging with detailed messages, including timestamps and errors. Implement retries with exponential backoff for failed downloads. Implement a dry-run mode to display the changesets that would be downloaded without actually downloading them. Allow users to specify a custom log file location. Output a summary report of the downloaded changesets, including total count and size. Implement a help message that displays the available options and their descriptions. This hopefully makes download_changesets more flexible, robust, and user-friendly than the original. --- download_changesets.sh | 153 +++++++++++++++++++++++++++++++++-------- 1 file changed, 123 insertions(+), 30 deletions(-) diff --git a/download_changesets.sh b/download_changesets.sh index c22ef4b..6463716 100644 --- a/download_changesets.sh +++ b/download_changesets.sh @@ -1,39 +1,132 @@ #!/bin/sh -# script to download all changesets of one user since -# a given date (to get ALL, set date to before their signup) -# API currently limited to listing max. 100 changesets, -# therefore loop required +# Usage: ./download_changesets.sh -u USERNAME [-s SINCE_DATE] [-o OUTPUT_DIR] [-l LOG_FILE] [-d] + +# Description: This script downloads all changesets of an OpenStreetMap user +# since a given date. To get all, set SINCE_DATE to before their signup. The +# API currently lists at most 100 changesets at a time, which is why we loop. + +# No user serviceable parts below. 
This will create a directory called: +# USERNAME_SINCE_DATE +# which will contain tons of files called changeset_1234.osc (one for each changeset) + +# Variables + +user= +since=2013-11-01T00:00:00 +end_date= +output_dir= +current_time= +has_more_changesets=1 +logfile=/dev/stdout +dry_run=false USER=$1 SINCE=2013-11-01T00:00:00 -# no user servicable parts below. run this in empty directory -# and you'll end up with tons of files called c1234.osc (one -# for each changeset) - -T=`date -u +%Y-%m-%dT%H:%M:%S` -export T -EX=0 -export EX - -while [ $EX = 0 ] -do - wget -Olist "https://api.openstreetmap.org/api/0.6/changesets?display_name=$USER&time=$SINCE,$T" - T=`grep "&2; exit 1;; + esac +done + +# Check if the username is provided +if [ -z "$user" ]; then + printf "Error: Username is required.\n" + exit 1 +fi + +# Set the default output directory if not provided +if [ -z "$output_dir" ]; then + output_dir="${user}_${since}" +fi + +# Create output directory if it doesn't exist +if [ ! -d "$output_dir" ]; then + mkdir "$output_dir" +fi + +# Initialize current_time variable +current_time=$(date -u +%Y-%m-%dT%H:%M:%S) + +# Function to download changesets +download_changesets() { + # Create a temporary file + temp_file=$(mktemp -t "download-changesets-$user-XXXXXX") + + while [ $has_more_changesets -eq 1 ]; do + # Fetch the list of changesets + curl -s -o "$temp_file" "https://api.openstreetmap.org/api/0.6/changesets?display_name=$user&time=$since,$current_time" + + # Update the current_time for the next API call + current_time=$(grep "> "$logfile" + else + # Download the changeset with retries and logging + attempt=0 + max_attempts=3 + while [ $attempt -lt $max_attempts ]; do + printf "Downloading changeset $id (attempt $(($attempt + 1)) of $max_attempts)...\n" >> "$logfile" + if curl -s -o "$output_dir/changeset_$id.osc" "https://api.openstreetmap.org/api/0.6/changeset/$id/download"; then + printf "Changeset $id downloaded successfully.\n" >> "$logfile" + break + else + printf 
"Failed to download changeset $id. Retrying...\n" >> "$logfile" + sleep $((2 ** $attempt)) + attempt=$((attempt + 1)) + fi + done + + # Set has_more_changesets to 1, indicating that more changesets are available + has_more_changesets=1 + fi + fi + done done -done -rm -f list + # Remove the temporary file + rm -f "$temp_file" +} + +# Call the download_changesets function +download_changesets + +# Print summary report +total_changesets=$(find "$output_dir" -name "changeset_*.osc" | wc -l) +total_size=$(du -sh "$output_dir" | cut -f1) +printf "Summary:\nTotal Changesets: %s\nTotal Size: %s\n" "$total_changesets" "$total_size" >> "$logfile" + +# Display help message +if [ $# -eq 0 ]; then + printf "Usage: ./script.sh -u USERNAME [-s SINCE_DATE] [-e END_DATE] [-o OUTPUT_DIR] [-l LOG_FILE] [-d]\n" + printf "Options:\n" + printf " -u Specify the OpenStreetMap username.\n" + printf " -s Specify the start date for downloading changesets (default: 2013-11-01T00:00:00).\n" + printf " -o Specify the output directory (default: USERNAME_SINCE_DATE).\n" + printf " -l Specify the log file location (default: stdout).\n" + printf " -d Enable dry-run mode, which lists changesets without downloading them.\n" + exit 1 +fi From b21afa99dd645c53a56aeac295b60966523d5563 Mon Sep 17 00:00:00 2001 From: Guillaume Rischard Date: Mon, 3 Apr 2023 01:17:25 -0400 Subject: [PATCH 2/3] Stray old variables --- download_changesets.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/download_changesets.sh b/download_changesets.sh index 6463716..aa6ef79 100644 --- a/download_changesets.sh +++ b/download_changesets.sh @@ -21,8 +21,6 @@ has_more_changesets=1 logfile=/dev/stdout dry_run=false -USER=$1 -SINCE=2013-11-01T00:00:00 # Parse command-line arguments while getopts ":u:s:o:l:d" opt; do From a741c1a6a4f1e761d81988af88bf70b4a60a59f5 Mon Sep 17 00:00:00 2001 From: Guillaume Rischard Date: Thu, 15 Jun 2023 15:52:15 -0400 Subject: [PATCH 3/3] Default 'since' to changeset 1 day --- 
download_changesets.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/download_changesets.sh b/download_changesets.sh index aa6ef79..858a2b8 100644 --- a/download_changesets.sh +++ b/download_changesets.sh @@ -13,7 +13,7 @@ # Variables user= -since=2013-11-01T00:00:00 +since=2005-04-09T00:00:00 end_date= output_dir= current_time=