From 8af9b08921269e2bb65f36397b47fab4a1b657e1 Mon Sep 17 00:00:00 2001
From: ANDRI ANDRI <47277287+lamcodeofpwnosec@users.noreply.github.com>
Date: Mon, 21 Oct 2024 21:44:06 +0700
Subject: [PATCH] add new features

---
 README.md                      |  3 +++
 features/download_archived.sh  | 12 ++++++++++++
 features/filter_by_date.sh     | 19 +++++++++++++++++++
 features/filter_by_mimetype.sh | 15 +++++++++++++++
 features/track_changes.sh      | 15 +++++++++++++++
 features/verbose_mode.sh       | 13 +++++++++++++
 6 files changed, 77 insertions(+)
 create mode 100644 features/download_archived.sh
 create mode 100644 features/filter_by_date.sh
 create mode 100644 features/filter_by_mimetype.sh
 create mode 100644 features/track_changes.sh
 create mode 100644 features/verbose_mode.sh

diff --git a/README.md b/README.md
index a8d1a99..e0beff7 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,9 @@ The script retrieves a list of URLs that have been archived over time, offering
 * `curl`, `sed`, and `tee` should be installed on the system (most Unix-based systems come with these tools by default).
 
 ## Steps to Install and Run
+You can now use `./wayback.sh -help` to display the help menu and description for each feature. Each feature is now modular and easy to maintain or expand.
+
+
 1. Clone or Download the Script
 ```
 git clone https://github.com/lamcodeofpwnosec/Waybash.git
diff --git a/features/download_archived.sh b/features/download_archived.sh
new file mode 100644
index 0000000..4eadde3
--- /dev/null
+++ b/features/download_archived.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Download Archived Pages feature: fetches the Wayback Machine copy of every URL listed in "<domain>.txt".
+echo "Enter your Domain Name:"
+read -r domain
+# Stop early when the URL list is missing instead of printing a misleading "Download complete".
+[[ -r "${domain}.txt" ]] || { echo "URL list ${domain}.txt not found or unreadable." >&2; exit 1; }
+mkdir -p "${domain}-archived-pages"
+# Feed the list by redirection (no cat); IFS=/-r keep lines intact; the || test keeps an unterminated last line.
+while IFS= read -r url || [[ -n "$url" ]]; do
+    [[ -n "$url" ]] && wget "http://web.archive.org/web/$url" -P "${domain}-archived-pages/"
+done < "${domain}.txt"
+echo "Download complete. Archived pages are saved in the ${domain}-archived-pages/ folder."
diff --git a/features/filter_by_date.sh b/features/filter_by_date.sh
new file mode 100644
index 0000000..0421ffe
--- /dev/null
+++ b/features/filter_by_date.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Filter by Date Range feature
+# Description: Filters Wayback Machine URLs by a specific date range.
+# Prompts for start/end dates and a domain, then appends the unique URLs to "<domain>-<start>-to-<end>.txt".
+echo "Enter the start date (YYYYMMDD):"
+read -r start_date
+echo "Enter the end date (YYYYMMDD):"
+read -r end_date
+
+echo "Enter your Domain Name:"
+read -r domain
+outfile="${domain}-${start_date}-to-${end_date}.txt"
+# Strip the JSON row wrapping; the final row ends in "]] rather than "], so the pattern accepts both.
+curl "http://web.archive.org/cdx/search/cdx?url=*.${domain}/*&output=json&fl=original&collapse=urlkey&from=${start_date}&to=${end_date}" -s --path-as-is \
+    | sed -e 's/^\[*"//' -e 's/"\]\]*,*$//' \
+    | sort -u \
+    | tee -a "$outfile"
+
+echo "Crawling complete. The results are saved in $outfile"
diff --git a/features/filter_by_mimetype.sh b/features/filter_by_mimetype.sh
new file mode 100644
index 0000000..78a78e7
--- /dev/null
+++ b/features/filter_by_mimetype.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Filter by MIME Type feature
+# Prompts for a domain and a MIME type, then appends the matching rows to "<domain>_filtered_by_mime_type.txt".
+echo "Enter your Domain Name:"
+read -r domain
+echo "Enter the MIME type to filter (e.g., text/html, image/jpeg):"
+read -r mime_type
+# Braces are required: "$domain_filtered_by_mime_type" is a different (unset) variable and expanded to ".txt".
+outfile="${domain}_filtered_by_mime_type.txt"
+# No "!" before mimetype: the CDX server reads "!" as "invert the match", which returned every other type.
+curl "http://web.archive.org/cdx/search/cdx?url=*.${domain}/*&output=json&fl=original,mimetype&filter=mimetype:${mime_type}&collapse=urlkey" -s --path-as-is \
+    | sed -e 's/^\[*"//' -e 's/"\]\]*,*$//' \
+    | sort -u \
+    | tee -a "$outfile"
+echo "Crawling complete. The filtered results (MIME type: $mime_type) are saved in $outfile"
diff --git a/features/track_changes.sh b/features/track_changes.sh
new file mode 100644
index 0000000..32df48f
--- /dev/null
+++ b/features/track_changes.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Track URL Changes Over Time feature
+# Prompts for a domain and a path, then appends the timestamp/original rows for that URL to a per-URL log.
+echo "Enter your Domain Name:"
+read -r domain
+echo "Enter the specific URL to track changes (e.g., /path/to/page):"
+read -r url_path
+# Brace the names ("$url_path_change_log" is a separate, unset variable, so the old name collapsed to
+# "<domain>.txt") and turn "/" into "_" so the log name never points into a directory that does not exist.
+outfile="${domain}${url_path//\//_}_change_log.txt"
+curl "http://web.archive.org/cdx/search/cdx?url=${domain}${url_path}&output=json&fl=timestamp,original" -s --path-as-is \
+    | sed -e 's/^\[*"//' -e 's/"\]\]*,*$//' \
+    | sort -u \
+    | tee -a "$outfile"
+echo "Tracking complete. Changes to $url_path are saved in $outfile"
diff --git a/features/verbose_mode.sh b/features/verbose_mode.sh
new file mode 100644
index 0000000..c4ef414
--- /dev/null
+++ b/features/verbose_mode.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Verbose Mode feature
+# Lists the archived URLs for a domain while curl -v prints its request/response trace on stderr.
+echo "Enter your Domain Name:"
+read -r domain
+# Braces are required: "$domain_verbose" is a different (unset) variable and expanded to ".txt".
+outfile="${domain}_verbose.txt"
+curl "http://web.archive.org/cdx/search/cdx?url=*.${domain}/*&output=json&fl=original&collapse=urlkey" -v --path-as-is \
+    | sed -e 's/^\[*"//' -e 's/"\]\]*,*$//' \
+    | sort -u \
+    | tee -a "$outfile"
+
+echo "Verbose crawling complete. The results are saved in $outfile"