-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5ad9a22
commit 8af9b08
Showing
6 changed files
with
77 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/bin/bash
# Download Archived Pages feature
# Description: Reads URLs (one per line) from "<domain>.txt" in the current
# directory and fetches each one through the Wayback Machine with wget,
# saving the downloads under "<domain>-archived-pages/".

echo "Enter your Domain Name:"
# -r keeps backslashes in the typed value literal instead of treating them
# as escape characters.
read -r domain

if [[ -z "$domain" ]]; then
  echo "Error: no domain name given." >&2
  exit 1
fi

url_list="${domain}.txt"
out_dir="${domain}-archived-pages"

# Without the URL list there is nothing to download; stop here rather than
# creating an empty directory and reporting success.
if [[ ! -r "$url_list" ]]; then
  echo "Error: URL list '$url_list' not found or not readable." >&2
  exit 1
fi

mkdir -p "$out_dir" || exit 1

# Redirect the file into the loop instead of piping it through cat.
# The default IFS still trims surrounding whitespace from each URL, and the
# `|| [[ -n "$url" ]]` clause also handles a final line with no newline.
while read -r url || [[ -n "$url" ]]; do
  # Skip blank lines so wget is not pointed at the bare /web/ prefix.
  [[ -z "$url" ]] && continue
  wget "http://web.archive.org/web/$url" -P "$out_dir/"
done < "$url_list"

echo "Download complete. Archived pages are saved in the $out_dir/ folder."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash
# Filter by Date Range feature
# Description: Filters Wayback Machine URLs by a specific date range.
# Prompts for a start date, an end date and a domain, queries the Wayback
# Machine CDX endpoint for that window, and appends the sorted, de-duplicated
# output to "<domain>-<start>-to-<end>.txt" while also printing it.

# Let the pipeline below report a failure from any stage (e.g. curl), not
# only from the final tee.
set -o pipefail

echo "Enter the start date (YYYYMMDD):"
read -r start_date
echo "Enter the end date (YYYYMMDD):"
read -r end_date

echo "Enter your Domain Name:"
read -r domain

# Both dates end up in the query string and in the output filename, so
# insist on the eight-digit form the prompts ask for.
for date_value in "$start_date" "$end_date"; do
  if [[ ! "$date_value" =~ ^[0-9]{8}$ ]]; then
    echo "Error: '$date_value' is not a date in YYYYMMDD format." >&2
    exit 1
  fi
done

if [[ -z "$domain" ]]; then
  echo "Error: no domain name given." >&2
  exit 1
fi

out_file="${domain}-${start_date}-to-${end_date}.txt"

# The sed expressions strip the `["` and `"],` JSON wrappers from each row.
# NOTE(review): depending on how the JSON response is laid out, the header
# row and the final row may keep stray brackets -- verify against a real
# response.
# -k already means --insecure, so the duplicate flag is dropped.
if ! curl "http://web.archive.org/cdx/search/cdx?url=*.${domain}/*&output=json&fl=original&collapse=urlkey&from=${start_date}&to=${end_date}" -s -k --path-as-is \
  | sed -e 's/\["//g' -e 's/"\],//g' \
  | sort -u \
  | tee -a "$out_file"; then
  echo "Error: crawling failed; $out_file may be incomplete." >&2
  exit 1
fi

echo "Crawling complete. The results are saved in $out_file"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/bash
# Filter by MIME Type feature
# Description: Prompts for a domain and a MIME type, queries the Wayback
# Machine CDX endpoint with a mimetype filter, and appends the sorted,
# de-duplicated output to "<domain>_filtered_by_mime_type.txt" while also
# printing it.

# Let the pipeline below report a failure from any stage (e.g. curl), not
# only from the final tee.
set -o pipefail

echo "Enter your Domain Name:"
read -r domain
echo "Enter the MIME type to filter (e.g., text/html, image/jpeg):"
read -r mime_type

if [[ -z "$domain" || -z "$mime_type" ]]; then
  echo "Error: both a domain name and a MIME type are required." >&2
  exit 1
fi

# The braces are required: "$domain_filtered_by_mime_type" would be parsed as
# one (unset) variable name and the results would land in a file named ".txt".
out_file="${domain}_filtered_by_mime_type.txt"

# NOTE(review): the leading `!` in `filter=!mimetype:` presumably inverts the
# match (returning captures that do NOT have this MIME type) -- confirm
# against the CDX API documentation which behavior is intended.
# NOTE(review): the sed expressions only strip `["` and `"],`; the header row
# and the final row of the JSON response may keep stray brackets -- verify.
# -k already means --insecure, so the duplicate flag is dropped.
if ! curl "http://web.archive.org/cdx/search/cdx?url=*.${domain}/*&output=json&fl=original,mimetype&filter=!mimetype:${mime_type}&collapse=urlkey" -s -k --path-as-is \
  | sed -e 's/\["//g' -e 's/"\],//g' \
  | sort -u \
  | tee -a "$out_file"; then
  echo "Error: crawling failed; $out_file may be incomplete." >&2
  exit 1
fi

echo "Crawling complete. The filtered results (MIME type: $mime_type) are saved in $out_file"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#!/bin/bash
# Track URL Changes Over Time feature
# Description: Prompts for a domain and a URL path, asks the Wayback Machine
# CDX endpoint for the timestamp and original URL of each capture, and
# appends the sorted, de-duplicated output to a per-URL change log file while
# also printing it.

# Let the pipeline below report a failure from any stage (e.g. curl), not
# only from the final tee.
set -o pipefail

echo "Enter your Domain Name:"
read -r domain
echo "Enter the specific URL to track changes (e.g., /path/to/page):"
read -r url_path

if [[ -z "$domain" ]]; then
  echo "Error: no domain name given." >&2
  exit 1
fi

# The path contains slashes, which would turn the log name into a path
# through directories that do not exist; flatten them to underscores.
safe_path=${url_path//\//_}

# The braces are required: "$url_path_change_log" would be parsed as one
# (unset) variable name and the log would be written to "<domain>.txt".
out_file="${domain}${safe_path}_change_log.txt"

# NOTE(review): the sed expressions only strip `["` and `"],`; the header row
# and the final row of the JSON response may keep stray brackets -- verify.
# -k already means --insecure, so the duplicate flag is dropped.
if ! curl "http://web.archive.org/cdx/search/cdx?url=${domain}${url_path}&output=json&fl=timestamp,original" -s -k --path-as-is \
  | sed -e 's/\["//g' -e 's/"\],//g' \
  | sort -u \
  | tee -a "$out_file"; then
  echo "Error: tracking failed; $out_file may be incomplete." >&2
  exit 1
fi

echo "Tracking complete. Changes to $url_path are saved in $out_file"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash
# Verbose Mode feature
# Description: Prompts for a domain and queries the Wayback Machine CDX
# endpoint with curl's verbose flag (-v) enabled, appending the sorted,
# de-duplicated output to "<domain>_verbose.txt" while also printing it.

# Let the pipeline below report a failure from any stage (e.g. curl), not
# only from the final tee.
set -o pipefail

echo "Enter your Domain Name:"
read -r domain

if [[ -z "$domain" ]]; then
  echo "Error: no domain name given." >&2
  exit 1
fi

# The braces are required: "$domain_verbose" would be parsed as one (unset)
# variable name and the results would land in a file named ".txt".
out_file="${domain}_verbose.txt"

# NOTE(review): the sed expressions only strip `["` and `"],`; the header row
# and the final row of the JSON response may keep stray brackets -- verify.
# -k already means --insecure, so the duplicate flag is dropped.
if ! curl "http://web.archive.org/cdx/search/cdx?url=*.${domain}/*&output=json&fl=original&collapse=urlkey" -v -k --path-as-is \
  | sed -e 's/\["//g' -e 's/"\],//g' \
  | sort -u \
  | tee -a "$out_file"; then
  echo "Error: crawling failed; $out_file may be incomplete." >&2
  exit 1
fi

echo "Verbose crawling complete. The results are saved in $out_file"