-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
47b744f
commit d1ba34a
Showing 2 changed files with 24 additions and 16 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,42 @@ | ||
import requests | ||
""" | ||
Functions to ingest and process data | ||
""" | ||
import zipfile | ||
import requests | ||
|
||
def ingest_data(
    file_url="https://archive.ics.uci.edu/static/public/352/online+retail.zip",
    dest_path="data/data.zip",
):
    """
    Download a file from a URL and save it to disk.

    Args:
        file_url: Address to fetch with an HTTP GET request. Defaults to the
            UCI "online+retail.zip" archive.
        dest_path: Path the response body is written to (binary mode). The
            parent directory must already exist.

    A status message is printed in every case. Request errors and non-200
    responses are reported rather than raised.
    """
    try:
        # The timeout keeps a stalled connection from hanging the script.
        response = requests.get(file_url, timeout=30)
    except requests.RequestException as err:
        print(f"Failed to download the file. Error: {err}")
        return

    # Anything other than 200 is treated as a failed download.
    if response.status_code != 200:
        print(f"Failed to download the file. Status code: {response.status_code}")
        return

    with open(dest_path, "wb") as file:
        file.write(response.content)
    print("File downloaded successfully.")
|
||
|
||
def unzip_file(zip_filename='data/data.zip', extract_to='data/'):
    """
    Extract every member of a zip archive into a directory.

    Args:
        zip_filename: Path of the archive to open. Defaults to the file
            written by the download step.
        extract_to: Directory the archive members are extracted into.

    A status message is printed in every case. A corrupt archive or a
    file-system error (for example a missing archive) is reported rather
    than raised.
    """
    try:
        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
    except (zipfile.BadZipFile, OSError) as err:
        # Only archive and file-system failures are expected here; anything
        # else is a programming error and should propagate.
        print(f"Failed to unzip {zip_filename}: {err}")
    else:
        print(f"File {zip_filename} successfully unzipped to {extract_to}")
|
||
|
||
if __name__ == "__main__":
    # Download the archive first, then extract it once.
    ingest_data()
    unzip_file()