diff --git a/README.md b/README.md
index 13c78cc..705639e 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,5 @@
+[![Pytest](https://github.com/Thomas-George-T/Ecommerce-Data-MLOps/actions/workflows/pytest.yml/badge.svg)](https://github.com/Thomas-George-T/Ecommerce-Data-MLOps/actions/workflows/pytest.yml)
+
+
 # MLOps-Ecomm
 Work in Progress
\ No newline at end of file
diff --git a/data/Online Retail.xlsx b/data/Online Retail.xlsx
new file mode 100644
index 0000000..a58636c
Binary files /dev/null and b/data/Online Retail.xlsx differ
diff --git a/data/data.zip b/data/data.zip
new file mode 100644
index 0000000..9e9b894
Binary files /dev/null and b/data/data.zip differ
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ numpy
 pandas
 pytest
 ipykernel
-mlflow
\ No newline at end of file
+mlflow
+requests
diff --git a/src/__pycache__/__init__.cpython-310.pyc b/src/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..c71e53b
Binary files /dev/null and b/src/__pycache__/__init__.cpython-310.pyc differ
diff --git a/src/__pycache__/datapipeline.cpython-310.pyc b/src/__pycache__/datapipeline.cpython-310.pyc
new file mode 100644
index 0000000..e380b82
Binary files /dev/null and b/src/__pycache__/datapipeline.cpython-310.pyc differ
diff --git a/src/datapipeline.py b/src/datapipeline.py
new file mode 100644
--- /dev/null
+++ b/src/datapipeline.py
@@ -0,0 +1,68 @@
+"""Download and extract the UCI Online Retail dataset."""
+import os
+import zipfile
+
+import requests
+
+DATA_URL = "https://archive.ics.uci.edu/static/public/352/online+retail.zip"
+ZIP_PATH = "data/data.zip"
+EXTRACT_DIR = "data/"
+# requests applies no timeout by default; without one a stalled server hangs forever.
+REQUEST_TIMEOUT = 30
+
+
+def ingest_data(file_url=DATA_URL, zip_filename=ZIP_PATH, timeout=REQUEST_TIMEOUT):
+    """Download the dataset archive and save it to disk.
+
+    Args:
+        file_url: URL of the zip archive to download.
+        zip_filename: Path the downloaded archive is written to.
+        timeout: Seconds to wait for the server before giving up.
+
+    Returns:
+        True if the archive was downloaded and saved, False otherwise.
+    """
+    try:
+        response = requests.get(file_url, timeout=timeout)
+    except requests.RequestException as err:
+        print(f"Failed to download the file: {err}")
+        return False
+
+    if response.status_code != 200:
+        print(f"Failed to download the file. Status code: {response.status_code}")
+        return False
+
+    # Create the target directory if it does not exist yet.
+    os.makedirs(os.path.dirname(zip_filename) or ".", exist_ok=True)
+    with open(zip_filename, "wb") as file:
+        file.write(response.content)
+    print("File downloaded successfully.")
+    return True
+
+
+def unzip_file(zip_filename=ZIP_PATH, extract_to=EXTRACT_DIR):
+    """Extract a zip archive into a directory.
+
+    Args:
+        zip_filename: Path of the archive to extract.
+        extract_to: Directory the archive contents are extracted into.
+
+    Returns:
+        True if extraction succeeded, False otherwise.
+    """
+    try:
+        with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
+            zip_ref.extractall(extract_to)
+    except (zipfile.BadZipFile, OSError) as err:
+        print(f"Failed to unzip {zip_filename}: {err}")
+        return False
+
+    print(f"File {zip_filename} successfully unzipped to {extract_to}")
+    return True
+
+
+if __name__ == "__main__":
+    # Only extract when the download succeeded; otherwise a stale or missing
+    # archive would be unpacked.
+    if ingest_data():
+        unzip_file()
diff --git a/test/__pycache__/__init__.cpython-310.pyc b/test/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000..d5cfe2b
Binary files /dev/null and b/test/__pycache__/__init__.cpython-310.pyc differ
diff --git a/test/__pycache__/test_datapipeline.cpython-310-pytest-7.4.2.pyc b/test/__pycache__/test_datapipeline.cpython-310-pytest-7.4.2.pyc
new file mode 100644
index 0000000..4e10ac8
Binary files /dev/null and b/test/__pycache__/test_datapipeline.cpython-310-pytest-7.4.2.pyc differ
diff --git a/test/test_datapipeline.py b/test/test_datapipeline.py
new file mode 100644
--- /dev/null
+++ b/test/test_datapipeline.py
@@ -0,0 +1,91 @@
+import zipfile
+
+import pytest
+import requests
+
+from src import datapipeline
+
+
+class FakeResponse:
+    """Minimal stand-in for requests.Response used by ingest_data."""
+
+    def __init__(self, status_code, content=b""):
+        self.status_code = status_code
+        self.content = content
+
+
+def test_ingest_data(monkeypatch, tmp_path, capsys):
+    # arrange: serve canned bytes instead of hitting the network
+    target = tmp_path / "data" / "data.zip"
+    monkeypatch.setattr(
+        datapipeline.requests, "get",
+        lambda url, timeout=None: FakeResponse(200, b"payload"),
+    )
+
+    # act: invoking the tested code
+    result = datapipeline.ingest_data(zip_filename=str(target))
+
+    # assert:
+    assert result is True
+    assert target.read_bytes() == b"payload"
+    assert "File downloaded successfully." in capsys.readouterr().out
+
+
+@pytest.mark.parametrize("status_code", [404, 500])
+def test_ingest_data_bad_status(monkeypatch, tmp_path, capsys, status_code):
+    # arrange: the server answers, but not with 200
+    target = tmp_path / "data.zip"
+    monkeypatch.setattr(
+        datapipeline.requests, "get",
+        lambda url, timeout=None: FakeResponse(status_code),
+    )
+
+    # act: invoking the tested code
+    result = datapipeline.ingest_data(zip_filename=str(target))
+
+    # assert: nothing is written and the status code is reported
+    assert result is False
+    assert not target.exists()
+    assert f"Status code: {status_code}" in capsys.readouterr().out
+
+
+def test_ingest_data_network_error(monkeypatch, tmp_path, capsys):
+    # arrange: the request itself fails
+    def raise_connection_error(url, timeout=None):
+        raise requests.ConnectionError("unreachable")
+
+    target = tmp_path / "data.zip"
+    monkeypatch.setattr(datapipeline.requests, "get", raise_connection_error)
+
+    # act: invoking the tested code
+    result = datapipeline.ingest_data(zip_filename=str(target))
+
+    # assert:
+    assert result is False
+    assert not target.exists()
+    assert "Failed to download the file" in capsys.readouterr().out
+
+
+def test_unzip_file(tmp_path, capsys):
+    # arrange: build a real archive so extraction is actually exercised
+    archive = tmp_path / "data.zip"
+    with zipfile.ZipFile(archive, "w") as zip_ref:
+        zip_ref.writestr("hello.txt", "hi")
+    extract_to = tmp_path / "out"
+
+    # act: invoking the tested code
+    result = datapipeline.unzip_file(str(archive), str(extract_to))
+
+    # assert:
+    assert result is True
+    assert (extract_to / "hello.txt").read_text() == "hi"
+    assert "successfully unzipped" in capsys.readouterr().out
+
+
+def test_unzip_file_missing_archive(tmp_path, capsys):
+    # act: invoking the tested code on a path that does not exist
+    result = datapipeline.unzip_file(str(tmp_path / "missing.zip"), str(tmp_path))
+
+    # assert:
+    assert result is False
+    assert "Failed to unzip" in capsys.readouterr().out