From 6c4354c79e51f06fe40b9ddcac2e538ba4c3d041 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mar=C3=ADa=20Limones=20Andrade?= Date: Thu, 10 Oct 2024 09:12:37 +0000 Subject: [PATCH] Feature/add filter data --- config_dev.yaml | 3 +- inesdata_mov_datasets/sources/create/aemet.py | 51 +- .../sources/create/informo.py | 5 +- inesdata_mov_datasets/sources/extract/emt.py | 7 +- .../sources/filter_data/aemet.py | 37 + .../sources/filter_data/emt.py | 104 +++ .../sources/filter_data/informo.py | 33 + inesdata_mov_datasets/utils.py | 2 +- requirements/requirements.in | 1 + requirements/requirements.txt | 22 +- requirements/requirements_dev.in | 7 +- requirements/requirements_dev.txt | 39 +- test/test_app.py | 43 +- test/test_create_aemet.py | 226 ++++++ test/test_create_emt.py | 691 ++++++++++++++++++ test/test_create_informo.py | 179 +++++ test/test_download_aemet.py | 77 ++ test/test_extract_aemet.py | 137 ++++ test/test_extract_emt.py | 488 +++++++++++++ test/test_extract_informo.py | 212 ++++++ test/test_utils.py | 448 ++++++++++++ 21 files changed, 2750 insertions(+), 62 deletions(-) create mode 100644 inesdata_mov_datasets/sources/filter_data/aemet.py create mode 100644 inesdata_mov_datasets/sources/filter_data/emt.py create mode 100644 inesdata_mov_datasets/sources/filter_data/informo.py create mode 100644 test/test_create_aemet.py create mode 100644 test/test_create_emt.py create mode 100644 test/test_create_informo.py create mode 100644 test/test_download_aemet.py create mode 100644 test/test_extract_aemet.py create mode 100644 test/test_extract_emt.py create mode 100644 test/test_extract_informo.py create mode 100644 test/test_utils.py diff --git a/config_dev.yaml b/config_dev.yaml index 259c308..89ac0fc 100644 --- a/config_dev.yaml +++ b/config_dev.yaml @@ -13,7 +13,8 @@ sources: storage: - default: minio + # default: minio + default: local config: minio: access_key : trmlia diff --git a/inesdata_mov_datasets/sources/create/aemet.py b/inesdata_mov_datasets/sources/create/aemet.py index c1670b7..a4f48a9 100644 --- a/inesdata_mov_datasets/sources/create/aemet.py +++ b/inesdata_mov_datasets/sources/create/aemet.py @@ -32,12 +32,15 @@ def download_aemet( aws_access_key_id (str): minio user aws_secret_access_key (str): minio password """ - loop = asyncio.new_event_loop() - loop.run_until_complete( - download_objs( - bucket, prefix, output_path, endpoint_url, aws_access_key_id, aws_secret_access_key + try: + loop = asyncio.new_event_loop() + loop.run_until_complete( + download_objs( + bucket, prefix, output_path, endpoint_url, aws_access_key_id, aws_secret_access_key + ) ) - ) + except Exception as e: + logger.error(e) def generate_df_from_file(content: dict, date: str) -> pd.DataFrame: @@ -99,26 +102,28 @@ def generate_day_df(storage_path: str, date: str): content = json.load(f) df = generate_df_from_file(content, date) dfs.append(df) - if len(dfs) > 0: final_df = pd.concat(dfs) - # fix hour col - final_df["periodo"] = ( - final_df["periodo"] - .str.pad(width=4, side="right", fillchar="0") - .replace(r"(\d{2})(\d+)", r"\1:\2", regex=True) - ) - # add datetime col - final_df["datetime"] = pd.to_datetime(date + " " + final_df["periodo"]) - final_df.drop(columns="periodo", inplace=True) - # sort values - final_df = final_df.sort_values(by="datetime") - # export final df - processed_storage_dir = Path(storage_path) / Path("processed") / "aemet" / date - date_formatted = date.replace("/", "") - Path(processed_storage_dir).mkdir(parents=True, exist_ok=True) - final_df.to_csv(processed_storage_dir / 
f"aemet_{date_formatted}.csv", index=None) - logger.info(f"Created AEMET df of shape {final_df.shape}") + if 'periodo' in final_df.columns: + # fix hour col + final_df["periodo"] = ( + final_df["periodo"] + .str.pad(width=4, side="right", fillchar="0") + .replace(r"(\d{2})(\d+)", r"\1:\2", regex=True) + ) + # add datetime col + final_df["datetime"] = pd.to_datetime(date + " " + final_df["periodo"]) + final_df.drop(columns="periodo", inplace=True) + # sort values + final_df = final_df.sort_values(by="datetime") + # export final df + processed_storage_dir = Path(storage_path) / Path("processed") / "aemet" / date + date_formatted = date.replace("/", "") + Path(processed_storage_dir).mkdir(parents=True, exist_ok=True) + final_df.to_csv(processed_storage_dir / f"aemet_{date_formatted}.csv", index=None) + logger.info(f"Created AEMET df of shape {final_df.shape}") + else: + logger.debug("There is no data to create") else: logger.debug("There is no data to create") diff --git a/inesdata_mov_datasets/sources/create/informo.py b/inesdata_mov_datasets/sources/create/informo.py index 43557c3..2d061be 100644 --- a/inesdata_mov_datasets/sources/create/informo.py +++ b/inesdata_mov_datasets/sources/create/informo.py @@ -79,8 +79,9 @@ def generate_day_df(storage_path: str, date: str): filename = raw_storage_dir / file with open(filename, "r") as f: content = json.load(f) - df = generate_df_from_file(content["pms"]) - dfs.append(df) + if "pms" in content: + df = generate_df_from_file(content["pms"]) + dfs.append(df) if len(dfs) > 0: final_df = pd.concat(dfs) diff --git a/inesdata_mov_datasets/sources/extract/emt.py b/inesdata_mov_datasets/sources/extract/emt.py index 1cf1ed7..8f56be7 100644 --- a/inesdata_mov_datasets/sources/extract/emt.py +++ b/inesdata_mov_datasets/sources/extract/emt.py @@ -44,6 +44,7 @@ async def get_calendar( ) async with session.get(calendar_url, headers=headers) as response: try: + response.raise_for_status() return await response.json() except Exception as e: logger.error("Error in calendar call to the server") @@ -73,6 +74,7 @@ async def get_line_detail( ) async with session.get(line_detail_url, headers=headers) as response: try: + response.raise_for_status() return await response.json() except Exception as e: logger.error(f"Error in line_detail call line {line_id} to the server") @@ -100,6 +102,7 @@ async def get_eta(session: aiohttp, stop_id: str, headers: json) -> json: eta_url = f"https://openapi.emtmadrid.es/v2/transport/busemtmad/stops/{stop_id}/arrives/" async with session.post(eta_url, headers=headers, json=body) as response: try: + response.raise_for_status() return await response.json() except Exception as e: logger.error(f"Error in ETA call stop {stop_id} to the server") @@ -153,7 +156,7 @@ async def login_emt(config: Settings, object_login_name: str, local_path: Path = login_dict_upload, ) - if config.storage.default == "local": + if config.storage.default == "local" and local_path: os.makedirs(local_path, exist_ok=True) with open(os.path.join(local_path, object_login_name), "w") as file: file.write(login_json_str) @@ -243,6 +246,7 @@ async def token_control(config: Settings, date_slash: str, date_day: str) -> str #Catching an error that ocurrs when the server doesnt provide the token expiration try: expiration_date_unix = data["data"][0]["tokenDteExpiration"]["$date"] + print(expiration_date_unix) except: logger.error(f"Error saving time expiration from login. 
Solving the problem retrying the call.") token = await login_emt(config, object_login_name, local_path=dir_path) @@ -252,6 +256,7 @@ async def token_control(config: Settings, date_slash: str, date_day: str) -> str expiration_date_unix / 1000 ) # miliseconds to seconds now = datetime.datetime.now() + print(now) # Compare the time expiration of the token withthe actual date if now >= expiration_date: # reset token token = await login_emt(config, object_login_name, local_path=dir_path) diff --git a/inesdata_mov_datasets/sources/filter_data/aemet.py b/inesdata_mov_datasets/sources/filter_data/aemet.py new file mode 100644 index 0000000..1d990da --- /dev/null +++ b/inesdata_mov_datasets/sources/filter_data/aemet.py @@ -0,0 +1,37 @@ +"""Gather raw data from aemet.""" + +import traceback + +import requests +from loguru import logger + +from inesdata_mov_datasets.settings import Settings + + +async def get_filter_aemet(config: Settings): + """Request aemet API to get data from Madrid weather. + + Args: + config (Settings): Object with the config file. + """ + try: + url_madrid = ( + "https://opendata.aemet.es/opendata/api/prediccion/especifica/municipio/horaria/28079" + ) + + headers = { + "api_key": config.sources.aemet.credentials.api_key, + "Content-Type": "application/json", + "Accept": "application/json", + } + + r = requests.get(url_madrid, headers=headers) + r_json = requests.get(r.json()["datos"]).json() + + logger.info("Extracted AEMET") + + return r_json + + except Exception as e: + logger.error(e) + logger.error(traceback.format_exc()) diff --git a/inesdata_mov_datasets/sources/filter_data/emt.py b/inesdata_mov_datasets/sources/filter_data/emt.py new file mode 100644 index 0000000..7764473 --- /dev/null +++ b/inesdata_mov_datasets/sources/filter_data/emt.py @@ -0,0 +1,104 @@ +"""Gather raw data from EMT.""" + +import asyncio +import datetime +import json +import traceback + +import aiohttp +import pytz +from loguru import logger + +from inesdata_mov_datasets.settings import Settings +from inesdata_mov_datasets.sources.extract.emt import ( + get_calendar, + token_control, +) + + +async def get_eta(session: aiohttp, stop_id: str, line_id: str, headers: json) -> json: + """Make the API call to ETA endpoint. + + Args: + session (aiohttp): Call session to make faster the calls to the same API. + stop_id (str): Id of the bus stop. + line_id (str): Id of the bus line. + headers (json): Headers of the http call. + + Returns: + json: Response of the petition in json format. + """ + body = { + "cultureInfo": "ES", + "Text_StopRequired_YN": "N", + "Text_EstimationsRequired_YN": "Y", + "Text_IncidencesRequired_YN": "N", + } + eta_url = ( + f"https://openapi.emtmadrid.es/v2/transport/busemtmad/stops/{stop_id}/arrives/{line_id}" + ) + + async with session.post(eta_url, headers=headers, json=body) as response: + try: + return await response.json() + except Exception as e: + logger.error(f"Error in ETA call stop {stop_id} to the server") + logger.error(e) + return {"code": -1} + + +async def get_filter_emt(config: Settings, stop_id: str, line_id: str): + """Get all the data from EMT endpoints. + + Args: + config (Settings): Object with the config file. + stop_id (str): The stop id. + line_id (str): The line id. 
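+
+    Returns:
+        tuple: The ETA response and the calendar response returned by the EMT API.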
+    """
+    try:
+        # Get the Madrid timezone and format the dates used in the files' object names
+        europe_timezone = pytz.timezone("Europe/Madrid")
+        current_datetime = datetime.datetime.now(europe_timezone).replace(second=0)
+        formatted_date_day = current_datetime.strftime(
+            "%Y%m%d"
+        )  # formatted date year|month|day all together
+        formatted_date_slash = current_datetime.strftime(
+            "%Y/%m/%d"
+        )  # formatted date year/month/day for storage in Minio
+
+        access_token = await token_control(
+            config, formatted_date_slash, formatted_date_day
+        )  # Obtain token from EMT
+
+        # Headers for requests to the EMT API
+        headers = {
+            "accessToken": access_token,
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        }
+
+        async with aiohttp.ClientSession() as session:
+            # Lists to store the asynchronous tasks
+            calendar_tasks = []
+            eta_tasks = []
+
+            # ETA
+            eta_task = asyncio.ensure_future(get_eta(session, stop_id, line_id, headers))
+            eta_tasks.append(eta_task)
+
+            # Calendar
+            calendar_task = asyncio.ensure_future(
+                get_calendar(session, formatted_date_day, formatted_date_day, headers)
+            )
+            calendar_tasks.append(calendar_task)
+
+            eta_responses = await asyncio.gather(*eta_tasks)
+            calendar_responses = await asyncio.gather(*calendar_tasks)
+
+            logger.info("Extracted EMT")
+            return eta_responses[0], calendar_responses[0]
+
+    except Exception as e:
+        logger.error(e)
+        logger.error(traceback.format_exc())
diff --git a/inesdata_mov_datasets/sources/filter_data/informo.py b/inesdata_mov_datasets/sources/filter_data/informo.py
new file mode 100644
index 0000000..5e71e83
--- /dev/null
+++ b/inesdata_mov_datasets/sources/filter_data/informo.py
@@ -0,0 +1,33 @@
+"""Gather raw data from Informo."""
+
+import traceback
+
+import requests
+import xmltodict
+from loguru import logger
+
+from inesdata_mov_datasets.settings import Settings
+
+
+async def get_filter_informo(config: Settings):
+    """Request the Informo API to get traffic data for Madrid.
+
+    Args:
+        config (Settings): Object with the config file.
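+
+    Returns:
+        dict: The traffic measurement points ("pms" element) parsed from the XML response.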
+    """
+    try:
+        url_informo = "https://informo.madrid.es/informo/tmadrid/pm.xml"
+
+        r = requests.get(url_informo)
+
+        # Parse XML
+        xml_dict = xmltodict.parse(r.content)
+
+        logger.info("Extracted INFORMO")
+
+        return xml_dict["pms"]
+
+    except Exception as e:
+        logger.error(e)
+        logger.error(traceback.format_exc())
diff --git a/requirements/requirements.in b/requirements/requirements.in
index 340a441..433da2d 100755
--- a/requirements/requirements.in
+++ b/requirements/requirements.in
@@ -6,6 +6,7 @@ pandas==2.2.0
 polars==0.20.22
 pyarrow==15.0.0
 pydantic-settings==2.1.0
+pydantic==2.6.1
 pytz==2024.1
 PyYAML==6.0.1
 requests == 2.31.0
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index eeb310a..57bbea1 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,15 +1,17 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.10
 # by the following command:
 #
 #    pip-compile requirements/requirements.in
 #
+--trusted-host pypi.org
+--trusted-host files.pythonhosted.org
+--trusted-host pypi.python.org
+
 aioboto3==12.3.0
     # via -r requirements/requirements.in
 aiobotocore[boto3]==2.11.2
-    # via
-    #   aioboto3
-    #   aiobotocore
+    # via aioboto3
 aiofiles==23.2.1
     # via -r requirements/requirements.in
 aiohttp==3.9.3
@@ -22,6 +24,8 @@ aiosignal==1.3.1
     # via aiohttp
 annotated-types==0.6.0
     # via pydantic
+async-timeout==4.0.3
+    # via aiohttp
 attrs==23.2.0
     # via aiohttp
 boto3==1.34.34
@@ -67,10 +71,14 @@ numpy==1.26.4
     #   pyarrow
 pandas==2.2.0
     # via -r requirements/requirements.in
+polars==0.20.22
+    # via -r requirements/requirements.in
 pyarrow==15.0.0
     # via -r requirements/requirements.in
 pydantic==2.6.1
-    # via pydantic-settings
+    # via
+    #   -r requirements/requirements.in
+    #   pydantic-settings
 pydantic-core==2.16.2
     # via pydantic
 pydantic-settings==2.1.0
@@ -100,9 +108,7 @@ shellingham==1.5.4
 six==1.16.0
     # via python-dateutil
 typer[all]==0.9.0
-    # via
-    #   -r requirements/requirements.in
-    #   typer
+    # via -r requirements/requirements.in
 typing-extensions==4.9.0
     # via
     #   pydantic
diff --git a/requirements/requirements_dev.in b/requirements/requirements_dev.in
index a454bd4..1ee222c 100755
--- a/requirements/requirements_dev.in
+++ b/requirements/requirements_dev.in
@@ -4,8 +4,11 @@ black~=23.3.0 # Code formater
 isort==5.13.2 # Sort imports
 pre-commit==3.2.2 # Pre-commit hooks
 pydocstyle==6.3.0 # Analyze docstring conventions
-pytest==8.0.0 # Unit tests
-pytest-cov ==4.1.0 # Coverage
+pytest==8.2.0 # Unit tests
+pytest-cov ==4.1.0 # Coverage
+pytest-mock==3.14.0
+pytest-tornasync==0.6.0
+pytest-asyncio==0.24.0
 ruff==0.2.1 # A built-package formattwine
 twine==4.0.2
 mkdocs==1.5.3 # Documentation
diff --git a/requirements/requirements_dev.txt b/requirements/requirements_dev.txt
index 90c81f9..985f83a 100644
--- a/requirements/requirements_dev.txt
+++ b/requirements/requirements_dev.txt
@@ -1,9 +1,13 @@
 #
-# This file is autogenerated by pip-compile with Python 3.11
+# This file is autogenerated by pip-compile with Python 3.10
 # 
by the following command: # # pip-compile requirements/requirements_dev.in # +--trusted-host pypi.org +--trusted-host files.pythonhosted.org +--trusted-host pypi.python.org + annotated-types==0.6.0 # via # -c requirements/requirements.txt @@ -48,9 +52,7 @@ comm==0.2.2 contourpy==1.2.0 # via matplotlib coverage[toml]==7.4.3 - # via - # coverage - # pytest-cov + # via pytest-cov cryptography==42.0.5 # via secretstorage cycler==0.12.1 @@ -65,6 +67,10 @@ distlib==0.3.4 # via virtualenv docutils==0.20.1 # via readme-renderer +exceptiongroup==1.2.2 + # via + # ipython + # pytest executing==2.0.1 # via stack-data filelock==3.4.2 @@ -248,7 +254,7 @@ platformdirs==2.5.0 # jupyter-core # mkdocs # virtualenv -pluggy==1.4.0 +pluggy==1.5.0 # via pytest pre-commit==3.2.2 # via -r requirements/requirements_dev.in @@ -283,12 +289,21 @@ pymdown-extensions==10.7.1 # via mkdocs-material pyparsing==3.1.2 # via matplotlib -pytest==8.0.0 +pytest==8.2.0 # via # -r requirements/requirements_dev.in + # pytest-asyncio # pytest-cov + # pytest-mock + # pytest-tornasync +pytest-asyncio==0.24.0 + # via -r requirements/requirements_dev.in pytest-cov==4.1.0 # via -r requirements/requirements_dev.in +pytest-mock==3.14.0 + # via -r requirements/requirements_dev.in +pytest-tornasync==0.6.0 + # via -r requirements/requirements_dev.in python-dateutil==2.8.2 # via # -c requirements/requirements.txt @@ -362,10 +377,16 @@ statsmodels==0.14.1 # via ydata-profiling tangled-up-in-unicode==0.2.0 # via visions +tomli==2.0.2 + # via + # black + # coverage + # pytest tornado==6.4 # via # ipykernel # jupyter-client + # pytest-tornasync tqdm==4.66.2 # via ydata-profiling traitlets==5.14.2 @@ -398,10 +419,8 @@ urllib3==2.0.7 # twine virtualenv==20.13.1 # via pre-commit -visions[type-image-path]==0.7.5 - # via - # visions - # ydata-profiling +visions[type_image_path]==0.7.5 + # via ydata-profiling watchdog==4.0.0 # via mkdocs wcwidth==0.2.13 diff --git a/test/test_app.py b/test/test_app.py index 70319dd..a1c970a 100644 --- a/test/test_app.py +++ b/test/test_app.py @@ -4,25 +4,40 @@ runner = CliRunner() - -def test_command_gather(): - result = runner.invoke(app, ["gather", "--config", "config.yaml"]) - assert result.exit_code == 0 - def test_command_create(): - # if --start-date is not provided, an error (exit_code = 2) is expected. - result = runner.invoke(app, ["create", "--config", "config.yaml"]) - assert result.exit_code == 2 - assert """Missing option '--start-date'""" in result.stdout + result = runner.invoke(app, ["create", "--config-path", "config.yaml"]) + assert result.exit_code == 1 # if --start-date does not match the format YYYYMMDD, an error (exit_code = 2) is expected. - bad_date = "20221401" - result = runner.invoke(app, ["create", "--config", "config.yaml", "--start-date", bad_date]) + bad_start_date = "20221401" + result = runner.invoke(app, ["create", "--config-path", "config.yaml", "--start-date", bad_start_date]) + assert result.exit_code == 2 + assert """Invalid value for '--start-date': '{}'""".format(bad_start_date) in result.stdout + + # if --end-date does not match the format YYYYMMDD, an error (exit_code = 2) is expected. 
+    bad_end_date = "20221401"
+    result = runner.invoke(app, ["create", "--config-path", "config.yaml", "--end-date", bad_end_date])
     assert result.exit_code == 2
-    assert """Invalid value for '--start-date': '{}'""".format(bad_date) in result.stdout
+    assert """Invalid value for '--end-date': '{}'""".format(bad_end_date) in result.stdout
 
     # if --config, --start-date and --end-date are provided, no error is expected.
     good_date = "20220501"
-    result = runner.invoke(app, ["create", "--config", "config.yaml", "--start-date", good_date, "--end-date", good_date])
-    assert result.exit_code == 0
\ No newline at end of file
+    result = runner.invoke(app, ["create", "--config-path", "config.yaml", "--start-date", good_date, "--end-date", good_date])
+    assert result.exit_code == 0
+
+def test_command_extract():
+    # if --config-path is provided, no error is expected.
+    result = runner.invoke(app, ["extract", "--config-path", "config.yaml"])
+    assert result.exit_code == 0
+
+    # if --config-path is not provided, an error (exit_code = 2) is expected.
+    result = runner.invoke(app, ["extract"])
+    assert """ Missing option '--config-path'.""" in result.stdout
+    assert result.exit_code == 2
+
+    # if --sources not in [all, emt, aemet, informo], an error (exit_code = 2) is expected.
+    bad_source = "bad_source"
+    result = runner.invoke(app, ["extract", "--config-path", "config.yaml", "--sources", bad_source])
+    assert result.exit_code == 2
+    assert """Invalid value for '--sources': '{}' is not one of 'all', 'emt',""".format(bad_source) in result.stdout
diff --git a/test/test_create_aemet.py b/test/test_create_aemet.py
new file mode 100644
index 0000000..8039bb0
--- /dev/null
+++ b/test/test_create_aemet.py
@@ -0,0 +1,226 @@
+import pytest
+import pandas as pd
+from pathlib import Path
+from unittest.mock import patch, MagicMock, mock_open
+from inesdata_mov_datasets.sources.create.aemet import generate_df_from_file, generate_day_df, create_aemet
+
+###################### generate_df_from_file
+# Success case
+def test_generate_df_from_file_success():
+    """Success test for `generate_df_from_file`."""
+
+    # Sample input content (dict)
+    mock_content = [{
+        "prediccion": {
+            "dia": [{
+                "fecha": "2024/10/07",
+                "orto": "07:00",
+                "ocaso": "19:00",
+                "temperatura": [{
+                    "periodo": 0,
+                    "value": 15,
+                    "descripcion": "15"
+                }],
+                "viento": [{
+                    "periodo": 0,
+                    "value": 10,
+                    "descripcion": "SO"
+                }]
+            }]
+        }
+    }]
+
+    # Date used for filtering
+    date = "2024/10/07"
+
+    # Run the function
+    result_df = generate_df_from_file(mock_content, date)
+
+    # Checks
+    assert not result_df.empty
+    assert "temperatura_value" in result_df.columns
+    assert "viento_value" in result_df.columns
+    assert result_df["temperatura_value"].iloc[0] == 15
+    assert result_df["viento_value"].iloc[0] == 10
+
+# Empty prediction case
+def test_generate_df_from_file_empty_prediction():
+    """Test when the content has no prediction."""
+
+    # Sample content with an empty prediction
+    mock_content = [{"prediccion": []}]
+
+    # Date used for filtering
+    date = "2024/10/07"
+
+    # Run the function
+    result_df = generate_df_from_file(mock_content, date)
+
+    # Checks
+    assert result_df.empty
+
+# Exception case
+def test_generate_df_from_file_exception():
+    """Test handling an exception inside 
`generate_df_from_file`."""
+
+    # Malformed content that triggers an exception
+    mock_content = [{"prediccion": None}]
+    date = "2024/10/07"
+
+    with patch('loguru.logger.error') as mock_logger_error:
+        # Run the function
+        result_df = generate_df_from_file(mock_content, date)
+
+        # Checks
+        assert result_df.empty
+
+        # Verify that the error was logged
+        assert mock_logger_error.called
+
+
+
+###################### generate_day_df
+@patch('inesdata_mov_datasets.sources.create.aemet.logger')
+@patch('inesdata_mov_datasets.sources.create.aemet.os.listdir')
+@patch('inesdata_mov_datasets.sources.create.aemet.open', new_callable=mock_open, read_data='{"data": [{"valor": 10, "periodo": "1200"}]}')
+@patch('inesdata_mov_datasets.sources.create.aemet.generate_df_from_file')
+def test_generate_day_df_valid_data(mock_generate_df_from_file, mock_open, mock_listdir, mock_logger):
+    """Check that a DataFrame is generated and saved for valid data."""
+    # Mock listdir to return dummy files
+    mock_listdir.return_value = ["file1.json", "file2.json"]
+
+    # Make generate_df_from_file return a valid DataFrame
+    mock_generate_df_from_file.return_value = pd.DataFrame(
+        {"periodo": ["1200"], "valor": [10], "datetime": pd.to_datetime("2024-10-01 12:00:00")}
+    )
+
+    storage_path = "/tmp"
+    date = "2024/10/01"
+
+    # Run the function
+    generate_day_df(storage_path, date)
+
+    # Verify that the DataFrame was generated and saved correctly
+    processed_file_path = Path(storage_path) / Path("processed") / "aemet" / date / f"aemet_{date.replace('/', '')}.csv"
+    assert processed_file_path.is_file(), "The processed file was not created"
+
+@patch('inesdata_mov_datasets.sources.create.aemet.logger')
+@patch('inesdata_mov_datasets.sources.create.aemet.os.listdir')
+def test_generate_day_df_no_files(mock_listdir, mock_logger):
+    """Check the behaviour when the directory contains no files."""
+    # Simulate an empty directory
+    mock_listdir.return_value = []
+
+    storage_path = "/tmp"
+    date = "2024/10/02"
+
+    # Run the function
+    generate_day_df(storage_path, date)
+
+    # Verify that no processed file was generated
+    processed_file_path = Path(storage_path) / Path("processed") / "aemet" / date / f"aemet_{date.replace('/', '')}.csv"
+    assert not processed_file_path.is_file(), "No processed file should have been created"
+
+@patch('inesdata_mov_datasets.sources.create.aemet.logger')
+@patch('inesdata_mov_datasets.sources.create.aemet.os.listdir')
+@patch('builtins.open', new_callable=mock_open, read_data='{"no_data_key": [{"valor": 10}]}')
+def test_generate_day_df_incorrect_structure(mock_open, mock_listdir, mock_logger):
+    """Check the behaviour when the file structure is not the expected one."""
+    # One file in the directory, but with an unexpected structure
+    mock_listdir.return_value = ["file1.json"]
+
+    storage_path = "/tmp"
+    date = "2024/10/03"
+
+    # Run the function
+    generate_day_df(storage_path, date)
+
+    # Verify that the logger recorded a debug message saying no DataFrame was created
+    mock_logger.debug.assert_called_with("There is no data to create")
+
+###################### create_aemet
+@pytest.fixture
+def mock_settings():
+    """Fixture that mocks the settings object."""
+    # Create a mock of the Settings class
+    settings = MagicMock()
+
+    # Mock configuration
+    settings.storage = 
MagicMock()
+    settings.storage.config = MagicMock()
+    settings.storage.config.local = MagicMock()
+    settings.storage.config.local.path = "/tmp"
+    settings.storage.default = "local"
+    settings.storage.config.minio = MagicMock()
+    settings.storage.config.minio.bucket = "test-bucket"
+    settings.storage.config.minio.endpoint = "http://localhost:9000"
+    settings.storage.config.minio.access_key = "test-access-key"
+    settings.storage.config.minio.secret_key = "test-secret-key"
+
+    return settings
+
+@patch('inesdata_mov_datasets.sources.create.aemet.instantiate_logger')
+@patch('inesdata_mov_datasets.sources.create.aemet.logger.info')
+@patch('inesdata_mov_datasets.sources.create.aemet.download_aemet')
+@patch('inesdata_mov_datasets.sources.create.aemet.generate_day_df')
+@patch('inesdata_mov_datasets.sources.create.aemet.logger.debug')
+def test_create_aemet(mock_logger_debug, mock_generate_day_df, mock_download_aemet, mock_logger_info, mock_instantiate_logger, mock_settings):
+    """Test for `create_aemet`."""
+
+    date = "2024/10/07"
+
+    # Run the function
+    create_aemet(mock_settings, date)
+
+    # Verify that the info logger was called
+    mock_logger_info.assert_called_once_with(f"Creating AEMET dataset for date: {date}")
+
+    # Verify that the logger was instantiated
+    mock_instantiate_logger.assert_called_once_with(mock_settings, "AEMET", "create")
+
+    # Verify that nothing was downloaded, since storage.default is "local"
+    mock_download_aemet.assert_not_called()
+
+    # Verify that `generate_day_df` was called with the right arguments
+    mock_generate_day_df.assert_called_once_with(storage_path="/tmp", date=date)
+
+@patch('inesdata_mov_datasets.sources.create.aemet.instantiate_logger')
+@patch('inesdata_mov_datasets.sources.create.aemet.logger.error')
+@patch('inesdata_mov_datasets.sources.create.aemet.download_aemet')
+@patch('inesdata_mov_datasets.sources.create.aemet.generate_day_df')
+@patch('inesdata_mov_datasets.sources.create.aemet.logger.info')
+def test_create_aemet_exception(
+    mock_logger_info,
+    mock_generate_day_df,
+    mock_download_aemet,
+    mock_logger_error,
+    mock_instantiate_logger,
+    mock_settings):
+    """Test exception handling in `create_aemet`."""
+
+    # Make `generate_day_df` raise an exception
+    mock_generate_day_df.side_effect = Exception("Test error")
+
+    date = "2024/10/07"
+
+    # Call the function
+    create_aemet(mock_settings, date)
+    # Verify that the error logger was called twice
+    assert mock_logger_error.call_count == 2
+
+    # Verify that the info logger was called
+    mock_logger_info.assert_called_once_with(f"Creating AEMET dataset for date: {date}")
+
+    # Verify that the logger was instantiated
+    mock_instantiate_logger.assert_called_once_with(mock_settings, "AEMET", "create")
\ No newline at end of file
diff --git a/test/test_create_emt.py b/test/test_create_emt.py
new file mode 100644
index 0000000..8f08b92
--- /dev/null
+++ b/test/test_create_emt.py
@@ -0,0 +1,691 @@
+import pytest
+import pandas as pd
+import json
+import os
+from pathlib import Path
+import tempfile
+import logging
+from unittest.mock import patch, mock_open, MagicMock
+from pydantic import BaseModel
+from inesdata_mov_datasets.sources.create.emt import generate_calendar_df_from_file, generate_calendar_day_df, create_calendar_emt, generate_line_df_from_file, generate_line_day_df, create_line_detail_emt, generate_eta_df_from_file, generate_eta_day_df, create_eta_emt, join_calendar_line_datasets, 
join_eta_dataset, create_emt +from inesdata_mov_datasets.settings import Settings + +###################### generate_calendar_df_from_file +def test_generate_calendar_df_from_file(): + # Caso de prueba con datos válidos + content_valid = { + "data": [ + {"date": "2024-10-01", "dayType": "workday"}, + {"date": "2024-10-02", "dayType": "holiday"} + ], + "datetime": "2024-10-01 00:00:00" + } + + # Llamar a la función con datos válidos + df_valid = generate_calendar_df_from_file(content_valid) + + # Verificaciones + assert not df_valid.empty # El DataFrame no debe estar vacío + assert df_valid.shape[0] == 2 # Debe haber 2 filas + assert "datetime" in df_valid.columns # La columna 'datetime' debe estar presente + assert "date" in df_valid.columns # La columna 'date' debe estar presente + assert pd.to_datetime(df_valid["datetime"].iloc[0]) == pd.to_datetime(content_valid["datetime"]) # Verificar datetime + assert pd.to_datetime(df_valid["date"].iloc[0]) == pd.to_datetime("2024-10-01",dayfirst=True) # Verificar la fecha + + # Caso de prueba con datos vacíos + content_empty = { + "data": [], + "datetime": "2024-10-01 00:00:00" + } + + df_empty = generate_calendar_df_from_file(content_empty) + + # Verificaciones para datos vacíos + assert df_empty.empty # El DataFrame debe estar vacío + + # Caso de prueba con datos inválidos (sin la clave "data") + content_invalid = { + "datetime": "2024-10-01 00:00:00" + } + + df_invalid = generate_calendar_df_from_file(content_invalid) + + # Verificaciones para datos inválidos + assert df_invalid.empty # El DataFrame debe estar vacío + + # Caso de prueba con datos donde ocurre una excepción + with patch('inesdata_mov_datasets.sources.create.emt.logger') as mock_logger: # Cambia 'inesdata_mov_datasets.sources.create.emt' por el nombre de tu módulo + content_error = { + "data": None, # Esto generará un error en len() + "datetime": "2024-10-01 00:00:00" + } + + df_error = generate_calendar_df_from_file(content_error) + + # Verificaciones para datos que causan error + assert df_error.empty # El DataFrame debe estar vacío + assert mock_logger.error.called # Verificar que se registró un error + +###################### generate_calendar_day_df +@pytest.fixture +def mock_file_content(): + # Simula el contenido de un archivo JSON + return [{"data": [{"date": "2024-01-10", "dayType": "working"}], "datetime": "2024-01-10"}] + +@pytest.fixture +def mock_storage_path(tmp_path): + # Usa la ruta temporal proporcionada por pytest + return str(tmp_path) + +@patch("builtins.open", new_callable=mock_open) +@patch("os.listdir") +@patch("inesdata_mov_datasets.sources.create.emt.generate_calendar_df_from_file") # Cambia 'inesdata_mov_datasets.sources.create.emt' por el nombre del módulo correcto +def test_generate_calendar_day_df(mock_generate_calendar_df_from_file, mock_listdir, mock_open, mock_storage_path, mock_file_content): + # Configura el mock para el contenido del archivo + mock_open.return_value.__enter__.return_value.read = lambda: json.dumps(mock_file_content) + + # Configura el mock para listar archivos + mock_listdir.return_value = ["file1.json", "file2.json"] + + # Configura el mock para generate_calendar_df_from_file + mock_generate_calendar_df_from_file.return_value = pd.DataFrame({ + "datetime": [mock_file_content[0]["datetime"]], + "date": ["2024-01-10"], + "dayType": ["working"] + }) + + # Llama a la función + result_df = generate_calendar_day_df(mock_storage_path, "2024/01/10") + + # Verifica que el DataFrame generado tiene la forma correcta + assert 
isinstance(result_df, pd.DataFrame) + assert not result_df.empty + assert result_df.shape[0] == 2 # Debería haber 2 filas + assert list(result_df["dayType"]) == ["working", "working"] # Verifica el contenido esperado + + # Verifica que las funciones de log fueron llamadas correctamente + mock_generate_calendar_df_from_file.assert_called() + mock_listdir.assert_called_once() + +###################### create_calendar_emt +@pytest.fixture +def mock_settings_create_calendar_emt(): + """Fixture para simular la configuración de settings.""" + # Crear un mock para la clase Settings + settings = MagicMock() + + # Configuración del mock + settings.storage = MagicMock() + settings.storage.config = MagicMock() + settings.storage.config.local = MagicMock() + settings.storage.config.local.path = "/tmp" + settings.storage.default = "minio" + settings.storage.config.minio = MagicMock() + settings.storage.config.minio.bucket = "test-bucket" + settings.storage.config.minio.endpoint = "http://localhost:9000" + settings.storage.config.minio.access_key = "test-access-key" + settings.storage.config.minio.secret_key = "test-secret-key" + + return settings + +@pytest.fixture +def mock_generate_calendar_day_df(): + with patch('inesdata_mov_datasets.sources.create.emt.generate_calendar_day_df', return_value=pd.DataFrame({"datetime": []})) as mock: + yield mock + +@pytest.fixture +def mock_async_download(): + with patch('inesdata_mov_datasets.sources.create.emt.async_download') as mock: + yield mock + +def test_create_calendar_emt(mock_settings_create_calendar_emt, mock_generate_calendar_day_df, mock_async_download): + date = "2024/10/01" + + # Ejecutar la función + df = create_calendar_emt(mock_settings_create_calendar_emt, date) + # Verificar que async_download se llamó correctamente + mock_async_download.assert_called_once_with( + bucket=mock_settings_create_calendar_emt.storage.config.minio.bucket, + prefix=f"raw/emt/{date}/calendar/", + output_path=mock_settings_create_calendar_emt.storage.config.local.path, + endpoint_url=mock_settings_create_calendar_emt.storage.config.minio.endpoint, + aws_access_key_id=mock_settings_create_calendar_emt.storage.config.minio.access_key, + aws_secret_access_key=mock_settings_create_calendar_emt.storage.config.minio.secret_key, + ) + + # Verificar que generate_calendar_day_df se llamó + mock_generate_calendar_day_df.assert_called_once_with(storage_path="/tmp", date=date) + + # Verificar que se devolvió un DataFrame + assert isinstance(df, pd.DataFrame) + +def test_create_calendar_emt_no_download(mock_settings_create_calendar_emt, mock_generate_calendar_day_df): + mock_settings_create_calendar_emt.storage.default = "local" # Cambia a "local" para no descargar + + date = "2024/10/01" + df = create_calendar_emt(mock_settings_create_calendar_emt, date) + + # Verificar que no se llamó a async_download + with patch('inesdata_mov_datasets.sources.create.emt.async_download') as mock_async_download: + mock_async_download.assert_not_called() + + # Verificar que generate_calendar_day_df se llamó + mock_generate_calendar_day_df.assert_called_once_with(storage_path=mock_settings_create_calendar_emt.storage.config.local.path, date=date) + assert isinstance(df, pd.DataFrame) + +###################### generate_line_df_from_file +def test_generate_line_df_from_file(): + # Simular contenido de entrada + content = { + "data": [ + { + "line": 10, + "timeTable": [ + { + "Direction2": + {"StartTime": "08:00", + "StopTime": "10:00", + "MinimunFrequency": 10, + "MaximumFrequency": 15, + 
"FrequencyText": "De 07:00 a 23:30 -> Cada 8 - 30min./",}, + "Direction1":{ + "StartTime": "08:00", + "StopTime": "10:00", + "MinimunFrequency": 10, + "MaximumFrequency": 15, + "FrequencyText": "De 07:00 a 23:30 -> Cada 8 - 30min./",}, + "idDayType": + "FESTIVO" + }, + ], + } + ], + "datetime": "2024-10-01T00:00:00" + } + + # Ejecutar la función + df = generate_line_df_from_file(content) + + # Verificar que el DataFrame no está vacío + assert not df.empty + + # Verificar las columnas del DataFrame + expected_columns = [ + "dayType", + "StartTime", + "StopTime", + "MinimunFrequency", + "MaximumFrequency", + "datetime", + "date", + "line", + ] + assert list(df.columns) == expected_columns + print(df) + # Verificar el contenido del DataFrame + assert df["dayType"].iloc[0] == "FESTIVO" # Correspondiente al primer elemento de Direction1 + assert df["StartTime"].iloc[0] == "08:00" + assert df["StopTime"].iloc[0] == "10:00" + assert df["MinimunFrequency"].iloc[0] == 10 + assert df["MaximumFrequency"].iloc[0] == 15 + assert pd.to_datetime(df["datetime"].iloc[0]) == pd.to_datetime("2024-10-01T00:00:00") + assert pd.to_datetime(df["date"].iloc[0]).date() == pd.to_datetime("2024-10-01").date() + assert df["line"].iloc[0] == 10 # Línea correspondiente al contenido + +def test_generate_line_df_from_file_empty_data(): + # Simular contenido de entrada con datos vacíos + content = { + "data": [], + "datetime": "2024-10-01T00:00:00" + } + + # Ejecutar la función + df = generate_line_df_from_file(content) + + # Verificar que el DataFrame está vacío + assert df.empty + +###################### generate_line_day_df +# Función simulada para generar un DataFrame a partir de contenido (mock) +def mock_generate_line_df_from_file(content): + return pd.DataFrame({ + "dayType": [content["data"][0]["timeTable"][0]["idDayType"]], + "StartTime": [content["data"][0]["timeTable"][0]['Direction1']["StartTime"]], + "StopTime": [content["data"][0]["timeTable"][0]['Direction1']["StopTime"]], + "MinimunFrequency": [content["data"][0]["timeTable"][0]['Direction1']["MinimunFrequency"]], + "MaximumFrequency": [content["data"][0]["timeTable"][0]['Direction1']["MaximumFrequency"]], + "datetime": [pd.to_datetime(content["datetime"])], + "date": [pd.to_datetime(content["datetime"]).date()], + "line": [content["data"][0]["line"]] + }) + +@patch('inesdata_mov_datasets.sources.create.emt.generate_line_df_from_file', side_effect=mock_generate_line_df_from_file) +@patch('os.listdir', return_value=['file1.json', 'file2.json']) +@patch('builtins.open', new_callable=mock_open, read_data=json.dumps({ + "data": [ + { + "line": 10, + "timeTable": [ + {"Direction1": + { + "StartTime": "08:00", + "StopTime": "10:00", + "MinimunFrequency": 10, + "MaximumFrequency": 15}, + "idDayType": 1} + ] + } + ], + "datetime": "2024-10-01T00:00:00" +})) +def test_generate_line_day_df(mock_open, mock_listdir, mock_generate_line_df_from_file): + # Establecer el path y la fecha + storage_path = "/tmp" + date = "2024/10/01" + + # Ejecutar la función + df = generate_line_day_df(storage_path, date) + + # Verificar que el DataFrame no está vacío + assert not df.empty + + # Verificar las columnas del DataFrame + expected_columns = [ + "dayType", + "StartTime", + "StopTime", + "MinimunFrequency", + "MaximumFrequency", + "datetime", + "date", + "line", + ] + assert list(df.columns) == expected_columns + + # Verificar el contenido del DataFrame + assert df["dayType"].iloc[0] == 1 + assert df["StartTime"].iloc[0] == "08:00" + assert df["StopTime"].iloc[0] == "10:00" + 
assert df["MinimunFrequency"].iloc[0] == 10 + assert df["MaximumFrequency"].iloc[0] == 15 + assert pd.to_datetime(df["datetime"].iloc[0]) == pd.to_datetime("2024-10-01T00:00:00") + assert pd.to_datetime(df["date"].iloc[0]).date() == pd.to_datetime("2024-10-01").date() + assert df["line"].iloc[0] == 10 + +def test_generate_line_day_df_no_files(): + # Simular la situación donde no hay archivos en el directorio + with patch('os.listdir', return_value=[]): + df = generate_line_day_df("/tmp", "2024/10/01") + assert df.empty # El DataFrame debe estar vacío + +###################### create_line_detail_emt +# Mock para el retorno de la configuración de almacenamiento +def mock_storage_settings_create_line_detail_emt(): + """Fixture para simular la configuración de settings.""" + # Crear un mock para la clase Settings + settings = MagicMock() + + # Configuración del mock + settings.storage = MagicMock() + settings.storage.config = MagicMock() + settings.storage.config.local = MagicMock() + settings.storage.config.local.path = "/tmp" + settings.storage.default = "minio" + settings.storage.config.minio = MagicMock() + settings.storage.config.minio.bucket = "test-bucket" + settings.storage.config.minio.endpoint = "http://localhost:9000" + settings.storage.config.minio.access_key = "test-access-key" + settings.storage.config.minio.secret_key = "test-secret-key" + + return settings + +# Mock de la función de descarga +@patch('inesdata_mov_datasets.sources.create.emt.async_download') +@patch('inesdata_mov_datasets.sources.create.emt.generate_line_day_df') +def test_create_line_detail_emt(mock_generate_line_day_df, mock_async_download): + # Preparar el mock para generate_line_day_df + mock_generate_line_day_df.return_value = pd.DataFrame({ + "dayType": [1], + "StartTime": ["08:00"], + "StopTime": ["10:00"], + "MinimunFrequency": [10], + "MaximumFrequency": [15], + "datetime": [pd.to_datetime("2024-10-01T00:00:00")], + "date": [pd.to_datetime("2024-10-01").date()], + "line": [10] + }) + + # Preparar la configuración de settings + settings = mock_storage_settings_create_line_detail_emt() + date = "2024/10/01" + + # Ejecutar la función + df = create_line_detail_emt(settings, date) + + # Verificar que la función de descarga fue llamada + mock_async_download.assert_called_once_with( + bucket=settings.storage.config.minio.bucket, + prefix=f"raw/emt/{date}/line_detail/", + output_path=settings.storage.config.local.path, + endpoint_url=settings.storage.config.minio.endpoint, + aws_access_key_id=settings.storage.config.minio.access_key, + aws_secret_access_key=settings.storage.config.minio.secret_key, + ) + + # Verificar que el DataFrame no está vacío + assert not df.empty + + # Verificar las columnas del DataFrame + expected_columns = [ + "dayType", + "StartTime", + "StopTime", + "MinimunFrequency", + "MaximumFrequency", + "datetime", + "date", + "line", + ] + assert list(df.columns) == expected_columns + + # Verificar el contenido del DataFrame + assert df["dayType"].iloc[0] == 1 + assert df["StartTime"].iloc[0] == "08:00" + assert df["StopTime"].iloc[0] == "10:00" + assert df["MinimunFrequency"].iloc[0] == 10 + assert df["MaximumFrequency"].iloc[0] == 15 + assert pd.to_datetime(df["datetime"].iloc[0]) == pd.to_datetime("2024-10-01T00:00:00") + assert pd.to_datetime(df["date"].iloc[0]).date() == pd.to_datetime("2024-10-01").date() + +###################### generate_eta_df_from_file +def test_generate_eta_df_from_file(): + # Simular contenido de entrada + content = { + "data": [ + { + "Arrive": [ + { + "line": 
10, + "stop": "Main St", + "bus": "Bus 1", + "geometry": {"coordinates": [10.0, 20.0]}, + "DistanceBus": 5.0, + "estimateArrive": "2024-10-01T10:00:00Z" + }, + { + "line": 10, + "stop": "Second St", + "bus": "Bus 2", + "geometry": {"coordinates": [10.5, 20.5]}, + "DistanceBus": 3.0, + "estimateArrive": "2024-10-01T10:15:00Z" + } + ] + } + ], + "datetime": "2024-10-01T09:00:00Z" + } + + # Ejecutar la función + df = generate_eta_df_from_file(content) + + # Verificar que el DataFrame no está vacío + assert not df.empty + + # Verificar las columnas del DataFrame + expected_columns = ['line', 'stop', 'bus', 'DistanceBus', 'estimateArrive', 'datetime', 'date', 'positionBusLon', 'positionBusLat'] + assert list(df.columns) == expected_columns + + # Verificar el contenido del DataFrame + assert df["line"].iloc[0] == 10 + assert df["stop"].iloc[0] == "Main St" + assert df["bus"].iloc[0] == "Bus 1" + assert df["DistanceBus"].iloc[0] == 5.0 + assert pd.to_datetime(df["datetime"].iloc[0]) == pd.to_datetime("2024-10-01T09:00:00Z") + assert df["positionBusLon"].iloc[0] == 10.0 + assert df["positionBusLat"].iloc[0] == 20.0 + + # Verificar que el segundo registro tiene los valores correctos + assert df["line"].iloc[1] == 10 + assert df["stop"].iloc[1] == "Second St" + assert df["bus"].iloc[1] == "Bus 2" + assert df["DistanceBus"].iloc[1] == 3.0 + assert pd.to_datetime(df["datetime"].iloc[1]) == pd.to_datetime("2024-10-01T09:00:00Z") + assert df["positionBusLon"].iloc[1] == 10.5 + assert df["positionBusLat"].iloc[1] == 20.5 + + +###################### generate_eta_day_df +@pytest.fixture +def mock_storage_path(tmp_path): + """Fixture para un path de almacenamiento temporal.""" + return str(tmp_path) + +@pytest.mark.asyncio +def test_generate_eta_day_df_no_files(mock_storage_path): + """Test para verificar la generación del DataFrame de ETA cuando no hay archivos.""" + + with patch('os.listdir', return_value=[]): + result_df = generate_eta_day_df(mock_storage_path, "2024/10/08") + + # Verifica que el DataFrame resultante esté vacío + assert result_df.empty + +###################### create_eta_emt +@pytest.fixture +def settings_create_eta_emt(): + """Fixture para simular la configuración de settings.""" + # Crear un mock para la clase Settings + settings = MagicMock() + + # Configuración del mock + settings.storage = MagicMock() + settings.storage.config = MagicMock() + settings.storage.config.local = MagicMock() + settings.storage.config.local.path = "/tmp" + settings.storage.default = "minio" + settings.storage.config.minio = MagicMock() + settings.storage.config.minio.bucket = "test-bucket" + settings.storage.config.minio.endpoint = "http://localhost:9000" + settings.storage.config.minio.access_key = "test-access-key" + settings.storage.config.minio.secret_key = "test-secret-key" + + return settings + +# Test para la función create_eta_emt +@patch('inesdata_mov_datasets.sources.create.emt.generate_eta_day_df') +@patch('inesdata_mov_datasets.sources.create.emt.async_download') +def test_create_eta_emt(mock_async_download, mock_generate_eta_day_df, settings_create_eta_emt): + # Configura el mock para que devuelva un DataFrame simulado + mock_generate_eta_day_df.return_value = pd.DataFrame({ + "line": [10, 20], + "stop": ["Main St", "Second St"], + "bus": ["Bus 1", "Bus 2"], + "datetime": pd.to_datetime(["2024-10-01 10:00:00", "2024-10-01 10:05:00"]), + "date": pd.to_datetime(["2024-10-01", "2024-10-01"]), + "DistanceBus": [5.0, 10.0], + "positionBusLon": [10.0, 30.0], + "positionBusLat": [20.0, 40.0] + }) + 
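+    # Note: storage.default is "minio" in this fixture, so create_eta_emt is
+    # expected to download the raw files from Minio before building the DataFrame.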
+ # Llamar a la función + result_df = create_eta_emt(settings_create_eta_emt, "2024-10-01") + + # Verifica que la función async_download fue llamada con los parámetros correctos + mock_async_download.assert_called_once_with( + bucket=settings_create_eta_emt.storage.config.minio.bucket, + prefix="raw/emt/2024-10-01/eta/", + output_path=settings_create_eta_emt.storage.config.local.path, + endpoint_url=settings_create_eta_emt.storage.config.minio.endpoint, + aws_access_key_id=settings_create_eta_emt.storage.config.minio.access_key, + aws_secret_access_key=settings_create_eta_emt.storage.config.minio.secret_key, + ) + + # Verifica que el DataFrame devuelto es el esperado + assert not result_df.empty + assert result_df.shape == (2, 8) # Dos filas, ocho columnas + assert list(result_df.columns) == [ + "line", "stop", "bus", "datetime", "date", "DistanceBus", "positionBusLon", "positionBusLat" + ] + +###################### join_calendar_line_datasets +def test_join_calendar_line_datasets(): + # Crear un DataFrame de ejemplo para calendar_df + calendar_data = { + "date": ["2024-10-01", "2024-10-02"], + "dayType": [1, 2], + "datetime": ["2024-10-01 08:00:00", "2024-10-02 08:00:00"] + } + calendar_df = pd.DataFrame(calendar_data) + calendar_df["date"] = pd.to_datetime(calendar_df["date"]) + calendar_df["datetime"] = pd.to_datetime(calendar_df["datetime"]) + + # Crear un DataFrame de ejemplo para line_df + line_data = { + "date": ["2024-10-01", "2024-10-02"], + "dayType": [1, 2], + "datetime": ["2024-10-01 09:00:00", "2024-10-02 09:00:00"], + "line": [101, 202], + "StartTime": ["08:00:00", "08:30:00"], + "StopTime": ["09:00:00", "09:30:00"] + } + line_df = pd.DataFrame(line_data) + line_df["date"] = pd.to_datetime(line_df["date"]) + line_df["datetime"] = pd.to_datetime(line_df["datetime"]) + + # Llamar a la función para unir los DataFrames + result_df = join_calendar_line_datasets(calendar_df, line_df) + + # Comprobar el resultado + expected_columns = ["date", "dayType", "datetime", "line", "StartTime", "StopTime"] + assert list(result_df.columns) == expected_columns, "Columnas no coinciden" + assert len(result_df) == 2, "El número de filas no es el esperado" + assert not result_df.empty, "El DataFrame resultante está vacío" + + # Verificar que las columnas datetime se unieron correctamente + assert all(result_df["datetime"] == line_df["datetime"]), "La columna datetime no se unió correctamente" + +###################### join_eta_dataset +def test_join_eta_dataset(): + # Crear un DataFrame de ejemplo para calendar_line_df + calendar_line_data = { + "line": ["101", "202", "303"], + "dayType": [1, 2, 3], + "datetime": ["2024-10-01 08:00:00", "2024-10-02 08:30:00", "2024-10-03 09:00:00"], + "date": ["2024-10-01", "2024-10-02", "2024-10-03"], + "StartTime": ["08:00:00", "08:30:00", "09:00:00"], + "StopTime": ["09:00:00", "09:30:00", "10:00:00"] + } + calendar_line_df = pd.DataFrame(calendar_line_data) + calendar_line_df["datetime"] = pd.to_datetime(calendar_line_df["datetime"]) + calendar_line_df["date"] = pd.to_datetime(calendar_line_df["date"]) + + # Crear un DataFrame de ejemplo para eta_df + eta_data = { + "line": ["101", "303", "404"], # Línea 404 no debería hacer match + "bus": [1, 2, 3], + "datetime": ["2024-10-01 08:05:00", "2024-10-03 09:10:00", "2024-10-04 09:30:00"], + "date": ["2024-10-01", "2024-10-03", "2024-10-04"], + "estimateArrive": [300, 600, 900] + } + eta_df = pd.DataFrame(eta_data) + eta_df["datetime"] = pd.to_datetime(eta_df["datetime"]) + eta_df["date"] = 
pd.to_datetime(eta_df["date"]) + + # Llamar a la función para unir los DataFrames + result_df = join_eta_dataset(calendar_line_df, eta_df) + + # Comprobar el resultado + expected_columns = ["line", "bus", "datetime", "date", "estimateArrive", "dayType", "StartTime", "StopTime"] + assert list(result_df.columns) == expected_columns, "Las columnas no coinciden con las esperadas" + assert len(result_df) == 3, "El número de filas no es el esperado" + + # Verificar que la fila con la línea 404 no se haya unido (debe tener valores nulos en las columnas de calendar_line_df) + assert pd.isna(result_df.loc[result_df["line"] == "404", "dayType"]).all(), "La fila con línea 404 no debería tener coincidencias" + + # Verificar que la columna `line` no tenga ceros iniciales + assert not any(result_df["line"].str.startswith("0")), "La columna 'line' tiene valores con ceros iniciales" + + # Verificar que los valores datetime y date estén correctos + assert all(result_df[result_df["line"] == "101"]["datetime"] == eta_df[eta_df["line"] == "101"]["datetime"]), "El valor de 'datetime' no coincide para la línea 101" + assert all(result_df[result_df["line"] == "303"]["datetime"] == eta_df[eta_df["line"] == "303"]["datetime"]), "El valor de 'datetime' no coincide para la línea 303" + + +###################### create_emt +@pytest.fixture +def mock_settings(): + """Fixture para simular la configuración de settings.""" + # Crear un mock para la clase Settings + settings = MagicMock() + + # Configuración del mock + settings.storage = MagicMock() + settings.storage.config = MagicMock() + settings.storage.config.local = MagicMock() + settings.storage.config.local.path = "/tmp" + settings.storage.default = "local" + settings.storage.config.minio = MagicMock() + settings.storage.config.minio.bucket = "test-bucket" + settings.storage.config.minio.endpoint = "http://localhost:9000" + settings.storage.config.minio.access_key = "test-access-key" + settings.storage.config.minio.secret_key = "test-secret-key" + + return settings + +@patch('inesdata_mov_datasets.sources.create.emt.create_calendar_emt') +@patch('inesdata_mov_datasets.sources.create.emt.create_line_detail_emt') +@patch('inesdata_mov_datasets.sources.create.emt.create_eta_emt') +@patch('inesdata_mov_datasets.sources.create.emt.join_calendar_line_datasets') +@patch('inesdata_mov_datasets.sources.create.emt.join_eta_dataset') +@patch('inesdata_mov_datasets.sources.create.emt.Path.mkdir') # Parchea la creación de directorios +@patch('inesdata_mov_datasets.sources.create.emt.pd.DataFrame.to_csv') # Parchea la exportación a CSV +@patch('inesdata_mov_datasets.sources.create.emt.instantiate_logger') # Parchea el logger +@pytest.mark.asyncio +def test_create_emt_success(mock_instantiate_logger, mock_to_csv, mock_mkdir, mock_join_eta, mock_join_calendar, mock_create_eta, mock_create_line_detail, mock_create_calendar, mock_settings): + """Test para verificar la creación exitosa del dataset EMT.""" + + # Simula DataFrames no vacíos + mock_create_calendar.return_value = MagicMock(empty=False) + mock_create_line_detail.return_value = MagicMock(empty=False) + mock_create_eta.return_value = MagicMock(empty=False) + + # Simula el DataFrame final + final_df = pd.DataFrame({ + "date": ["2024-10-08"], + "datetime": ["2024-10-08 14:30"], + "bus": ["Bus 1"], + "line": ["Line A"], + "stop": ["Stop 1"], + "positionBusLon": [1.0], + "positionBusLat": [1.0], + "positionTypeBus": ["Type 1"], + "DistanceBus": [0.5], + "destination": ["Destination 1"], + "deviation": [0], + "StartTime": 
["14:00"], + "StopTime": ["15:00"], + "MinimunFrequency": [5], + "MaximumFrequency": [10], + "isHead": [True], + "dayType": ["weekday"], + "strike": [False], + "estimateArrive": ["14:30"], +}) + mock_join_calendar.return_value = MagicMock() + mock_join_eta.return_value = final_df + + # Llamar a la función + create_emt(mock_settings, "2024/10/08") + print(mock_to_csv) + # Verificar que se llama a to_csv para exportar el DataFrame + mock_to_csv.assert_called_once() + + # Verificar que los DataFrames se unieron correctamente + mock_join_calendar.assert_called_once() + mock_join_eta.assert_called_once() + + # Verificar que se creó el directorio correspondiente + mock_mkdir.assert_called_once() + + # Verificar que el logger fue llamado para iniciar la creación del dataset + mock_instantiate_logger.assert_called_once() \ No newline at end of file diff --git a/test/test_create_informo.py b/test/test_create_informo.py new file mode 100644 index 0000000..3ecd16d --- /dev/null +++ b/test/test_create_informo.py @@ -0,0 +1,179 @@ +import pytest +import pandas as pd +from unittest.mock import patch, MagicMock, mock_open +from pathlib import Path +from datetime import datetime +from inesdata_mov_datasets.sources.create.informo import generate_df_from_file, generate_day_df, create_informo + +###################### generate_df_from_file +@patch('inesdata_mov_datasets.sources.create.informo.logger') # Parchea el logger para evitar la salida real en los tests +def test_generate_df_from_file_valid_data(mock_logger): + """Test para verificar la generación de DataFrame con datos válidos.""" + content = { + "pm": [ + {"valor": 10, "otro_valor": 20}, + {"valor": 15, "otro_valor": 25} + ], + "fecha_hora": "2024-10-01 12:00:00" + } + + expected_df = pd.DataFrame(content["pm"]) + expected_df["datetime"] = pd.to_datetime(content["fecha_hora"], dayfirst=True) + expected_df["date"] = pd.to_datetime(expected_df["datetime"].dt.date) + + # Ejecutar la función + result_df = generate_df_from_file(content) + + # Verificar que el DataFrame resultante es igual al esperado + pd.testing.assert_frame_equal(result_df, expected_df) + +@patch('inesdata_mov_datasets.sources.create.informo.logger') +def test_generate_df_from_file_empty_content(mock_logger): + """Test para verificar el comportamiento con contenido vacío.""" + content = {} + + # Ejecutar la función + result_df = generate_df_from_file(content) + + # Verificar que el DataFrame resultante está vacío + assert result_df.empty, "El DataFrame no debe estar vacío para contenido vacío" + +@patch('inesdata_mov_datasets.sources.create.informo.logger') +def test_generate_df_from_file_invalid_data(mock_logger): + """Test para verificar el manejo de excepciones con datos inválidos.""" + content = { + "pm": "not a valid data structure", + "fecha_hora": "2024-10-01 12:00:00" + } + + # Ejecutar la función + result_df = generate_df_from_file(content) + + # Verificar que el DataFrame resultante está vacío y no se produjo una excepción + assert result_df.empty, "El DataFrame no debe estar vacío para datos inválidos" + + # Verificar que se registró el error + mock_logger.error.assert_called() + +@patch('inesdata_mov_datasets.sources.create.informo.logger') +def test_generate_df_from_file_no_pm_key(mock_logger): + """Test para verificar el comportamiento cuando no hay clave 'pm' en el contenido.""" + content = { + "fecha_hora": "2024-10-01 12:00:00" + } + + # Ejecutar la función + result_df = generate_df_from_file(content) + + # Verificar que el DataFrame resultante está vacío + assert 
+@patch('inesdata_mov_datasets.sources.create.informo.logger')
+def test_generate_df_from_file_empty_content(mock_logger):
+    """Test the behaviour with empty content."""
+    content = {}
+
+    # Run the function
+    result_df = generate_df_from_file(content)
+
+    # Check that the resulting DataFrame is empty
+    assert result_df.empty, "The DataFrame should be empty for empty content"
+
+@patch('inesdata_mov_datasets.sources.create.informo.logger')
+def test_generate_df_from_file_invalid_data(mock_logger):
+    """Test that exceptions are handled for invalid data."""
+    content = {
+        "pm": "not a valid data structure",
+        "fecha_hora": "2024-10-01 12:00:00"
+    }
+
+    # Run the function
+    result_df = generate_df_from_file(content)
+
+    # Check that the resulting DataFrame is empty and no exception was raised
+    assert result_df.empty, "The DataFrame should be empty for invalid data"
+
+    # Check that the error was logged
+    mock_logger.error.assert_called()
+
+@patch('inesdata_mov_datasets.sources.create.informo.logger')
+def test_generate_df_from_file_no_pm_key(mock_logger):
+    """Test the behaviour when the 'pm' key is missing from the content."""
+    content = {
+        "fecha_hora": "2024-10-01 12:00:00"
+    }
+
+    # Run the function
+    result_df = generate_df_from_file(content)
+
+    # Check that the resulting DataFrame is empty
+    assert result_df.empty, "The DataFrame should be empty if the 'pm' key is missing"
+
+###################### generate_day_df
+@patch('inesdata_mov_datasets.sources.create.informo.logger')
+@patch('inesdata_mov_datasets.sources.create.informo.os.listdir')
+@patch('inesdata_mov_datasets.sources.create.informo.open', new_callable=mock_open, read_data='{"pms": [{"valor": 10, "fecha_hora": "2024-10-01 12:00:00"}]}')
+@patch('inesdata_mov_datasets.sources.create.informo.generate_df_from_file')
+def test_generate_day_df_valid_data(mock_generate_df_from_file, mock_open, mock_listdir, mock_logger):
+    """Test that a DataFrame is generated and saved for valid data."""
+    mock_listdir.return_value = ["file1.json", "file2.json"]
+    mock_generate_df_from_file.return_value = pd.DataFrame(
+        {"valor": [10],
+         "datetime": pd.to_datetime("2024-10-01 12:00:00")})
+
+    storage_path = "/tmp"
+    date = "2024/10/01"
+
+    # Run the function
+    generate_day_df(storage_path, date)
+
+    # Check that the DataFrame was generated and saved
+    processed_file_path = Path(storage_path) / Path("processed") / "informo" / date / f"informo_{date.replace('/', '')}.csv"
+    assert processed_file_path.is_file(), "The processed file was not created"
+
+@patch('inesdata_mov_datasets.sources.create.informo.logger')
+@patch('inesdata_mov_datasets.sources.create.informo.os.listdir')
+def test_generate_day_df_no_files(mock_listdir, mock_logger):
+    """Test the behaviour when there are no files in the directory."""
+    mock_listdir.return_value = []
+
+    storage_path = "/tmp"
+    date = "2024/10/01"
+
+    # Run the function
+    generate_day_df(storage_path, date)
+
+    # Check that no processed file was generated
+    processed_file_path = Path(storage_path) / Path("processed") / "informo" / date.replace("/", "") / f"informo_{date.replace('/', '')}.csv"
+    assert not processed_file_path.is_file(), "No processed file should have been created"
+
+
+@patch('inesdata_mov_datasets.sources.create.informo.logger')
+@patch('inesdata_mov_datasets.sources.create.informo.os.listdir')
+@patch('builtins.open', new_callable=mock_open, read_data='{"not_pms": [{"valor": 10}]}')
+def test_generate_day_df_no_pms_key(mock_open, mock_listdir, mock_logger):
+    """Test the behaviour when the 'pms' key is missing from the content."""
+    mock_listdir.return_value = ["file1.json"]
+
+    storage_path = "/tmp"
+    date = "2024/10/01"
+
+    # Run the function
+    generate_day_df(storage_path, date)
+
+    # Check that the logger recorded a debug message
+    mock_logger.debug.assert_called_with("There is no data to create")
+
+
+###################### create_informo
+@pytest.fixture
+def mock_settings():
+    """Fixture that fakes the settings configuration."""
+    settings = MagicMock()
+    settings.storage = MagicMock()
+    settings.storage.config = MagicMock()
+    settings.storage.config.local = MagicMock()
+    settings.storage.config.local.path = "/tmp"
+    settings.storage.default = "minio"
+    settings.storage.config.minio = MagicMock()
+    settings.storage.config.minio.bucket = "test-bucket"
+    settings.storage.config.minio.endpoint = "http://localhost:9000"
+    settings.storage.config.minio.access_key = "test-access-key"
+    settings.storage.config.minio.secret_key = "test-secret-key"
+    return settings
+
+@patch('inesdata_mov_datasets.sources.create.informo.instantiate_logger')  # adjust to the real module path
+@patch('inesdata_mov_datasets.sources.create.informo.download_informo')
+@patch('inesdata_mov_datasets.sources.create.informo.generate_day_df')
+@patch('inesdata_mov_datasets.sources.create.informo.logger.info')
+@patch('inesdata_mov_datasets.sources.create.informo.logger.debug')
+@patch('inesdata_mov_datasets.sources.create.informo.logger.error')
+def test_create_informo(mock_error, mock_debug, mock_info, mock_generate_day_df, mock_download_informo, mock_instantiate_logger, mock_settings):
+    """Test that the INFORMO dataset is created."""
+    date = "2024/10/01"
+
+    # Run the function
+    create_informo(mock_settings, date)
+
+    # Check that instantiate_logger was called
+    mock_instantiate_logger.assert_called_once_with(mock_settings, "INFORMO", "create")
+
+    # Check that logger.info is called
+    mock_info.assert_called_once_with(f"Creating INFORMO dataset for date: {date}")
+
+    # Check that download_informo is called
+    mock_download_informo.assert_called_once_with(
+        bucket=mock_settings.storage.config.minio.bucket,
+        prefix=f"raw/informo/{date}/",
+        output_path=mock_settings.storage.config.local.path,
+        endpoint_url=mock_settings.storage.config.minio.endpoint,
+        aws_access_key_id=mock_settings.storage.config.minio.access_key,
+        aws_secret_access_key=mock_settings.storage.config.minio.secret_key,
+    )
+
+    # Check that generate_day_df is called
+    mock_generate_day_df.assert_called_once_with(storage_path=mock_settings.storage.config.local.path, date=date)
+
+    # Check that logger.debug is called
+    mock_debug.assert_called()
+    assert any("Time duration of INFORMO dataset creation" in call[0][0] for call in mock_debug.call_args_list)
\ No newline at end of file
diff --git a/test/test_download_aemet.py b/test/test_download_aemet.py
new file mode 100644
index 0000000..1026e10
--- /dev/null
+++ b/test/test_download_aemet.py
@@ -0,0 +1,77 @@
+import pytest
+import asyncio
+from unittest.mock import patch, AsyncMock
+from inesdata_mov_datasets.sources.create.aemet import download_aemet
+
+###################### download_aemet
+@pytest.mark.asyncio
+async def test_download_aemet_success():
+    """Test the happy path of `download_aemet`."""
+
+    # Test parameters
+    bucket = "test-bucket"
+    prefix = "raw-data/"
+    output_path = "/local/path/to/output"
+    endpoint_url = "http://minio.example.com"
+    aws_access_key_id = "test_access_key"
+    aws_secret_access_key = "test_secret_key"
+
+    with patch('inesdata_mov_datasets.sources.create.aemet.download_objs', new_callable=AsyncMock) as mock_download_objs:
+        # Fake the response of download_objs
+        mock_download_objs.return_value = asyncio.Future()
+        mock_download_objs.return_value.set_result(None)
+
+        # Call the function
+        download_aemet(
+            bucket,
+            prefix,
+            output_path,
+            endpoint_url,
+            aws_access_key_id,
+            aws_secret_access_key
+        )
+        # Check that download_objs was called with the right parameters
+        mock_download_objs.assert_called_once_with(
+            bucket,
+            prefix,
+            output_path,
+            endpoint_url,
+            aws_access_key_id,
+            aws_secret_access_key
+        )
+
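+# Note: download_aemet is expected to catch download failures internally and
+# log them, so the test below only checks that nothing propagates to the caller.
+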
+@pytest.mark.asyncio
+async def test_download_aemet_exception():
+    """Test that exceptions are handled inside `download_aemet`."""
+
+    # Test parameters
+    bucket = "test-bucket"
+    prefix = "raw-data/"
+    output_path = "/local/path/to/output"
+    endpoint_url = "http://minio.example.com"
+    aws_access_key_id = "test_access_key"
+    aws_secret_access_key = "test_secret_key"
+
+    with patch('inesdata_mov_datasets.sources.create.aemet.download_objs', side_effect=Exception("Download error")) as mock_download_objs:
+        # Call the function and check that no unhandled exception is raised
+        try:
+            download_aemet(
+                bucket,
+                prefix,
+                output_path,
+                endpoint_url,
+                aws_access_key_id,
+                aws_secret_access_key
+            )
+        except Exception:
+            pytest.fail("download_aemet raised an unexpected exception")
+
+        # Check that download_objs was called
+        mock_download_objs.assert_called_once_with(
+            bucket,
+            prefix,
+            output_path,
+            endpoint_url,
+            aws_access_key_id,
+            aws_secret_access_key
+        )
\ No newline at end of file
diff --git a/test/test_extract_aemet.py b/test/test_extract_aemet.py
new file mode 100644
index 0000000..3ae3457
--- /dev/null
+++ b/test/test_extract_aemet.py
@@ -0,0 +1,137 @@
+import pytest
+from unittest.mock import MagicMock, patch, AsyncMock
+import requests
+import datetime
+from pathlib import Path
+import pytz
+from inesdata_mov_datasets.sources.extract.aemet import get_aemet, save_aemet  # adjust to the real module path
+
+###################### get_aemet
+@pytest.fixture
+def mock_settings():
+    """Fixture that fakes the settings configuration."""
+    settings = MagicMock()
+    settings.sources = MagicMock()
+    settings.sources.aemet.credentials = MagicMock()
+    settings.sources.aemet.credentials.api_key = "test-api-key"
+    return settings
+
+@patch('inesdata_mov_datasets.sources.extract.aemet.instantiate_logger')
+@patch('inesdata_mov_datasets.sources.extract.aemet.logger.info')
+@patch('inesdata_mov_datasets.sources.extract.aemet.logger.debug')
+@patch('inesdata_mov_datasets.sources.extract.aemet.logger.error')
+@patch('inesdata_mov_datasets.sources.extract.aemet.requests.get')
+@patch('inesdata_mov_datasets.sources.extract.aemet.save_aemet')
+@pytest.mark.asyncio
+async def test_get_aemet(mock_save_aemet, mock_requests_get, mock_error, mock_debug, mock_info, mock_instantiate_logger, mock_settings):
+    """Test the AEMET data extraction."""
+
+    # Set up the fake responses of requests.get
+    mock_requests_get.side_effect = [
+        MagicMock(status_code=200, json=lambda: {"datos": "https://example.com/aemet_data.json"}),  # first call, for the AEMET data URL
+        MagicMock(status_code=200, json=lambda: {"temperature": 22})  # second call, for the data itself
+    ]
+
+    # Run the function
+    await get_aemet(mock_settings)
+
+    # Check that instantiate_logger is called
+    mock_instantiate_logger.assert_called_once_with(mock_settings, "AEMET", "extract")
+
+    # Check that logger.info marks the start and end of the extraction
+    mock_info.assert_any_call("Extracting AEMET")
+    mock_info.assert_any_call("Extracted AEMET")
+
+    # Check that requests.get is called twice
+    assert mock_requests_get.call_count == 2
+
+    # Check that save_aemet is called with the fetched data
+    mock_save_aemet.assert_called_once_with(mock_settings, {"temperature": 22})
+
+    # Check that logger.debug is called
+    mock_debug.assert_called()
+
+###################### save_aemet
+@pytest.fixture
+def mock_settings_minio():
+    """Fixture that fakes the settings configuration."""
+    settings = MagicMock()
+    settings.storage = MagicMock()
+    settings.storage.config = MagicMock()
+    settings.storage.config.minio = MagicMock()
+    settings.storage.config.minio.bucket = "test-bucket"
+    settings.storage.config.minio.endpoint = "http://localhost:9000"
+    settings.storage.config.minio.access_key = "test-access-key"
+    settings.storage.config.minio.secret_key = "test-secret-key"
+    settings.storage.default = "minio"  # switch this to "local" for the local-storage test
+    return settings
+
+@pytest.fixture
+def mock_settings_local():
+    """Fixture that fakes the settings configuration with local storage."""
+    settings = MagicMock()
+    settings.storage.default = "local"
+    settings.storage.config.local.path = "/tmp"
+    return settings
+
+@patch('inesdata_mov_datasets.sources.extract.aemet.upload_objs')
+@patch('inesdata_mov_datasets.sources.extract.aemet.check_s3_file_exists')
+@patch('inesdata_mov_datasets.sources.extract.aemet.logger.debug')
+@pytest.mark.asyncio
+async def test_save_aemet_minio(mock_debug, mock_check_s3_file_exists, mock_upload_objs, mock_settings_minio):
+    """Test that AEMET data is saved to MinIO."""
+    mock_check_s3_file_exists.return_value = False
+    data = {"temperature": 22}  # test data
+
+    # Run the function
+    await save_aemet(mock_settings_minio, data)
+
+    europe_timezone = pytz.timezone("Europe/Madrid")
+    current_datetime = datetime.datetime.now(europe_timezone).replace(second=0)
+    formatted_date_day = current_datetime.strftime(
+        "%Y%m%d"
+    )  # date formatted as year|month|day, no separators
+    formatted_date_slash = current_datetime.strftime(
+        "%Y/%m/%d"
+    )
+    # Check that check_s3_file_exists was called with the right parameters
+    mock_check_s3_file_exists.assert_called_once_with(
+        endpoint_url=mock_settings_minio.storage.config.minio.endpoint,
+        aws_secret_access_key=mock_settings_minio.storage.config.minio.secret_key,
+        aws_access_key_id=mock_settings_minio.storage.config.minio.access_key,
+        bucket_name=mock_settings_minio.storage.config.minio.bucket,
+        object_name=f"raw/aemet/{formatted_date_slash}/aemet_{formatted_date_day}.json"  # expected object name
+    )
+
+    # Check that upload_objs is called
+    mock_upload_objs.assert_called_once()
+
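+# Note: both save paths build object names from the current Madrid time; as an
+# illustration, 2024-10-09 14:30 formats as "20241009" with "%Y%m%d" and as
+# "2024/10/09" with "%Y/%m/%d".
+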
+@patch('inesdata_mov_datasets.sources.extract.aemet.upload_objs')
+@patch('inesdata_mov_datasets.sources.extract.aemet.check_local_file_exists')
+@patch('inesdata_mov_datasets.sources.extract.aemet.logger.debug')
+@pytest.mark.asyncio
+async def test_save_aemet_local(mock_debug, mock_check_local_file_exists, mock_upload_objs, mock_settings_local):
+    """Test that AEMET data is saved locally."""
+    mock_check_local_file_exists.return_value = False  # fake that the file does not exist
+    data = {"temperature": 22}  # test data
+
+    # Run the function
+    await save_aemet(mock_settings_local, data)
+    europe_timezone = pytz.timezone("Europe/Madrid")
+    current_datetime = datetime.datetime.now(europe_timezone).replace(second=0)
+    formatted_date_day = current_datetime.strftime(
+        "%Y%m%d"
+    )  # date formatted as year|month|day, no separators
+    formatted_date_slash = current_datetime.strftime(
+        "%Y/%m/%d"
+    )
+
+    # Check that check_local_file_exists was called with the right parameters
+    mock_check_local_file_exists.assert_called_once_with(
+        Path(f"/tmp/raw/aemet/{formatted_date_slash}"),
+        f"aemet_{formatted_date_day}.json"  # expected object name
+    )
+
+    # Check that upload_objs is not called, since the data is stored locally
+    mock_upload_objs.assert_not_called()
+
diff --git a/test/test_extract_emt.py b/test/test_extract_emt.py
new file mode 100644
index 0000000..de501f4
--- /dev/null
+++ b/test/test_extract_emt.py
@@ -0,0 +1,488 @@
+import pytest
+from aiohttp import ClientSession
+from aioresponses import aioresponses
+from unittest.mock import patch, AsyncMock, MagicMock
+import os
+import json
+import datetime
+from inesdata_mov_datasets.sources.extract.emt import get_calendar, get_line_detail, get_eta, login_emt, token_control, get_emt
+from inesdata_mov_datasets.settings import Settings
+
+###################### get_calendar
+@pytest.mark.asyncio
+async def test_get_calendar():
+    """Test the get_calendar behaviour."""
+
+    # Input parameters
+    start_date = "2024-10-01"
+    end_date = "2024-10-07"
+    headers = {"Authorization": "Bearer your_token"}
+
+    # Create an aiohttp session
+    async with ClientSession() as session:
+        # Mock the API response
+        calendar_url = f"https://openapi.emtmadrid.es/v1/transport/busemtmad/calendar/{start_date}/{end_date}/"
+        mock_response = {"calendar": ["2024-10-01", "2024-10-02", "2024-10-03"]}
+
+        with aioresponses() as m:
+            m.get(calendar_url, payload=mock_response)
+
+            # Call the function
+            result = await get_calendar(session, start_date, end_date, headers)
+
+            # Check that the response is the expected one
+            assert result == mock_response
+
+@pytest.mark.asyncio
+async def test_get_calendar_error():
+    """Test the error handling of get_calendar."""
+
+    # Input parameters
+    start_date = "2024-10-01"
+    end_date = "2024-10-07"
+    headers = {"Authorization": "Bearer your_token"}
+
+    # Create an aiohttp session
+    async with ClientSession() as session:
+        # Mock an API error response
+        calendar_url = f"https://openapi.emtmadrid.es/v1/transport/busemtmad/calendar/{start_date}/{end_date}/"
+
+        with aioresponses() as m:
+            m.get(calendar_url, status=500)
+
+            # Call the function
+            result = await get_calendar(session, start_date, end_date, headers)
+
+            # Check that the error is handled and mapped to the sentinel value
+            assert result == {"code": -1}
+
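+# Note: the error-path tests in this file assume the extract helpers map failed
+# HTTP calls to the sentinel {"code": -1} instead of raising.
+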
+###################### get_line_detail
+@pytest.mark.asyncio
+async def test_get_line_detail_success():
+    """Test the happy path of get_line_detail."""
+
+    # Input parameters
+    date = "2024-10-01"
+    line_id = "123"
+    headers = {"Authorization": "Bearer your_token"}
+
+    # Create an aiohttp session
+    async with ClientSession() as session:
+        # Mock the API response
+        line_detail_url = f"https://openapi.emtmadrid.es/v1/transport/busemtmad/lines/{line_id}/info/{date}/"
+
+        mock_response = {"line": line_id, "status": "operational"}  # example of a successful response
+
+        with aioresponses() as m:
+            m.get(line_detail_url, payload=mock_response)  # fake a successful response
+
+            # Call the function
+            result = await get_line_detail(session, date, line_id, headers)
+
+            # Check that the result is the expected one
+            assert result == mock_response
+
+@pytest.mark.asyncio
+async def test_get_line_detail_error():
+    """Test the error handling of get_line_detail."""
+
+    # Input parameters
+    date = "2024-10-01"
+    line_id = "123"
+    headers = {"Authorization": "Bearer your_token"}
+
+    # Create an aiohttp session
+    async with ClientSession() as session:
+        # Mock an API error response
+        line_detail_url = f"https://openapi.emtmadrid.es/v1/transport/busemtmad/lines/{line_id}/info/{date}/"
+
+        with aioresponses() as m:
+            m.get(line_detail_url, status=500)  # fake a server error
+
+            # Call the function
+            result = await get_line_detail(session, date, line_id, headers)
+
+            # Check that the error is handled and mapped to the sentinel value
+            assert result == {"code": -1}
+
+###################### get_eta
+@pytest.mark.asyncio
+async def test_get_eta_success():
+    """Test the happy path of get_eta."""
+
+    # Input parameters
+    stop_id = "456"
+    headers = {"Authorization": "Bearer your_token"}
+
+    # Create an aiohttp session
+    async with ClientSession() as session:
+        # Mock the API response
+        eta_url = f"https://openapi.emtmadrid.es/v2/transport/busemtmad/stops/{stop_id}/arrives/"
+
+        mock_response = {
+            "estimatedArrivalTimes": [
+                {"line": "L1", "arrivalTime": "10:00"},
+                {"line": "L2", "arrivalTime": "10:05"},
+            ]
+        }  # example of a successful response
+
+        with aioresponses() as m:
+            m.post(eta_url, payload=mock_response)  # fake a successful response
+
+            # Call the function
+            result = await get_eta(session, stop_id, headers)
+
+            # Check that the result is the expected one
+            assert result == mock_response
+
+@pytest.mark.asyncio
+async def test_get_eta_error():
+    """Test the error handling of get_eta."""
+
+    # Input parameters
+    stop_id = "456"
+    headers = {"Authorization": "Bearer your_token"}
+
+    # Create an aiohttp session
+    async with ClientSession() as session:
+        # Mock an API error response
+        eta_url = f"https://openapi.emtmadrid.es/v2/transport/busemtmad/stops/{stop_id}/arrives/"
+
+        with aioresponses() as m:
+            m.post(eta_url, status=500)  # fake a server error
+
+            # Call the function
+            result = await get_eta(session, stop_id, headers)
+
+            # Check that the error is handled and mapped to the sentinel value
+            assert result == {"code": -1}
+
+###################### login_emt
+@pytest.fixture
+def mock_settings():
+    """Fixture that fakes the settings configuration."""
+    settings = MagicMock()
+    settings.sources.emt.credentials.x_client_id = "test_client_id"
+    settings.sources.emt.credentials.passkey = "test_passkey"
+    settings.storage.default = "local"
+    return settings
+
+@patch('inesdata_mov_datasets.sources.extract.emt.requests.get')
+@patch('builtins.open', new_callable=MagicMock)
+@patch('os.makedirs')
+@pytest.mark.asyncio
+async def test_login_emt_success(mock_makedirs, mock_open, mock_requests_get, mock_settings):
+    """Test a successful EMT login."""
+
+    # Fake the API response
+    mock_response = MagicMock()
+    mock_response.json.return_value = {
+        "data": [{"accessToken": "mock_access_token"}]
+    }
+    mock_requests_get.return_value = mock_response
+
+    # Call the function
+    token = await login_emt(mock_settings, object_login_name="login_response.json", local_path="/test/storage/")
+
+    # Check that the returned token is the expected one
+    assert token == "mock_access_token"
+
+    # Check that requests.get was called with the right headers
+    mock_requests_get.assert_called_once_with(
+        "https://openapi.emtmadrid.es/v2/mobilitylabs/user/login/",
+        headers={
+            "X-ClientId": "test_client_id",
+            "passKey": "test_passkey",
+            "Content-Type": "application/json",
+            "Accept": "application/json",
+        },
+        verify=True
+    )
+
+    # Check that the directory is created if it does not exist
+    mock_makedirs.assert_called_once_with("/test/storage/", exist_ok=True)
+
+    # Check that the file is written with the API response
+    mock_open.assert_called_once_with("/test/storage/login_response.json", "w")
+
"test_passkey", + "Content-Type": "application/json", + "Accept": "application/json", + }, + verify=True + ) + + # Verifica que se creara el directorio si no existe + mock_makedirs.assert_called_once_with("/test/storage/", exist_ok=True) + + # Verifica que se escribiera el archivo con la respuesta de la API + mock_open.assert_called_once_with("/test/storage/login_response.json", "w") + +@patch('inesdata_mov_datasets.sources.extract.emt.requests.get') +@pytest.mark.asyncio +async def test_login_emt_failure(mock_requests_get, mock_settings): + """Test para verificar el manejo de errores en el inicio de sesión.""" + + # Simula una respuesta de error de la API + mock_response = MagicMock() + mock_response.json.return_value = { + "data": [] + } + mock_requests_get.return_value = mock_response + + # Llama a la función + token = await login_emt(mock_settings, "login_response.json") + + # Verifica que el token retornado sea una cadena vacía + assert token == "" + + # Verifica que se llamara a requests.get una vez + mock_requests_get.assert_called_once() + +#TODO ###################### token_control +@pytest.fixture +def mock_config(): + """Fixture para simular la configuración de settings.""" + settings = MagicMock() + settings.storage.default = "local" # Cambiar a "minio" si es necesario para otros tests + settings.storage.config.local.path = "/tmp" + return settings + + +@pytest.fixture +def mock_login_emt(): + """Fixture para simular la función login_emt.""" + with patch('inesdata_mov_datasets.sources.extract.emt.login_emt', new_callable=AsyncMock) as mock: + yield mock + + +@pytest.fixture +def mock_check_local_file_exists(): + """Fixture para simular la función check_local_file_exists.""" + with patch('inesdata_mov_datasets.sources.extract.emt.check_local_file_exists') as mock: + yield mock + + +@pytest.fixture +def mock_read_obj(): + """Fixture para simular la función read_obj.""" + with patch('inesdata_mov_datasets.sources.extract.emt.read_obj', new_callable=AsyncMock) as mock: + yield mock + + +@pytest.mark.asyncio +async def test_token_control_regenerate_token(mock_config, mock_login_emt, mock_check_local_file_exists, mock_read_obj): + """Test para verificar que se regenera el token si no existe o ha caducado.""" + + # Simula que el archivo no existe + mock_check_local_file_exists.return_value = False + + # Simula el token devuelto por login_emt + mock_login_emt.return_value = "new_token" + + # Llama a la función + token = await token_control(mock_config, "2024/10/08", "20241008") + + # Verificar que se llama a login_emt + mock_login_emt.assert_called_once() + + # Verificar que el token es el esperado + assert token == "new_token" + +@pytest.fixture +def mock_config_minio(): + """Fixture para simular la configuración de settings.""" + settings = MagicMock() + settings.storage.default = "minio" # Cambiar a "minio" si es necesario para otros tests + settings.storage.config.local.path = "/tmp" + return settings + +@patch('inesdata_mov_datasets.sources.extract.emt.check_s3_file_exists') +@patch('inesdata_mov_datasets.sources.extract.emt.login_emt') +@pytest.mark.asyncio +async def test_token_control_minio_file_not_exists(mock_login_emt, mock_check_s3_file_exists, mock_config_minio): + + # Simular que el archivo de login no existe en MinIO + mock_check_s3_file_exists.return_value = False + mock_login_emt.return_value = "fake_token" + + token = await token_control(mock_config_minio, "2023-10-09", "20231009") + + assert token == "fake_token" + mock_login_emt.assert_called_once() + 
+@patch('inesdata_mov_datasets.sources.extract.emt.check_s3_file_exists')
+@patch('inesdata_mov_datasets.sources.extract.emt.login_emt')
+@pytest.mark.asyncio
+async def test_token_control_minio_file_not_exists(mock_login_emt, mock_check_s3_file_exists, mock_config_minio):
+
+    # Fake that the login file does not exist in MinIO
+    mock_check_s3_file_exists.return_value = False
+    mock_login_emt.return_value = "fake_token"
+
+    token = await token_control(mock_config_minio, "2023-10-09", "20231009")
+
+    assert token == "fake_token"
+    mock_login_emt.assert_called_once()
+    mock_check_s3_file_exists.assert_called_once()
+
+
+###################### get_emt
+@pytest.fixture
+def mock_settings_get_emt():
+    """Fixture that fakes the settings configuration."""
+    settings = MagicMock()
+    settings.sources = MagicMock()
+    settings.sources.emt.lines = ["line1", "line2"]  # example lines
+    settings.sources.emt.stops = ["1", "2"]  # example stops
+    settings.storage.default = "local"  # switch to "minio" if needed
+    settings.storage.config.local.path = "/fake/path"  # fake path for the tests
+    return settings
+
+
+@patch('inesdata_mov_datasets.sources.extract.emt.instantiate_logger')
+@patch('inesdata_mov_datasets.sources.extract.emt.logger.info')
+@patch('inesdata_mov_datasets.sources.extract.emt.logger.debug')
+@patch('inesdata_mov_datasets.sources.extract.emt.logger.error')
+@patch('inesdata_mov_datasets.sources.extract.emt.token_control')
+@patch('inesdata_mov_datasets.sources.extract.emt.get_line_detail')
+@patch('inesdata_mov_datasets.sources.extract.emt.get_calendar')
+@patch('inesdata_mov_datasets.sources.extract.emt.get_eta')
+@patch('inesdata_mov_datasets.sources.extract.emt.upload_objs')
+@patch('inesdata_mov_datasets.sources.extract.emt.check_local_file_exists')
+@patch('inesdata_mov_datasets.sources.extract.emt.check_s3_file_exists')
+@pytest.mark.asyncio
+async def test_get_emt_local(mock_check_s3_file_exists, mock_check_local_file_exists, mock_upload_objs,
+                             mock_get_eta, mock_get_calendar, mock_get_line_detail,
+                             mock_token_control, mock_error, mock_debug, mock_info, mock_instantiate_logger, mock_settings_get_emt):
+    """Test the EMT data extraction."""
+
+    # Set up the mocks
+    mock_token_control.return_value = "fake_token"
+    mock_check_local_file_exists.return_value = False  # fake that the files do not exist
+    mock_check_s3_file_exists.return_value = False  # fake that the files do not exist in S3
+
+    # Fake the responses of the async helpers
+    mock_get_line_detail.return_value = {"code": "00", "data": "line_data"}
+    mock_get_calendar.return_value = {"code": "00", "data": "calendar_data"}
+    mock_get_eta.return_value = {"code": "00", "data": "eta_data"}
+
+    # Run the function
+    await get_emt(mock_settings_get_emt)
+
+    # Check that instantiate_logger is called
+    mock_instantiate_logger.assert_called_once_with(mock_settings_get_emt, "EMT", "extract")
+
+    # Check that logger.info marks the start and end of the extraction
+    mock_info.assert_any_call("Extracting EMT")
+    mock_info.assert_any_call("Extracted EMT")
+
+    # Check that token_control is called once
+    assert mock_token_control.call_count == 1
+
+    # Check that get_line_detail is called for every line
+    assert mock_get_line_detail.call_count == len(mock_settings_get_emt.sources.emt.lines)
+
+    # Check that get_calendar is called once
+    mock_get_calendar.assert_called_once()
+
+    # Check that upload_objs is not called, since the data is stored locally
+    mock_upload_objs.assert_not_called()
+
+    # Check that logger.debug is called
+    mock_debug.assert_called()
+
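+# Note: with storage.default == "local", get_emt is expected to write results
+# to disk and never call upload_objs; the assertion above pins that down.
+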
+@patch('inesdata_mov_datasets.sources.extract.emt.check_local_file_exists')
+@patch('inesdata_mov_datasets.sources.extract.emt.get_line_detail')
+@pytest.mark.asyncio
+async def test_get_emt_file_exists_local(mock_get_line_detail, mock_check_local_file_exists, mock_settings_get_emt):
+    mock_check_local_file_exists.return_value = True  # fake that the file already exists
+
+    await get_emt(mock_settings_get_emt)
+
+    # Check that get_line_detail was not called
+    mock_get_line_detail.assert_not_called()
+
+
+@patch('inesdata_mov_datasets.sources.extract.emt.get_line_detail')
+@patch('inesdata_mov_datasets.sources.extract.emt.logger.error')
+@pytest.mark.asyncio
+async def test_get_emt_line_detail_error_local(mock_logger_error, mock_get_line_detail, mock_settings_get_emt):
+    mock_get_line_detail.return_value = {"code": "99", "error": "Line Error"}  # fake an error in the response
+
+    await get_emt(mock_settings_get_emt)
+
+    # Check that the error was logged
+    assert mock_logger_error.call_count == 2
+
+@patch('inesdata_mov_datasets.sources.extract.emt.get_eta')
+@patch('inesdata_mov_datasets.sources.extract.emt.logger.error')
+@pytest.mark.asyncio
+async def test_get_emt_eta_errors_local(mock_logger_error, mock_get_eta, mock_settings_get_emt):
+    mock_get_eta.return_value = {"code": "99", "error": "ETA Error"}  # fake an error for the stops
+
+    await get_emt(mock_settings_get_emt)
+
+    assert mock_logger_error.call_count == 2
+
+@patch('inesdata_mov_datasets.sources.extract.emt.get_calendar')
+@patch('inesdata_mov_datasets.sources.extract.emt.logger.error')
+@pytest.mark.asyncio
+async def test_get_emt_calendar_errors_local(mock_logger_error, mock_get_calendar, mock_settings_get_emt):
+    mock_get_calendar.return_value = {"code": "99", "error": "Calendar Error"}  # fake an error in the calendar
+
+    await get_emt(mock_settings_get_emt)
+
+    assert mock_logger_error.call_count == 2
+
+@pytest.mark.asyncio
+async def test_get_emt_minio_success():
+    config = Settings()  # instantiate the configuration used in production
+    config.storage.default = "minio"
+
+    # Mock the token_control response
+    with patch("inesdata_mov_datasets.sources.extract.emt.token_control", return_value="mocked_token") as mock_token_control:
+        # Mock the ClientSession
+        with patch("aiohttp.ClientSession") as MockClientSession:
+            session_mock = MagicMock()
+            MockClientSession.return_value.__aenter__.return_value = session_mock
+
+            # Mock the MinIO-related helpers
+            with patch("inesdata_mov_datasets.sources.extract.emt.check_s3_file_exists", return_value=False) as mock_check_s3:
+                with patch("inesdata_mov_datasets.sources.extract.emt.get_line_detail", return_value={"code": "00"}) as mock_line_detail:
+                    with patch("inesdata_mov_datasets.sources.extract.emt.get_calendar", return_value={"code": "00"}) as mock_calendar:
+                        with patch("inesdata_mov_datasets.sources.extract.emt.get_eta", return_value={"code": "00"}) as mock_eta:
+                            with patch("inesdata_mov_datasets.sources.extract.emt.upload_objs") as mock_upload_objs:
+                                # Run the function under test
+                                await get_emt(config)
+
+                                # Check that every helper was called as expected
+                                mock_token_control.assert_called_once()
+                                mock_check_s3.assert_called()  # the S3 existence checks ran
+                                mock_line_detail.assert_called()  # the line details were requested
+                                mock_calendar.assert_called()  # the calendar was requested
+                                mock_eta.assert_called()  # the ETAs were requested
+                                mock_upload_objs.assert_called()  # the files were uploaded to S3
+
+@pytest.fixture
+def mock_settings_get_emt_minio():
+    """Fixture that fakes the settings configuration."""
+    settings = MagicMock()
+    settings.sources = MagicMock()
+    settings.sources.emt.lines = ["line1", "line2"]  # example lines
+    settings.sources.emt.stops = ["1", "2"]  # example stops
+    settings.storage.default = "minio"
+    settings.storage.config.minio.endpoint = "http://localhost:9000"
+    settings.storage.config.minio.access_key = "minio_access_key"
+    settings.storage.config.minio.secret_key = "minio_secret_key"
+    settings.storage.config.minio.bucket = "emt_bucket"
+    return settings
+
+@pytest.mark.asyncio
+async def test_get_emt_minio_error_handling(mock_settings_get_emt_minio):
+
+    with patch("inesdata_mov_datasets.sources.extract.emt.token_control", return_value="mocked_token"):
+        with patch("aiohttp.ClientSession") as MockClientSession:
+            session_mock = MagicMock()
+            MockClientSession.return_value.__aenter__.return_value = session_mock
+
+            # Fake errors in the endpoint responses
+            with patch("inesdata_mov_datasets.sources.extract.emt.get_line_detail", return_value={"code": "01"}):  # error in line_detail
+                with patch("inesdata_mov_datasets.sources.extract.emt.get_eta", return_value={"code": "01"}):  # error in ETA
+                    with patch("inesdata_mov_datasets.sources.extract.emt.upload_objs") as mock_upload_objs:
+                        await get_emt(mock_settings_get_emt_minio)
+
+                        # Check that no files were uploaded because of the errors
+                        mock_upload_objs.assert_not_called()
+
+
+@patch('inesdata_mov_datasets.sources.extract.emt.check_s3_file_exists')
+@patch('inesdata_mov_datasets.sources.extract.emt.get_line_detail')
+@pytest.mark.asyncio
+async def test_get_emt_file_exists_minio(mock_get_line_detail, mock_check_s3_file_exists, mock_settings_get_emt_minio):
+    mock_check_s3_file_exists.return_value = True  # fake that the file already exists
+
+    await get_emt(mock_settings_get_emt_minio)
+
+    # Check that get_line_detail was not called
+    mock_get_line_detail.assert_not_called()
+
+
diff --git a/test/test_extract_informo.py b/test/test_extract_informo.py
new file mode 100644
index 0000000..1413bcb
--- /dev/null
+++ b/test/test_extract_informo.py
@@ -0,0 +1,212 @@
+import pytest
+import asyncio
+from unittest.mock import MagicMock, patch, mock_open
+from inesdata_mov_datasets.sources.extract.informo import get_informo, save_informo
+import xmltodict
+from pathlib import Path
+from loguru import logger
+import json
+import pytz
+import datetime
+
+
+###################### get_informo
+@pytest.fixture
+def mock_settings():
+    """Fixture that fakes the settings configuration."""
+    settings = MagicMock()
+    settings.sources = MagicMock()
+    settings.sources.informo.credentials = MagicMock()
+    settings.sources.informo.credentials.api_key = "test-api-key"
+    return settings
+
+# Patch the dependencies of get_informo
+@patch('inesdata_mov_datasets.sources.extract.informo.instantiate_logger')
+@patch('inesdata_mov_datasets.sources.extract.informo.logger.info')
+@patch('inesdata_mov_datasets.sources.extract.informo.logger.debug')
+@patch('inesdata_mov_datasets.sources.extract.informo.logger.error')
+@patch('inesdata_mov_datasets.sources.extract.informo.requests.get')
+@patch('inesdata_mov_datasets.sources.extract.informo.save_informo')
+@pytest.mark.asyncio
+async def test_get_informo(mock_save_informo, mock_requests_get, mock_error, mock_debug, mock_info, mock_instantiate_logger, mock_settings):
+    """Test the INFORMO data extraction."""
+
+    # Set up the fake response of requests.get (the payload must be valid XML)
+    mock_response_content = "<pms><fecha_hora>01/10/2024 12:00:00</fecha_hora></pms>"
+    mock_requests_get.return_value = MagicMock(status_code=200, content=mock_response_content)
+
+    # Run the function
+    await get_informo(mock_settings)
+
+    parsed_data = xmltodict.parse(mock_response_content)
+
+    # Check that instantiate_logger is called
+    mock_instantiate_logger.assert_called_once_with(mock_settings, "INFORMO", "extract")
+
+    # Check that logger.info marks the start and end of the extraction
+    mock_info.assert_any_call("Extracting INFORMO")
+    mock_info.assert_any_call("Extracted INFORMO")
+
+    # Check that requests.get is called once
+    mock_requests_get.assert_called_once_with("https://informo.madrid.es/informo/tmadrid/pm.xml")
+    # Check that save_informo is called with the parsed data
+    mock_save_informo.assert_called_once_with(mock_settings, parsed_data)
+
+    # Check that logger.debug is called
+    mock_debug.assert_called()
+
+# Test that an HTTP error in the request is captured
+@patch('inesdata_mov_datasets.sources.extract.informo.instantiate_logger')
+@patch('inesdata_mov_datasets.sources.extract.informo.logger.error')
+@patch('inesdata_mov_datasets.sources.extract.informo.requests.get')
+@pytest.mark.asyncio
+async def test_get_informo_http_error(mock_requests_get, mock_error_logger, mock_instantiate_logger, mock_settings):
+    """Test that an HTTP error is captured."""
+
+    # Fake an exception in requests.get
+    mock_requests_get.side_effect = Exception("Connection error")
+
+    # Run the function
+    await get_informo(mock_settings)
+
+    # Check that instantiate_logger is called
+    mock_instantiate_logger.assert_called_once_with(mock_settings, "INFORMO", "extract")
+
+    # Check that an error was logged
+    assert mock_error_logger.call_count == 2
+
+# Test that an XML parsing error is captured
+@patch('inesdata_mov_datasets.sources.extract.informo.instantiate_logger')
+@patch('inesdata_mov_datasets.sources.extract.informo.logger.error')
+@patch('inesdata_mov_datasets.sources.extract.informo.requests.get')
+@patch('inesdata_mov_datasets.sources.extract.informo.save_informo')
+@pytest.mark.asyncio
+async def test_get_informo_xml_parsing(mock_save_informo, mock_requests_get, mock_error_logger, mock_instantiate_logger, mock_settings):
+    """Test that an XML parsing error is captured."""
+
+    # Fake an invalid XML response
+    mock_response = MagicMock(status_code=200, content="Invalid XML")
+    mock_requests_get.return_value = mock_response
+
+    # Run the function
+    await get_informo(mock_settings)
+
+    # Check that instantiate_logger is called
+    mock_instantiate_logger.assert_called_once_with(mock_settings, "INFORMO", "extract")
+
+    # Check that a parsing error was logged
+    assert mock_error_logger.call_count == 2
+
+    # Make sure save_informo was not called because of the error
+    mock_save_informo.assert_not_called()
+
+###################### save_informo
+@pytest.fixture
+def mock_settings_minio():
+    """Fixture that fakes the settings configuration with MinIO storage."""
+    settings = MagicMock()
+    settings.storage.default = "minio"
+    settings.storage.config.minio.endpoint = "http://minio.local"
+    settings.storage.config.minio.access_key = "minio_access_key"
+    settings.storage.config.minio.secret_key = "minio_secret_key"
+    settings.storage.config.minio.bucket = "test-bucket"
+    return settings
+
"minio_secret_key" + settings.storage.config.minio.bucket = "test-bucket" + return settings + +@pytest.fixture +def mock_settings_local(): + """Fixture para simular la configuración de settings con almacenamiento local.""" + settings = MagicMock() + settings.storage.default = "local" + settings.storage.config.local.path = "/tmp" + return settings + +# Simular los datos que se pasarán a la función +@pytest.fixture +def mock_data(): + return { + "pms": { + "fecha_hora": "09/10/2024 14:30:00" + } + } + +# Test para la funcionalidad de Minio +@patch('inesdata_mov_datasets.sources.extract.informo.check_s3_file_exists') # Parchea la función que verifica si el archivo existe en Minio +@patch('inesdata_mov_datasets.sources.extract.informo.upload_objs') # Parchea la función que sube los objetos a Minio +@pytest.mark.asyncio +async def test_save_informo_minio(mock_upload_objs, mock_check_s3_file_exists, mock_settings_minio, mock_data): + """Test para verificar el almacenamiento en Minio.""" + + # Simular que el archivo no existe en Minio + mock_check_s3_file_exists.return_value = False + + # Llamar a la función + await save_informo(mock_settings_minio, mock_data) + + date_from_file = mock_data["pms"]["fecha_hora"] + dt = datetime.datetime.strptime(date_from_file, "%d/%m/%Y %H:%M:%S") + formated_date = dt.strftime("%Y-%m-%dT%H%M") + # Get the timezone from Madrid and formated the dates for the object_name of the files + europe_timezone = pytz.timezone("Europe/Madrid") + current_datetime = datetime.datetime.now(europe_timezone).replace(second=0) + print(current_datetime) + formatted_date_slash = current_datetime.strftime( + "%Y/%m/%d" + ) + + # Verificar que se llamó a check_s3_file_exists con los argumentos correctos + mock_check_s3_file_exists.assert_called_once_with( + endpoint_url=mock_settings_minio.storage.config.minio.endpoint, + aws_secret_access_key=mock_settings_minio.storage.config.minio.secret_key, + aws_access_key_id=mock_settings_minio.storage.config.minio.access_key, + bucket_name=mock_settings_minio.storage.config.minio.bucket, + object_name=f"raw/informo/{formatted_date_slash}/informo_{formated_date}.json" + ) + + # Verificar que se llamó a upload_objs para subir el archivo + mock_upload_objs.assert_called_once() + +# Test para la funcionalidad de almacenamiento local +@patch('inesdata_mov_datasets.sources.extract.informo.check_local_file_exists') # Parchea la función que verifica si el archivo existe localmente +@patch('builtins.open', new_callable=mock_open) # Parchea 'open' para evitar escribir en el sistema de archivos real +@pytest.mark.asyncio +async def test_save_informo_local(mock_open_func, mock_check_local_file_exists, mock_settings_local, mock_data): + """Test para verificar el almacenamiento local.""" + + # Simular que el archivo no existe en local + mock_check_local_file_exists.return_value = False + + # Llamar a la función + await save_informo(mock_settings_local, mock_data) + date_from_file = mock_data["pms"]["fecha_hora"] + dt = datetime.datetime.strptime(date_from_file, "%d/%m/%Y %H:%M:%S") + formated_date = dt.strftime("%Y-%m-%dT%H%M") + # Get the timezone from Madrid and formated the dates for the object_name of the files + europe_timezone = pytz.timezone("Europe/Madrid") + current_datetime = datetime.datetime.now(europe_timezone).replace(second=0) + print(current_datetime) + formatted_date_slash = current_datetime.strftime( + "%Y/%m/%d" + ) + + # Verificar que se llamó a check_local_file_exists con los argumentos correctos + 
+# Test the local storage path
+@patch('inesdata_mov_datasets.sources.extract.informo.check_local_file_exists')  # patch the check for the local file
+@patch('builtins.open', new_callable=mock_open)  # patch 'open' to avoid writing to the real filesystem
+@pytest.mark.asyncio
+async def test_save_informo_local(mock_open_func, mock_check_local_file_exists, mock_settings_local, mock_data):
+    """Test the local storage."""
+
+    # Fake that the file does not exist locally
+    mock_check_local_file_exists.return_value = False
+
+    # Call the function
+    await save_informo(mock_settings_local, mock_data)
+    date_from_file = mock_data["pms"]["fecha_hora"]
+    dt = datetime.datetime.strptime(date_from_file, "%d/%m/%Y %H:%M:%S")
+    formatted_date = dt.strftime("%Y-%m-%dT%H%M")
+    # Get the Madrid timezone and format the dates for the object_name of the files
+    europe_timezone = pytz.timezone("Europe/Madrid")
+    current_datetime = datetime.datetime.now(europe_timezone).replace(second=0)
+    formatted_date_slash = current_datetime.strftime(
+        "%Y/%m/%d"
+    )
+
+    # Check that check_local_file_exists was called with the right arguments
+    mock_check_local_file_exists.assert_called_once_with(
+        Path(f"/tmp/raw/informo/{formatted_date_slash}"),
+        f"informo_{formatted_date}.json"
+    )
+
+    # Check that the file was opened for writing
+    mock_open_func.assert_called_once_with(Path(f"/tmp/raw/informo/{formatted_date_slash}") / f"informo_{formatted_date}.json", "w")
+
+    # Check that the JSON content was written to the file
+    mock_open_func().write.assert_called_once_with(json.dumps(mock_data))
\ No newline at end of file
diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 0000000..cff937d
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,448 @@
+import pytest
+import asyncio
+import aiofiles
+import os
+import yaml
+from pathlib import Path
+from unittest.mock import MagicMock, patch, AsyncMock, Mock, mock_open
+
+from inesdata_mov_datasets.utils import list_objs, async_download, get_obj, download_obj, download_objs, read_obj, upload_obj, upload_metadata, upload_objs, read_settings, check_local_file_exists, check_s3_file_exists
+
+###################### list_objs
+@patch('inesdata_mov_datasets.utils.botocore.session.get_session')  # adjust to the real module path
+def test_list_objs(mock_get_session):
+    """Test listing the objects in an S3 bucket."""
+
+    # Set up the fake S3 client
+    mock_client = MagicMock()
+    mock_get_session.return_value.create_client.return_value = mock_client
+
+    # Set up the fake paginator
+    mock_paginator = MagicMock()
+    mock_client.get_paginator.return_value = mock_paginator
+
+    # Fake the paginator response
+    mock_paginator.paginate.return_value = [
+        {
+            "Contents": [
+                {"Key": "prefix/file1.txt"},
+                {"Key": "prefix/file2.txt"},
+            ]
+        },
+        {
+            "Contents": [
+                {"Key": "prefix/file3.txt"},
+            ]
+        },
+    ]
+
+    # Call the function
+    bucket = "my-bucket"
+    prefix = "prefix/"
+    endpoint_url = "http://minio:9000"
+    aws_access_key_id = "my-access-key"
+    aws_secret_access_key = "my-secret-key"
+
+    result = list_objs(bucket, prefix, endpoint_url, aws_secret_access_key, aws_access_key_id)
+
+    # Check that create_client is called with the right parameters
+    mock_get_session.return_value.create_client.assert_called_once_with(
+        "s3",
+        endpoint_url=endpoint_url,
+        aws_secret_access_key=aws_secret_access_key,
+        aws_access_key_id=aws_access_key_id,
+    )
+
+    # Check that get_paginator is called
+    mock_client.get_paginator.assert_called_once_with("list_objects_v2")
+
+    # Check that the result is the expected one
+    expected_keys = [
+        "prefix/file1.txt",
+        "prefix/file2.txt",
+        "prefix/file3.txt",
+    ]
+    assert result == expected_keys
+
+    # Check that paginate is called once with the right parameters
+    mock_paginator.paginate.assert_called_once_with(Bucket=bucket, Prefix=prefix)
+
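+# Note: list_objs (exercised above) is expected to flatten every "Contents"
+# page returned by the paginator into a single list of keys; a minimal sketch
+# of that contract:
+#
+#     keys = [obj["Key"] for page in paginator.paginate(Bucket=bucket, Prefix=prefix)
+#             for obj in page.get("Contents", [])]
+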
+###################### async_download
+@patch('inesdata_mov_datasets.utils.download_objs')  # adjust to the real module path
+def test_async_download(mock_download_objs):
+    """Test downloading objects from MinIO."""
+
+    # Set the mock up as a no-op
+    mock_download_objs.return_value = MagicMock()
+
+    # Input parameters
+    bucket = "my-bucket"
+    prefix = "raw_data/"
+    output_path = "/local/path/"
+    endpoint_url = "http://minio:9000"
+    aws_access_key_id = "my-access-key"
+    aws_secret_access_key = "my-secret-key"
+
+    # Run the function
+    async_download(bucket, prefix, output_path, endpoint_url, aws_access_key_id, aws_secret_access_key)
+
+    # Check that download_objs is called with the right parameters
+    mock_download_objs.assert_called_once_with(
+        bucket,
+        prefix,
+        output_path,
+        endpoint_url,
+        aws_access_key_id,
+        aws_secret_access_key,
+    )
+
+    # Check that a new event loop was created
+    assert asyncio.get_event_loop() is not None
+
+###################### get_obj
+@pytest.mark.asyncio
+async def test_get_obj():
+    """Test fetching an object from S3."""
+
+    # Create a fake client
+    mock_client = AsyncMock()
+
+    # Set up the fake get_object response
+    mock_resp = MagicMock()
+    mock_resp["Body"].read = AsyncMock(return_value=b'{"key": "value"}')  # fake the object content
+    mock_client.get_object = AsyncMock(return_value=mock_resp)
+
+    # Input parameters
+    bucket = "my-bucket"
+    key = "my-object-key"
+
+    # Run the function
+    content = await get_obj(mock_client, bucket, key)
+
+    # Check that get_object is called with the right parameters
+    mock_client.get_object.assert_called_once_with(Bucket=bucket, Key=key)
+
+    # Check that the returned content is correct
+    assert content == b'{"key": "value"}'
+
+
+###################### download_obj
+@pytest.fixture
+def mock_storage_path(tmp_path):
+    # Use the temporary path provided by pytest
+    return str(tmp_path)
+
+@pytest.mark.asyncio
+async def test_download_obj(mock_storage_path):
+    """Test downloading an object from S3."""
+
+    # Create a fake client
+    mock_client = AsyncMock()
+
+    # Fake the get_obj response
+    mock_resp = b'{"key": "value"}'  # fake the object content
+    mock_client.get_object = AsyncMock(return_value={"Body": AsyncMock(read=AsyncMock(return_value=mock_resp))})
+
+    # Input parameters
+    bucket = "my-bucket"
+    key = "my-object-key"
+    semaphore = AsyncMock()  # fake semaphore
+
+    # Run the function
+    await download_obj(mock_client, bucket, key, mock_storage_path, semaphore)
+
+    # Check that get_object is called with the right parameters
+    mock_client.get_object.assert_called_once_with(Bucket=bucket, Key=key)
+
+    # Check that the file was written to the right directory
+    async with aiofiles.open(os.path.join(mock_storage_path, key), "r") as out_file:
+        content = await out_file.read()
+
+    # Check that the file content is the expected one
+    assert content == '{"key": "value"}'
+
+    # Check that the directory was created
+    os.makedirs(os.path.dirname(mock_storage_path), exist_ok=True)
+    assert os.path.isdir(os.path.dirname(mock_storage_path))
+
+
+
+###################### download_objs
+@pytest.mark.asyncio
+@patch('inesdata_mov_datasets.utils.get_session')
+@patch('inesdata_mov_datasets.utils.download_obj')
+@patch('inesdata_mov_datasets.utils.list_objs')
+@patch('inesdata_mov_datasets.utils.logger')
+async def test_download_objs_without_eta(mock_logger, mock_list_objs, mock_download_obj, mock_get_session):
+    """Test downloading objects from S3 when the prefix does not contain '/eta'."""
+
+    # Fake the S3 client
+    mock_client = AsyncMock()
+    mock_get_session.return_value.create_client.return_value.__aenter__.return_value = mock_client
+
+    # Fake the listed objects
+    mock_list_objs.return_value = ['key1', 'key2', 'key3']
+
+    bucket = "my-bucket"
+    prefix = "some/path/"
+    output_path = "tmp/"
+    endpoint_url = "http://minio.example.com"
+    aws_access_key_id = "minio_user"
+    aws_secret_access_key = "minio_password"
+
+    # Run the function
+    await download_objs(bucket, prefix, output_path, endpoint_url, aws_access_key_id, aws_secret_access_key)
+
+    # Check that list_objs is called with the right parameters
+    mock_list_objs.assert_called_once_with(bucket, prefix, endpoint_url, aws_secret_access_key, aws_access_key_id)
+
+    # Check that download_obj is called for every listed key
+    assert mock_download_obj.call_count == 3  # three keys: key1, key2, key3
+
+    # Check that the logger was called
+    mock_logger.debug.assert_any_call("Downloading files from s3")
+    # mock_logger.debug.assert_any_call("Downloading 3 files from emt endpoint")
+
+###################### read_obj
+@pytest.mark.asyncio
+@patch('inesdata_mov_datasets.utils.get_session')
+async def test_read_obj(mock_get_session):
+    """Test reading an object from S3."""
+
+    # Fake the S3 client
+    mock_client = AsyncMock()
+    mock_get_session.return_value.create_client.return_value.__aenter__.return_value = mock_client
+
+    # Fake the get_object response
+    mock_response = AsyncMock()
+    mock_response['Body'] = AsyncMock()
+
+    # Fake the read method so it returns the right content
+    mock_response['Body'].read = AsyncMock(return_value=b'{"key": "value"}')
+
+    mock_client.get_object.return_value = mock_response
+
+    bucket = "my-bucket"
+    endpoint_url = "http://minio.example.com"
+    aws_access_key_id = "minio_user"
+    aws_secret_access_key = "minio_password"
+    object_name = "some/object/key"
+
+    # Run the function
+    result = await read_obj(bucket, endpoint_url, aws_access_key_id, aws_secret_access_key, object_name)
+
+    # Check that get_object is called with the right bucket and object_name
+    mock_client.get_object.assert_called_once_with(Bucket=bucket, Key=object_name)
+
+    # Check that the right content is returned
+    assert result == '{"key": "value"}'
+
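+# Note: the tests above reach the aiobotocore client through
+# get_session().create_client(...).__aenter__(), because the client is opened
+# as an async context manager; any new test that fakes the client has to mock
+# that same chain.
+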
+###################### upload_obj
+@pytest.mark.asyncio
+@patch('inesdata_mov_datasets.utils.ClientCreatorContext')  # adjust to the real module path
+async def test_upload_obj(mock_client_creator_context):
+    """Test uploading an object to S3."""
+
+    # Fake the S3 client
+    mock_client = AsyncMock()
+    mock_client_creator_context.return_value = mock_client
+
+    bucket = "my-bucket"
+    key = "some/object/key"
+    object_value = "Hello, S3!"
+
+    # Run the function
+    await upload_obj(mock_client, bucket, key, object_value)
+
+    # Check that put_object is called with the right parameters
+    mock_client.put_object.assert_called_once_with(Bucket=bucket, Key=key, Body=object_value.encode("utf-8"))
+
+###################### upload_metadata
+@patch('botocore.session.get_session')  # mock the botocore session
+def test_upload_metadata(mock_get_session):
+    """Test uploading metadata to S3."""
+
+    # Fake the S3 client
+    mock_client = Mock()
+    mock_get_session.return_value.create_client.return_value = mock_client
+
+    bucket = "my-bucket"
+    endpoint_url = "http://localhost:9000"
+    aws_access_key_id = "test-access-key"
+    aws_secret_access_key = "test-secret-key"
+    keys = ["some/object/key1", "some/object/key2"]
+
+    # Case 1: the metadata file already exists
+    mock_client.get_object.return_value = {
+        'Body': Mock(read=Mock(return_value=b'old_file1\nold_file2\n'))
+    }
+
+    upload_metadata(bucket, endpoint_url, aws_access_key_id, aws_secret_access_key, keys)
+
+    # Check that get_object is called to fetch the previous content
+    mock_client.get_object.assert_called_once_with(Bucket=bucket, Key='some/object/metadata.txt')
+
+    # Check that put_object is called with the new content
+    expected_content = 'old_file1\nold_file2\n\nsome/object/key1\nsome/object/key2'
+    mock_client.put_object.assert_called_once_with(Bucket=bucket, Key='some/object/metadata.txt', Body=expected_content.encode('utf-8'))
+
+    # Case 2: the metadata file does not exist yet (first run of the day)
+    mock_client.reset_mock()  # reset the mocks
+    mock_client.get_object.side_effect = Exception("File not found")  # fake a missing file
+
+    upload_metadata(bucket, endpoint_url, aws_access_key_id, aws_secret_access_key, keys)
+
+    # Check that put_object is called with the new content
+    new_expected_content = 'some/object/key1\nsome/object/key2'
+    mock_client.put_object.assert_called_once_with(Bucket=bucket, Key='some/object/metadata.txt', Body=new_expected_content.encode('utf-8'))
+
+###################### upload_objs
+@patch('botocore.session.get_session')  # mock the botocore session
+@patch('inesdata_mov_datasets.utils.upload_obj')  # mock the upload_obj function
+@pytest.mark.asyncio
+async def test_upload_objs(mock_upload_obj, mock_get_session):
+    """Test uploading several objects to S3."""
+
+    # Fake the S3 client
+    mock_client = AsyncMock()
+    mock_get_session.return_value.create_client.return_value.__aenter__.return_value = mock_client
+
+    bucket = "my-bucket"
+    endpoint_url = "http://localhost:9000"
+    aws_access_key_id = "test-access-key"
+    aws_secret_access_key = "test-secret-key"
+    objects_dict = {
+        "object1.txt": "Object 1 content",
+        "object2.txt": "Object 2 content",
+    }
+
+    await upload_objs(bucket, endpoint_url, aws_access_key_id, aws_secret_access_key, objects_dict)
+
+    # Check that upload_obj is called once per object in objects_dict
+    assert mock_upload_obj.call_count == len(objects_dict)
+
+
+###################### read_settings
+# Example of faked YAML settings
+yaml_content = """
+sources:
+  emt:
+    credentials:
+      email: myemail
+      password: mypassword
+      x_client_id: my_x_client_id
+      passkey: my_passkey
+    stops: [1,2]
+    lines: [1,2]
+  aemet:
+    credentials:
+      api_key: my_api_key
+storage:
+  default: local
+  config:
+    minio:
+      access_key: my_access_key
+      secret_key: my_secret_key
+      endpoint: minio-endpoint
+      secure: True
+      bucket: my_bucket
+    local:
+      path: /path/to/save/datasets
+  logs:
+    path: /path/to/save/logs
+    level: LOG_LEVEL
+"""
+
+def test_read_settings():
+    # Fake the open call and the YAML file read
+    with patch("builtins.open", mock_open(read_data=yaml_content)):
+        # Fake loading the data with yaml.safe_load
+        with patch("yaml.safe_load", return_value=yaml.safe_load(yaml_content)):
+            # Call the function under test
+            settings = read_settings("path/to/config.yaml")
+
+            # Check that the values loaded into settings are correct
+            if settings.storage.default == "local" and settings.sources.aemet.credentials.api_key == "my_api_key":
+                # Check that the EMT email and password are None
+                assert settings.sources.emt.credentials.email is None
+                assert settings.sources.emt.credentials.password is None
+                assert settings.sources.aemet.credentials.api_key == "my_api_key"
+                assert settings.storage.default == "local"
+                assert settings.storage.config.local.path == "/path/to/save/datasets"
+                assert settings.storage.config.minio.access_key == "my_access_key"
+                assert settings.storage.logs.level == "LOG_LEVEL"
+
+
+###################### check_local_file_exists
+@pytest.fixture
+def mock_path():
+    """Fixture that fakes the Path object."""
+    return MagicMock(spec=Path)
+
+def test_check_local_file_exists_exists(tmp_path):
+    """Test that the function returns True when the file exists."""
+    # Create a temporary file
+    test_file = tmp_path / "archivo.txt"
+    test_file.write_text("This is a test file.")
+
+    # Call the function with the temporary directory path and the file name
+    result = check_local_file_exists(tmp_path, "archivo.txt")
+
+    # Check that the result is True
+    assert result is True
+
+
+def test_check_local_file_exists_does_not_exist(tmp_path):
+    """Test that the function returns False when the file does not exist."""
+    # Call the function with the temporary directory path and a file name that does not exist
+    result = check_local_file_exists(tmp_path, "archivo_no_existe.txt")
+
+    # Check that the result is False
+    assert result is False
+
+###################### check_s3_file_exists
+@pytest.mark.asyncio
+async def test_check_s3_file_exists_exists():
+    """Test that the function returns True when the file exists in S3."""
+    # Parameter setup
+    endpoint_url = "http://localhost:9000"
+    aws_secret_access_key = "secret"
+    aws_access_key_id = "access_key"
+    bucket_name = "mi_bucket"
+    object_name = "archivo.txt"
+
+    # Fake the S3 client and the head_object method
+    mock_client = AsyncMock()
+    mock_client.head_object.return_value = None  # fake that the file exists
+
+    with patch("inesdata_mov_datasets.utils.get_session") as mock_get_session:
+        mock_get_session.return_value.create_client.return_value.__aenter__.return_value = mock_client
+
+        result = await check_s3_file_exists(endpoint_url, aws_secret_access_key, aws_access_key_id, bucket_name, object_name)
+
+        assert result is True
+
+@pytest.mark.asyncio
+async def test_check_s3_file_exists_does_not_exist():
+    """Test that the function returns False when the file does not exist in S3."""
+    # Parameter setup
+    endpoint_url = "http://localhost:9000"
+    aws_secret_access_key = "secret"
+    aws_access_key_id = "access_key"
+    bucket_name = "mi_bucket"
+    object_name = "archivo_inexistente.txt"
+
+    # Fake the S3 client and make head_object raise an exception
+    mock_client = AsyncMock()
+    mock_client.head_object.side_effect = Exception("404 Not Found")  # fake that the file does not exist
+
+    with patch("inesdata_mov_datasets.utils.get_session") as mock_get_session:
+        mock_get_session.return_value.create_client.return_value.__aenter__.return_value = mock_client
+
+        result = await check_s3_file_exists(endpoint_url, aws_secret_access_key, aws_access_key_id, bucket_name, object_name)
+
+        assert result is False
\ No newline at end of file