diff --git a/tests/core/data/garbage.csv b/tests/core/data/garbage.csv new file mode 100644 index 000000000..c55856dbd --- /dev/null +++ b/tests/core/data/garbage.csv @@ -0,0 +1,2 @@ +a,b,c +1,2,"3 \ No newline at end of file diff --git a/tests/core/data/garbage.xlsx b/tests/core/data/garbage.xlsx new file mode 100644 index 000000000..bd39652b1 Binary files /dev/null and b/tests/core/data/garbage.xlsx differ diff --git a/tests/core/test_readers.py b/tests/core/test_readers.py index 5c3bbdba7..323774974 100644 --- a/tests/core/test_readers.py +++ b/tests/core/test_readers.py @@ -4,6 +4,7 @@ from logging import CRITICAL, DEBUG, INFO import pytest +from pandas.errors import ParserError from xarray import DataArray from tests.conftest import log_check @@ -93,7 +94,11 @@ def test_func(): ], ) def test_load_netcdf(shared_datadir, caplog, file, file_var, exp_err, expected_log): - """Test the netdcf variable loader.""" + """Test the netcdf variable loader. + + The FileNotFoundError test is dependent on running the test_file_format_loader tests + first, and will fail if run independently. See TODO above. 
+ """ from virtual_ecosystem.core.readers import load_netcdf @@ -105,6 +110,70 @@ def test_load_netcdf(shared_datadir, caplog, file, file_var, exp_err, expected_l log_check(caplog, expected_log) +@pytest.mark.parametrize( + argnames=["file", "file_var", "exp_err", "expected_log"], + argvalues=[ + ( + "not_there.csv", + "irrelevant", + pytest.raises(FileNotFoundError), + ((CRITICAL, "Data file not found"),), + ), + ( + "garbage.csv", + "irrelevant", + pytest.raises(ParserError), + ((CRITICAL, "Could not load data from"),), + ), + ( + "reader_test.csv", + "missing", + pytest.raises(KeyError), + ((CRITICAL, "Variable missing not found in"),), + ), + ( + "reader_test.csv", + "var1", + does_not_raise(), + (), + ), + ( + "garbage.xlsx", + "irrelevant", + pytest.raises(Exception), + ((CRITICAL, "Unidentified exception opening"),), + ), + ( + "reader_test.xlsx", + "missing", + pytest.raises(KeyError), + ((CRITICAL, "Variable missing not found in"),), + ), + ( + "reader_test.xlsx", + "var1", + does_not_raise(), + (), + ), + ], +) +def test_load_dataframe(shared_datadir, caplog, file, file_var, exp_err, expected_log): + """Test the dataframe variable loader. + + The FileNotFoundError test is dependent on running the test_file_format_loader tests + first, and will fail if run independently. See TODO above. 
+ """ + + from virtual_ecosystem.core.readers import load_from_dataframe + + with exp_err: + darray = load_from_dataframe(shared_datadir / file, file_var) + assert isinstance(darray, DataArray) + + # Check the error reports + log_check(caplog, expected_log) + + @pytest.mark.parametrize( argnames=[ "filename", diff --git a/virtual_ecosystem/core/readers.py b/virtual_ecosystem/core/readers.py index 9fbb65322..19302d66f 100644 --- a/virtual_ecosystem/core/readers.py +++ b/virtual_ecosystem/core/readers.py @@ -35,6 +35,7 @@ def new_function_to_load_tif_data(...): from pathlib import Path from pandas import read_csv, read_excel +from pandas.errors import ParserError from xarray import DataArray, load_dataset from virtual_ecosystem.core.logger import LOGGER @@ -150,7 +151,11 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray: Raises: FileNotFoundError: with bad file path names. - ValueError: if the file data is not readable. + ParserError: if the csv data is not readable. + Exception: if the excel data is not readable. + + Note: the general exception is used because of the variety of exceptions that are + possible with read_excel. 
""" to_raise: Exception @@ -161,13 +166,17 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray: if file_type == ".csv": dataset = read_csv(file) else: - dataset = read_excel(file) + dataset = read_excel(file, engine="openpyxl") except FileNotFoundError: to_raise = FileNotFoundError(f"Data file not found: {file}") LOGGER.critical(to_raise) raise to_raise - except ValueError as err: - to_raise = ValueError(f"Could not load data from {file}: {err}") + except ParserError as err: + to_raise = ParserError(f"Could not load data from {file}: {err}.") + LOGGER.critical(to_raise) + raise to_raise + except Exception as err: + to_raise = Exception(f"Unidentified exception opening {file}: {err}") LOGGER.critical(to_raise) raise to_raise @@ -177,7 +186,7 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray: LOGGER.critical(to_raise) raise to_raise - return dataset[var_name] + return dataset[var_name].to_xarray() def load_to_dataarray(