Skip to content

Commit

Permalink
adding tests for corrupted files
Browse files Browse the repository at this point in the history
  • Loading branch information
sallymatson committed Jan 9, 2025
1 parent 9aa0cf6 commit 1126417
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 6 deletions.
2 changes: 2 additions & 0 deletions tests/core/data/garbage.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
a,b,c
1,2,"3
Binary file added tests/core/data/garbage.xlsx
Binary file not shown.
71 changes: 70 additions & 1 deletion tests/core/test_readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from logging import CRITICAL, DEBUG, INFO

import pytest
from pandas.errors import ParserError
from xarray import DataArray

from tests.conftest import log_check
Expand Down Expand Up @@ -93,7 +94,11 @@ def test_func():
],
)
def test_load_netcdf(shared_datadir, caplog, file, file_var, exp_err, expected_log):
"""Test the netdcf variable loader."""
"""Test the netdcf variable loader.
The FileNotFoundError test is dependent on running the test_file_format_loader tests
first, and will fail if run independently. See TODO above.
"""

from virtual_ecosystem.core.readers import load_netcdf

Expand All @@ -105,6 +110,70 @@ def test_load_netcdf(shared_datadir, caplog, file, file_var, exp_err, expected_l
log_check(caplog, expected_log)


@pytest.mark.parametrize(
    argnames=["file", "file_var", "exp_err", "expected_log"],
    argvalues=[
        # Missing file: load_from_dataframe re-raises FileNotFoundError.
        (
            "not_there.csv",
            "irrelevant",
            pytest.raises(FileNotFoundError),
            ((CRITICAL, "Data file not found"),),
        ),
        # Corrupt CSV (unclosed quote in the fixture): pandas ParserError.
        (
            "garbage.csv",
            "irrelevant",
            pytest.raises(ParserError),
            ((CRITICAL, "Could not load data from"),),
        ),
        # Readable CSV but the requested variable is absent: KeyError.
        (
            "reader_test.csv",
            "missing",
            pytest.raises(KeyError),
            ((CRITICAL, "Variable missing not found in"),),
        ),
        # Readable CSV, variable present: succeeds with no log output.
        (
            "reader_test.csv",
            "var1",
            does_not_raise(),
            (),
        ),
        # Corrupt xlsx: read_excel can fail in many ways, so the loader
        # wraps it in a generic Exception.
        (
            "garbage.xlsx",
            "irrelevant",
            pytest.raises(Exception),
            ((CRITICAL, "Unidentified exception opening"),),
        ),
        # Readable xlsx but the requested variable is absent: KeyError.
        (
            "reader_test.xlsx",
            "missing",
            pytest.raises(KeyError),
            ((CRITICAL, "Variable missing not found in"),),
        ),
        # Readable xlsx, variable present: succeeds with no log output.
        (
            "reader_test.xlsx",
            "var1",
            does_not_raise(),
            (),
        ),
    ],
)
def test_load_dataframe(shared_datadir, caplog, file, file_var, exp_err, expected_log):
    """Test the dataframe (CSV / Excel) variable loader.

    Checks that ``load_from_dataframe`` raises the documented exceptions for
    missing files, unparseable CSV and Excel content, and missing variables,
    and that it returns a :class:`~xarray.DataArray` on success.

    NOTE(review): the docstring previously said "netdcf variable loader" and
    carried an ordering caveat copied from ``test_load_netcdf`` — confirm
    whether any test-ordering dependency actually applies here.
    """

    from virtual_ecosystem.core.readers import load_from_dataframe

    with exp_err:
        darray = load_from_dataframe(shared_datadir / file, file_var)
        # Only reached on the success path (does_not_raise cases).
        assert isinstance(darray, DataArray)

    # Check the expected log entries were emitted (or that there were none).
    log_check(caplog, expected_log)


@pytest.mark.parametrize(
argnames=[
"filename",
Expand Down
19 changes: 14 additions & 5 deletions virtual_ecosystem/core/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def new_function_to_load_tif_data(...):
from pathlib import Path

from pandas import read_csv, read_excel
from pandas.errors import ParserError
from xarray import DataArray, load_dataset

from virtual_ecosystem.core.logger import LOGGER
Expand Down Expand Up @@ -150,7 +151,11 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray:
Raises:
FileNotFoundError: with bad file path names.
ValueError: if the file data is not readable.
ParserError: if the csv data is not readable.
Exception: if the excel data is not readable.
Note: the general exception is used because of the variety of exceptions that are
possible with read_excel.
"""

to_raise: Exception
Expand All @@ -161,13 +166,17 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray:
if file_type == ".csv":
dataset = read_csv(file)
else:
dataset = read_excel(file)
dataset = read_excel(file, engine="openpyxl")
except FileNotFoundError:
to_raise = FileNotFoundError(f"Data file not found: {file}")
LOGGER.critical(to_raise)
raise to_raise
except ValueError as err:
to_raise = ValueError(f"Could not load data from {file}: {err}")
except ParserError as err:
to_raise = ParserError(f"Could not load data from {file}: {err}.")
LOGGER.critical(to_raise)
raise to_raise
except Exception as err:
to_raise = Exception(f"Unidentified exception opening {file}: {err}")
LOGGER.critical(to_raise)
raise to_raise

Expand All @@ -177,7 +186,7 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray:
LOGGER.critical(to_raise)
raise to_raise

return dataset[var_name]
return dataset[var_name].to_xarray()


def load_to_dataarray(
Expand Down

0 comments on commit 1126417

Please sign in to comment.