ImperialCollegeLondon · sallymatson · Jan 8, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 9, 2025
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -168,6 +168,7 @@ class MyReferenceStyle(AuthorYearReferenceStyle):
     ("py:obj", "virtual_ecosystem.core.grid.GRID_STRUCTURE_SIG.__repr__"),
     ("py:obj", "virtual_ecosystem.core.grid.GRID_STRUCTURE_SIG.count"),
     ("py:obj", "virtual_ecosystem.core.grid.GRID_STRUCTURE_SIG.index"),
+    ("py:exc", "ParserError"),
 ]
 intersphinx_mapping = {
     "numpy": ("https://numpy.org/doc/stable/", None),

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -44,6 +44,7 @@ tomli = {version = "^2.0.1", python = "<3.11"}
 tomli-w = "^1.0.0" 
 tqdm = "^4.66.2" 
 xarray = ">=2024.6,<2026.0" 
+openpyxl = "^3.1.5"
 
 [tool.poetry.group.types.dependencies]
 types-dataclasses = "^0.6.6"

diff --git a/tests/core/data/garbage.csv b/tests/core/data/garbage.csv
@@ -0,0 +1,2 @@
+a,b,c
+1,2,"3
diff --git a/tests/core/data/garbage.xlsx b/tests/core/data/garbage.xlsx
diff --git a/tests/core/data/reader_test.csv b/tests/core/data/reader_test.csv
@@ -0,0 +1,4 @@
+var1,var2
+1,4
+2,5
+3,6
diff --git a/tests/core/data/reader_test.xlsx b/tests/core/data/reader_test.xlsx
diff --git a/tests/core/test_readers.py b/tests/core/test_readers.py
@@ -4,6 +4,7 @@
 from logging import CRITICAL, DEBUG, INFO
 
 import pytest
+from pandas.errors import ParserError
 from xarray import DataArray
 
 from tests.conftest import log_check
@@ -93,7 +94,11 @@ def test_func():
     ],
 )
 def test_load_netcdf(shared_datadir, caplog, file, file_var, exp_err, expected_log):
-    """Test the netdcf variable loader."""
+    """Test the netCDF variable loader.
+
+    The tests here are dependent on the test_file_format_loader, so cannot be run
+    individually.
+    """
 
     from virtual_ecosystem.core.readers import load_netcdf
 
@@ -105,6 +110,70 @@ def test_load_netcdf(shared_datadir, caplog, file, file_var, exp_err, expected_l
     log_check(caplog, expected_log)
 
 
+@pytest.mark.parametrize(
+    argnames=["file", "file_var", "exp_err", "expected_log"],
+    argvalues=[
+        (  # csv path that does not exist
+            "not_there.csv",
+            "irrelevant",
+            pytest.raises(FileNotFoundError),
+            ((CRITICAL, "Data file not found"),),
+        ),
+        (  # malformed csv content (unterminated quoted field)
+            "garbage.csv",
+            "irrelevant",
+            pytest.raises(ParserError),
+            ((CRITICAL, "Could not load data from"),),
+        ),
+        (  # readable csv, requested column is absent
+            "reader_test.csv",
+            "missing",
+            pytest.raises(KeyError),
+            ((CRITICAL, "Variable missing not found in"),),
+        ),
+        (  # readable csv, requested column is present
+            "reader_test.csv",
+            "var1",
+            does_not_raise(),
+            (),
+        ),
+        (  # xlsx file expected to fail on opening
+            "garbage.xlsx",
+            "irrelevant",
+            pytest.raises(Exception),
+            ((CRITICAL, "Unidentified exception opening"),),
+        ),
+        (  # xlsx file, requested column expected to be absent
+            "reader_test.xlsx",
+            "missing",
+            pytest.raises(KeyError),
+            ((CRITICAL, "Variable missing not found in"),),
+        ),
+        (  # xlsx file, requested column expected to load
+            "reader_test.xlsx",
+            "var1",
+            does_not_raise(),
+            (),
+        ),
+    ],
+)
+def test_load_dataframe(shared_datadir, caplog, file, file_var, exp_err, expected_log):
+    """Test the data frame (csv and xlsx) variable loader.
+
+    The tests here are dependent on the test_file_format_loader, so cannot be run
+    individually.
+    """
+
+    from virtual_ecosystem.core.readers import load_from_dataframe
+
+    with exp_err:
+        darray = load_from_dataframe(shared_datadir / file, file_var)
+        assert isinstance(darray, DataArray)
+
+    # Check the logged messages match the expected log entries
+    log_check(caplog, expected_log)
+
+
 @pytest.mark.parametrize(
     argnames=[
         "filename",

diff --git a/virtual_ecosystem/core/readers.py b/virtual_ecosystem/core/readers.py
@@ -34,6 +34,8 @@ def new_function_to_load_tif_data(...):
 from collections.abc import Callable
 from pathlib import Path
 
+from pandas import read_csv, read_excel
+from pandas.errors import ParserError
 from xarray import DataArray, load_dataset
 
 from virtual_ecosystem.core.logger import LOGGER
@@ -55,7 +57,7 @@ def new_function_to_load_tif_data(...):
 """
 
 
-def register_file_format_loader(file_types: tuple[str]) -> Callable:
+def register_file_format_loader(file_types: tuple[str, ...]) -> Callable:
     """Adds a data loader function to the data loader registry.
 
     This decorator is used to register a function that loads data from a given file type
@@ -134,6 +136,59 @@ def load_netcdf(file: Path, var_name: str) -> DataArray:
     return dataset[var_name]
 
 
+@register_file_format_loader(file_types=(".csv", ".xlsx"))
+def load_from_dataframe(file: Path, var_name: str) -> DataArray:
+    """Loads a DataArray from a data frame format.
+
+    Args:
+        file: A Path for a csv or excel file containing the variable to load.
+        var_name: A string providing the name of the variable to load from the file.
+
+    Returns:
+        A DataArray holding the ``var_name`` column of the data frame, as produced by
+        ``pandas.Series.to_xarray``.
+
+    Raises:
+        FileNotFoundError: with bad file path names.
+        ParserError: if the csv data is not readable.
+        Exception: if the excel data is not readable.
+        KeyError: if the variable name is not a column in the file.
+
+    Note: the general exception is used because of the variety of exceptions that are
+    possible with read_excel.
+    """
+
+    to_raise: Exception
+
+    # Determine dataframe file type & load file. Each handler logs the new exception
+    # and chains it to the original so the underlying cause stays in the traceback.
+    try:
+        if file.suffix == ".csv":
+            dataset = read_csv(file)
+        else:
+            dataset = read_excel(file, engine="openpyxl")
+    except FileNotFoundError as err:
+        to_raise = FileNotFoundError(f"Data file not found: {file}")
+        LOGGER.critical(to_raise)
+        raise to_raise from err
+    except ParserError as err:
+        to_raise = ParserError(f"Could not load data from {file}: {err}.")
+        LOGGER.critical(to_raise)
+        raise to_raise from err
+    except Exception as err:
+        to_raise = Exception(f"Unidentified exception opening {file}: {err}")
+        LOGGER.critical(to_raise)
+        raise to_raise from err
+
+    # Check if file var is in the dataset
+    if var_name not in dataset.columns:
+        to_raise = KeyError(f"Variable {var_name} not found in {file}")
+        LOGGER.critical(to_raise)
+        raise to_raise
+
+    return dataset[var_name].to_xarray()
+
+
 def load_to_dataarray(
     file: Path,
     var_name: str,