Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a data method to validate data frame variables #658

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions tests/core/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1107,3 +1107,54 @@ def test_merge_continuous_file_already_exists(
),
),
)


@pytest.mark.parametrize(
    "vars, var_names, exp_result, exp_msg",
    [
        # Both requested variables exist, are 1D and share a length.
        pytest.param(
            {"a": DataArray(np.ones(12)), "b": DataArray(np.ones(12))},
            ["a", "b"],
            True,
            "Variables form a data frame",
            id="correct",
        ),
        # One of the requested names was never loaded.
        pytest.param(
            {"a": DataArray(np.ones(12)), "b": DataArray(np.ones(12))},
            ["a", "c"],
            False,
            "Missing variables: c",
            id="missing variable",
        ),
        # Variable "a" is two dimensional.
        pytest.param(
            {"a": DataArray(np.ones((12, 2))), "b": DataArray(np.ones(12))},
            ["a", "b"],
            False,
            "Variables not one dimensional: a",
            id="not all one dimensional",
        ),
        # Both variables are 1D but their lengths differ.
        pytest.param(
            {
                "a": DataArray(np.ones(14), dims="dim_1"),
                "b": DataArray(np.ones(12), dims="dim_2"),
            },
            ["a", "b"],
            False,
            "Variables of unequal length: 12,14",
            id="not equal length",
        ),
    ],
)
def test_Data_confirm_variables_form_data_frame(vars, var_names, exp_result, exp_msg):
    """Check the outcome and message reported by the data frame validation.

    Each case loads the parametrised variables into a fresh ``Data`` instance and
    compares both elements of the tuple returned by
    ``confirm_variables_form_data_frame`` against the expected values.
    """

    from virtual_ecosystem.core.data import Data
    from virtual_ecosystem.core.grid import Grid

    # Populate a Data instance built on a default Grid with the test variables
    data_store = Data(grid=Grid())
    data_store.add_from_dict(vars)

    outcome, message = data_store.confirm_variables_form_data_frame(var_names)

    assert outcome == exp_result
    assert message == exp_msg
31 changes: 31 additions & 0 deletions virtual_ecosystem/core/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,37 @@ def output_current_state(

return out_path

def confirm_variables_form_data_frame(
    self, var_names: list[str]
) -> tuple[bool, str]:
    """Check a list of named variables form a data frame.

    This is a utility method to check if a set of named variables are present in the
    Data object and that together they form a data frame: a set of equal length, one
    dimensional arrays, providing consistent tuples of values across the variables.

    The checks are applied in order (names provided, variables present, variables
    one dimensional, variables of equal length) and the method returns at the first
    check that fails.

    Args:
        var_names: A list of the variable names that form a data frame.

    Returns:
        A tuple containing a boolean, which is True only if all checks pass, and a
        message that either confirms success or describes the first failed check.
    """

    # Without any names there is nothing to validate: report that explicitly rather
    # than falling through to a misleading "unequal length" message.
    if not var_names:
        return False, "No variable names provided"

    # All vars present
    missing_var = [v for v in var_names if v not in self]
    if missing_var:
        return False, f"Missing variables: {', '.join(missing_var)}"

    # All vars one dimensional. Testing for exactly one dimension also rejects zero
    # dimensional variables, which have no length to compare in the next step.
    data_not_one_d = [var for var in var_names if self[var].ndim != 1]
    if data_not_one_d:
        return False, f"Variables not one dimensional: {','.join(data_not_one_d)}"

    # All vars equal sized. Lengths are kept as integers until after sorting so that
    # they are reported in numeric order (12,100) rather than string order (100,12).
    lengths = sorted({self[var].shape[0] for var in var_names})
    if len(lengths) != 1:
        return False, f"Variables of unequal length: {','.join(map(str, lengths))}"

    return True, "Variables form a data frame"


def merge_continuous_data_files(
data_options: dict[str, Any], continuous_data_files: list[Path]
Expand Down
Loading