[feature] add allow_multiple and return_dataseries
Antonin Blot committed Jul 17, 2023
1 parent 75246fb commit a02e97b
Showing 3 changed files with 71 additions and 15 deletions.
CHANGELOG.md: 2 additions & 1 deletion

@@ -6,7 +6,8 @@

- Add `get_data_root` function to get `raw` or `processed` root for a project
- `get_children` can filter children by attributes before returning results
- refactor `get_datasets` to be non recursive and add filtering options
- refactor `get_datasets` to be non recursive and add filtering options. Also add
multiple options to filter datasets and format output
- add `get_datasets_recursively` to get all datasets below a given entity

### Bugfixes
flexiznam/main.py: 29 additions & 12 deletions

@@ -1134,20 +1134,28 @@ def get_datasets(
dataset_type=None,
project_id=None,
flexilims_session=None,
return_paths=True,
filter_datasets=None,
allow_multiple=True,
return_paths=False,
return_dataseries=False,
):
"""
Args:
origin_id (str): hexadecimal ID of the origin session.
recording_type (str): type of the recording to filter by. If `None`,
will return datasets for all recordings.
origin_id (str): hexadecimal ID of the origin session. Not required if
origin_name is provided.
origin_name (str): text name of the origin session. Not required if origin_id
is provided.
dataset_type (str): type of the dataset to filter by. If `None`,
will return all datasets.
project_id (str): text name of the project. Not required if
`flexilims_session` is provided.
flexilims_session (:py:class:`flexilims.Flexilims`): Flexilims session object
filter_datasets (dict): dictionary of key-value pairs to filter datasets by.
allow_multiple (bool): if True, allow multiple datasets to be returned,
otherwise ensure that only one dataset exists online and return it.
return_paths (bool): if True, return a list of paths. If False, return the
dataset objects or dataseries.
return_dataseries (bool): if True, return the dataseries instead of the
dataset objects.
_output (list): internal argument used for recursion.
@@ -1165,6 +1173,7 @@
filter_datasets = {}
if dataset_type is not None:
filter_datasets.update({"dataset_type": dataset_type})

datasets = get_children(
parent_id=origin_id,
parent_name=origin_name,
@@ -1173,14 +1182,22 @@
filter=filter_datasets,
)

datasets = [
flexiznam.Dataset.from_dataseries(
dataseries=ds, flexilims_session=flexilims_session
)
for _, ds in datasets.iterrows()
]
if return_paths:
datasets = [ds.path_full for ds in datasets]
if not return_dataseries:
datasets = [
flexiznam.Dataset.from_dataseries(
dataseries=ds, flexilims_session=flexilims_session
)
for _, ds in datasets.iterrows()
]
if return_paths:
datasets = [ds.path_full for ds in datasets]

if not allow_multiple:
assert len(datasets) <= 1, f"Found {len(datasets)} datasets. Expected at most 1."
if len(datasets) == 1:
datasets = datasets[0] if not return_dataseries else datasets.iloc[0]
else:
datasets = None
return datasets


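To show how the new arguments combine, here is a minimal usage sketch based on the updated docstring and on the tests below; the project name, session name and the acq_uid value are placeholders, not values defined by this commit.

import flexiznam as flz

# Placeholder names for illustration only; use a real project and session.
PROJECT = "my_project"
SESSION = "mouse_001_S20230101"

# Default behaviour: a list of Dataset objects attached to the session,
# optionally narrowed by dataset_type and/or arbitrary attribute values.
datasets = flz.get_datasets(
    origin_name=SESSION,
    project_id=PROJECT,
    filter_datasets=dict(acq_uid="overview_zoom1_00001"),
)

# return_paths=True returns the full paths instead of Dataset objects.
paths = flz.get_datasets(
    origin_name=SESSION,
    project_id=PROJECT,
    return_paths=True,
)

# allow_multiple=False asserts that at most one dataset matches and returns
# it directly (or None when nothing matches) rather than a list.
single = flz.get_datasets(
    origin_name=SESSION,
    project_id=PROJECT,
    filter_datasets=dict(acq_uid="overview_zoom1_00001"),
    allow_multiple=False,
)

# return_dataseries=True skips building Dataset objects and returns the raw
# flexilims entries as a pandas DataFrame (a Series with allow_multiple=False).
raw = flz.get_datasets(
    origin_name=SESSION,
    project_id=PROJECT,
    return_dataseries=True,
)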
tests/test_components/test_main.py: 40 additions & 2 deletions

@@ -9,7 +9,7 @@
from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION

# Test functions from main.py
from flexiznam.schema import Dataset, HarpData
from flexiznam.schema import Dataset, HarpData, ScanimageData

# this needs to change every time I reset flexilims

@@ -136,6 +136,7 @@ def test_get_datasets(flm_sess):
ds = flz.get_datasets(
origin_name=SESSION,
flexilims_session=flm_sess,
return_paths=True,
)
assert len(ds) == 3
assert all([isinstance(d, pathlib.PosixPath) for d in ds])
@@ -153,11 +154,42 @@
filter_datasets=dict(acq_uid="overview_zoom1_00001"),
)
assert len(ds) == 1
ds = flz.get_datasets(
origin_name=SESSION,
flexilims_session=flm_sess,
return_paths=False,
filter_datasets=dict(acq_uid="overview_zoom1_00001"),
allow_multiple=False,
)
assert isinstance(ds, ScanimageData)
ds = flz.get_datasets(
origin_name=SESSION,
flexilims_session=flm_sess,
return_paths=True,
filter_datasets=dict(acq_uid="overview_zoom1_00001"),
allow_multiple=False,
)
assert isinstance(ds, pathlib.PosixPath)
ds = flz.get_datasets(
origin_name=SESSION,
flexilims_session=flm_sess,
return_dataseries=True,
filter_datasets=dict(acq_uid="overview_zoom1_00001"),
allow_multiple=True,
)
assert isinstance(ds, pd.DataFrame)
ds = flz.get_datasets(
origin_name=SESSION,
flexilims_session=flm_sess,
return_dataseries=True,
filter_datasets=dict(acq_uid="overview_zoom1_00001"),
allow_multiple=False,
)
assert isinstance(ds, pd.Series)

rec = flz.get_children(
parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording"
).iloc[0]

ds_all = flz.get_datasets(
origin_id=rec.id,
flexilims_session=flm_sess,
@@ -186,6 +218,12 @@ def test_get_datasets(flm_sess):
return_paths=True,
)
assert ds == ds2
with pytest.raises(AssertionError):
flz.get_datasets(
origin_id=rec.id,
project_id=flm_sess.project_id,
allow_multiple=False,
)


def test_get_datasets_recursively(flm_sess):
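The tests above also fetch recordings with get_children, and the changelog mentions that get_children can filter children by attributes; below is a hedged sketch of what that filtering might look like, reusing the call pattern from this diff. The "protocol" attribute and its value are hypothetical, and flm_sess is assumed to be an open flexilims session like the fixture used in the tests.

import flexiznam as flz

def get_retinotopy_recordings(flm_sess, session_name):
    # Keep only recordings whose attributes match the filter dict; the
    # "protocol" attribute and its value are hypothetical placeholders.
    recordings = flz.get_children(
        parent_name=session_name,
        children_datatype="recording",
        flexilims_session=flm_sess,
        filter=dict(protocol="retinotopy"),
    )
    return recordings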
