diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a52d97..09a632b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
# Change log
+## v0.3.7
+
+### Main changes
+
+- Separate `Dataset.from_dataseries` and `Dataset.from_flexilims` to avoid confusion
+
+### Minor
+
+- `get_children` output is filtered to contain only relevant columns when `children_datatype` is not None
+
+### Bugfixes
## v0.3.6
### Main changes
diff --git a/flexiznam/main.py b/flexiznam/main.py
index e0a3813..a663f4b 100755
--- a/flexiznam/main.py
+++ b/flexiznam/main.py
@@ -968,11 +968,14 @@ def get_children(
if parent_id is None:
assert parent_name is not None, "Must provide either parent_id or parent_name"
parent_id = get_id(parent_name, flexilims_session=flexilims_session)
- results = format_results(flexilims_session.get_children(parent_id))
+ results = format_results(
+ flexilims_session.get_children(parent_id), return_list=True
+ )
if not len(results):
- return results
+ return pd.DataFrame(results)
if children_datatype is not None:
- results = results.loc[results.type == children_datatype, :]
+ results = [r for r in results if r["type"] == children_datatype]
+ results = pd.DataFrame(results)
results.set_index("name", drop=False, inplace=True)
return results
@@ -1046,8 +1049,8 @@ def get_datasets(
datapath_dict[recording_id] = datapaths
else:
datapath_dict[recording_id] = [
- flexiznam.Dataset.from_flexilims(
- data_series=ds, flexilims_session=flexilims_session
+ flexiznam.Dataset.from_dataseries(
+ dataseries=ds, flexilims_session=flexilims_session
)
for _, ds in datasets.iterrows()
]
@@ -1085,7 +1088,7 @@ def generate_name(datatype, name, flexilims_session=None, project_id=None):
return name
-def format_results(results):
+def format_results(results, return_list=False):
"""Make request output a nice DataFrame
This will crash if any attribute is also present in the flexilims reply,
@@ -1095,6 +1098,7 @@ def format_results(results):
Args:
results (:obj:`list` of :obj:`dict`): Flexilims reply
+ return_list (bool): if True, return a list of dicts instead of a DataFrame
Returns:
:py:class:`pandas.DataFrame`: Reply formatted as a DataFrame
@@ -1108,5 +1112,6 @@ def format_results(results):
)
result[attr_name] = attr_value
result.pop("attributes")
- df = pd.DataFrame(results)
- return df
+ if return_list:
+ return results
+ return pd.DataFrame(results)
diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py
index e5fc412..11e181e 100644
--- a/flexiznam/schema/datasets.py
+++ b/flexiznam/schema/datasets.py
@@ -56,44 +56,57 @@ def from_flexilims(
project=None,
name=None,
id=None,
- data_series=None,
flexilims_session=None,
):
- """Loads a dataset from flexilims.
+ """Loads a dataset from flexilims
If the dataset_type attribute of the flexilims entry defined in
Dataset.SUBCLASSES,this subclass will be used. Otherwise a generic Dataset is
returned
Args:
- project: Name of the project or hexadecimal project_id
- name: Unique name of the dataset on flexilims
- id: Hexadecimal id of the dataset on flexilims
- data_series: default to None. pd.Series as returned by flz.get_entities.
- If provided, supersedes project, name and id.
- flexilims_session: authentication session to access flexilims
+ project (str, optional): Name of the project or hexadecimal project_id. If
+ not provided, can be read from flexilims_session
+            name (str, optional): Unique name of the dataset on flexilims. Either
+                `name` or `id` must be provided
+            id (str, optional): Hexadecimal id of the dataset on flexilims. Either
+                `name` or `id` must be provided
+ flexilims_session (flexilims.Session, optional): authentication session to
+ access flexilims.
"""
- if data_series is not None:
- if (project is not None) or (name is not None):
- raise AttributeError("Specify either data_series OR project + name/id")
- else:
- data_series = flz.get_entity(
- project_id=project,
- datatype="dataset",
- name=name,
- id=id,
- flexilims_session=flexilims_session,
+ dataseries = flz.get_entity(
+ project_id=project,
+ datatype="dataset",
+ name=name,
+ id=id,
+ flexilims_session=flexilims_session,
+ )
+
+ if dataseries is None:
+ if project is None:
+ project = flexilims_session.project_id
+ raise FlexilimsError(
+ "No dataset named {} in project {}".format(name, project)
)
+ ds = Dataset.from_dataseries(dataseries, flexilims_session=flexilims_session)
+ return ds
- if data_series is None:
- if project is None:
- project = flexilims_session.project_id
- raise FlexilimsError(
- "No dataset named {} in project {}".format(name, project)
- )
- dataset_type = data_series.dataset_type
+ @staticmethod
+ def from_dataseries(
+ dataseries,
+ flexilims_session=None,
+ ):
+ """Create dataset from a flexilims dataseries
+
+ This function does not call flexilims, but uses the dataseries object directly.
+ Args:
+            dataseries (pandas.Series): dataset entry as returned by flz.get_entity
+ flexilims_session (flexilims.Session, optional): authentication session to
+ access flexilims. Will be added to dataset object.
+ """
+ dataset_type = dataseries.dataset_type
- kwargs = Dataset._format_series_to_kwargs(data_series)
+ kwargs = Dataset._format_series_to_kwargs(dataseries)
name = kwargs.pop("name")
kwargs["flexilims_session"] = flexilims_session
if dataset_type in Dataset.SUBCLASSES:
@@ -204,7 +217,7 @@ def from_origin(
)
elif conflicts == "skip" or conflicts == "overwrite":
if len(processed) == 1:
- return Dataset.from_flexilims(data_series=processed.iloc[0])
+ return Dataset.from_dataseries(dataseries=processed.iloc[0])
else:
raise flz.errors.NameNotUniqueError(
"{} {} datasets with name starting by {} exists for {}, "
diff --git a/flexiznam/utils.py b/flexiznam/utils.py
index 516f6a1..0effaaa 100644
--- a/flexiznam/utils.py
+++ b/flexiznam/utils.py
@@ -465,8 +465,8 @@ def _check_path(output, element, flexilims_session, recursive, error_only):
output.append([element.name, element.type, "Folder found", " ".join(ok), 0])
else:
try:
- ds = Dataset.from_flexilims(
- flexilims_session=flexilims_session, data_series=element
+ ds = Dataset.from_dataseries(
+ flexilims_session=flexilims_session, dataseries=element
)
if not ds.path_full.exists():
output.append(
diff --git a/setup.py b/setup.py
index 799edf2..df05b7d 100755
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
setup(
name="flexiznam",
- version="v0.3.6",
+ version="v0.3.7",
url="https://github.com/znamlab/flexznam",
license="MIT",
author="Antonin Blot",
diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml
index 04ed6d2..d6f2e8e 100644
--- a/tests/test-results/pytest_in_tests.xml
+++ b/tests/test-results/pytest_in_tests.xml
@@ -1 +1 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py
index 331e455..8544755 100644
--- a/tests/test_components/test_main.py
+++ b/tests/test_components/test_main.py
@@ -4,7 +4,7 @@
import flexiznam.main as flz
from flexiznam.config import PARAMETERS, get_password
from flexiznam.errors import FlexilimsError, NameNotUniqueError
-from tests.tests_resources.data_for_testing import MOUSE_ID
+from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION
# Test functions from main.py
from flexiznam.schema import Dataset
@@ -180,6 +180,13 @@ def test_get_children(flm_sess):
assert isinstance(res, pd.DataFrame)
res = flz.get_children(parent_name="mouse_physio_2p", flexilims_session=flm_sess)
assert len(res) == 1
+ res_all = flz.get_children(parent_name=SESSION, flexilims_session=flm_sess)
+ assert (res_all.type != "recording").sum() != 0
+ res_part = flz.get_children(
+ parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording"
+ )
+ assert (res_part.type != "recording").sum() == 0
+ assert res_all.shape[1] > res_part.shape[1]
def test_add_entity(flm_sess):
@@ -251,7 +258,7 @@ def test_update_entity(flm_sess):
assert dbval["attributes"]["acq_num"] is None
# restore database state
- ds = Dataset.from_flexilims(data_series=original_entity, flexilims_session=flm_sess)
+ ds = Dataset.from_dataseries(dataseries=original_entity, flexilims_session=flm_sess)
ds.update_flexilims(mode="overwrite")
new_entity = flz.get_entity(
datatype="dataset", name=dataset_name, flexilims_session=flm_sess
diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py
index a898f40..8c58fc5 100644
--- a/tests/test_components/tests_schema/test_datasets.py
+++ b/tests/test_components/tests_schema/test_datasets.py
@@ -1,10 +1,10 @@
import pytest
import pathlib
import pandas as pd
-from flexiznam.schema import Dataset
+from flexiznam.schema import Dataset, microscopy_data
from flexiznam.config import PARAMETERS
-from flexiznam.errors import DatasetError, FlexilimsError
-from tests.tests_resources.data_for_testing import TEST_PROJECT
+from flexiznam.errors import DatasetError, FlexilimsError, NameNotUniqueError
+from tests.tests_resources.data_for_testing import TEST_PROJECT, PROJECT_ID
from tests.test_components.test_main import MOUSE_ID
# Test the generic dataset class.
@@ -197,19 +197,118 @@ def test_from_flexilims(flm_sess):
assert ds_by_id.project == project
+def test_from_dataseries(flm_sess):
+ """This test requires the config file to include demo_project"""
+ series = pd.Series(
+ name="minimal_series",
+ data=dict(
+ genealogy=("minimal_series",),
+ path="/fake/path",
+ id="hexidonflexilims",
+ project=PROJECT_ID,
+ is_raw="no",
+ dataset_type="suite2p_rois",
+ ),
+ )
+ ds = Dataset.from_dataseries(
+ dataseries=series,
+ flexilims_session=flm_sess,
+ )
+ assert ds.flexilims_session == flm_sess
+ assert ds.full_name == "minimal_series"
+ assert type(ds) == Dataset
+ series = pd.Series(
+ name="test_microscopy",
+ data=dict(
+ genealogy=(
+ "test",
+ "microscopy",
+ ),
+ path="/fake/path",
+ id="hexidonflexilims",
+ project=PROJECT_ID,
+ is_raw="no",
+ dataset_type="microscopy",
+ ),
+ )
+
+ ds = Dataset.from_dataseries(
+ dataseries=series,
+ flexilims_session=flm_sess,
+ )
+ assert ds.flexilims_session == flm_sess
+ assert ds.full_name == "test_microscopy"
+ assert ds.dataset_name == "microscopy"
+ assert type(ds) == microscopy_data.MicroscopyData
+
+
def test_from_origin(flm_sess):
"""This test requires the database to be up-to-date for the physio mouse"""
origin_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube"
- ds = Dataset.from_origin(
+ ds0 = Dataset.from_origin(
origin_type="recording",
origin_name=origin_name,
dataset_type="suite2p_rois",
- conflicts="skip",
+ conflicts="abort",
flexilims_session=flm_sess,
)
- assert ds.flexilims_session == flm_sess
- assert ds.genealogy[-1].startswith("suite2p_rois")
+ assert ds0.flexilims_session == flm_sess
+ assert ds0.genealogy[-1].startswith("suite2p_rois")
+ assert ds0.dataset_name == "suite2p_rois_0"
+ ds0.update_flexilims()
+ # now from_origin should raise an error if abort
+ with pytest.raises(DatasetError) as err:
+ Dataset.from_origin(
+ origin_type="recording",
+ origin_name=origin_name,
+ dataset_type="suite2p_rois",
+ conflicts="abort",
+ flexilims_session=flm_sess,
+ )
+ ds0_bis = Dataset.from_origin(
+ origin_type="recording",
+ origin_name=origin_name,
+ dataset_type="suite2p_rois",
+ conflicts="overwrite",
+ flexilims_session=flm_sess,
+ )
+ assert ds0.id == ds0_bis.id
+ ds1 = Dataset.from_origin(
+ origin_type="recording",
+ origin_name=origin_name,
+ dataset_type="suite2p_rois",
+ conflicts="append",
+ flexilims_session=flm_sess,
+ )
+ assert ds1.id is None
+ assert ds1.flexilims_session == flm_sess
+ assert ds1.dataset_name == "suite2p_rois_1"
+ ds1.update_flexilims()
+ assert ds1.id is not None
+ assert ds1.id != ds0.id
+ # now we have 2 datasets, skip and overwrite should raise an error
+ with pytest.raises(NameNotUniqueError) as err:
+ Dataset.from_origin(
+ origin_type="recording",
+ origin_name=origin_name,
+ dataset_type="suite2p_rois",
+ conflicts="skip",
+ flexilims_session=flm_sess,
+ )
+ with pytest.raises(NameNotUniqueError) as err:
+ Dataset.from_origin(
+ origin_type="recording",
+ origin_name=origin_name,
+ dataset_type="suite2p_rois",
+ conflicts="overwrite",
+ flexilims_session=flm_sess,
+ )
+ # clean up
+ for ds in (ds0, ds1):
+ flm_sess.delete(ds.id)
+
+
def test_update_flexilims(flm_sess):
"""This test requires the database to be up-to-date for the physio mouse"""
diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py
index 72d32f9..78d5e34 100644
--- a/tests/tests_resources/data_for_testing.py
+++ b/tests/tests_resources/data_for_testing.py
@@ -6,6 +6,8 @@
MOUSE_ID = "6437dcb13ded9c65df142a12"
TEST_PROJECT = "demo_project"
+PROJECT_ID = "610989f9a651ff0b6237e0f6"
+SESSION = "mouse_physio_2p_S20211102"
DATA_ROOT = Path(PARAMETERS["data_root"]["raw"]) / TEST_PROJECT
PROCESSED_ROOT = Path(PARAMETERS["data_root"]["processed"]) / TEST_PROJECT