diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a52d97..09a632b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,16 @@
 # Change log
+## v0.3.7
+
+### Main changes
+
+- Separate `Dataset.from_dataseries` and `Dataset.from_flexilims` to avoid confusion
+
+### Minor
+
+- `get_children` output is filtered to contain only relevant columns when `children_datatype` is not None
+
+### Bugfixes
 
 ## v0.3.6
 
 ### Main changes
diff --git a/flexiznam/main.py b/flexiznam/main.py
index e0a3813..a663f4b 100755
--- a/flexiznam/main.py
+++ b/flexiznam/main.py
@@ -968,11 +968,14 @@ def get_children(
     if parent_id is None:
         assert parent_name is not None, "Must provide either parent_id or parent_name"
         parent_id = get_id(parent_name, flexilims_session=flexilims_session)
-    results = format_results(flexilims_session.get_children(parent_id))
+    results = format_results(
+        flexilims_session.get_children(parent_id), return_list=True
+    )
     if not len(results):
-        return results
+        return pd.DataFrame(results)
     if children_datatype is not None:
-        results = results.loc[results.type == children_datatype, :]
+        results = [r for r in results if r["type"] == children_datatype]
+    results = pd.DataFrame(results)
     results.set_index("name", drop=False, inplace=True)
     return results
 
@@ -1046,8 +1049,8 @@
             datapath_dict[recording_id] = datapaths
         else:
             datapath_dict[recording_id] = [
-                flexiznam.Dataset.from_flexilims(
-                    data_series=ds, flexilims_session=flexilims_session
+                flexiznam.Dataset.from_dataseries(
+                    dataseries=ds, flexilims_session=flexilims_session
                 )
                 for _, ds in datasets.iterrows()
             ]
@@ -1085,7 +1088,7 @@ def generate_name(datatype, name, flexilims_session=None, project_id=None):
     return name
 
 
-def format_results(results):
+def format_results(results, return_list=False):
     """Make request output a nice DataFrame
 
     This will crash if any attribute is also present in the flexilims reply,
@@ -1095,6 +1098,7 @@
 
     Args:
         results (:obj:`list` of :obj:`dict`): Flexilims reply
+        return_list (bool): if True, return a list of dicts instead of a DataFrame
 
     Returns:
         :py:class:`pandas.DataFrame`: Reply formatted as a DataFrame
@@ -1108,5 +1112,6 @@
             )
             result[attr_name] = attr_value
         result.pop("attributes")
-    df = pd.DataFrame(results)
-    return df
+    if return_list:
+        return results
+    return pd.DataFrame(results)
diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py
index e5fc412..11e181e 100644
--- a/flexiznam/schema/datasets.py
+++ b/flexiznam/schema/datasets.py
@@ -56,44 +56,57 @@ def from_flexilims(
         project=None,
         name=None,
         id=None,
-        data_series=None,
         flexilims_session=None,
     ):
-        """Loads a dataset from flexilims.
+        """Loads a dataset from flexilims
 
         If the dataset_type attribute of the flexilims entry defined in
         Dataset.SUBCLASSES,this subclass will be used. Otherwise a generic Dataset is
         returned
 
         Args:
-            project: Name of the project or hexadecimal project_id
-            name: Unique name of the dataset on flexilims
-            id: Hexadecimal id of the dataset on flexilims
-            data_series: default to None. pd.Series as returned by flz.get_entities.
-                If provided, supersedes project, name and id.
-            flexilims_session: authentication session to access flexilims
+            project (str, optional): Name of the project or hexadecimal project_id. If
+                not provided, can be read from flexilims_session
+            name (str, optional): Unique name of the dataset on flexilims. Ignored if
+                `dataseries` is provided
+            id (str, optional): Hexadecimal id of the dataset on flexilims. Ignored if
+                `dataseries` is provided
+            flexilims_session (flexilims.Session, optional): authentication session to
+                access flexilims.
         """
-        if data_series is not None:
-            if (project is not None) or (name is not None):
-                raise AttributeError("Specify either data_series OR project + name/id")
-        else:
-            data_series = flz.get_entity(
-                project_id=project,
-                datatype="dataset",
-                name=name,
-                id=id,
-                flexilims_session=flexilims_session,
+        dataseries = flz.get_entity(
+            project_id=project,
+            datatype="dataset",
+            name=name,
+            id=id,
+            flexilims_session=flexilims_session,
+        )
+
+        if dataseries is None:
+            if project is None:
+                project = flexilims_session.project_id
+            raise FlexilimsError(
+                "No dataset named {} in project {}".format(name, project)
             )
+        ds = Dataset.from_dataseries(dataseries, flexilims_session=flexilims_session)
+        return ds
 
-        if data_series is None:
-            if project is None:
-                project = flexilims_session.project_id
-            raise FlexilimsError(
-                "No dataset named {} in project {}".format(name, project)
-            )
-        dataset_type = data_series.dataset_type
+    @staticmethod
+    def from_dataseries(
+        dataseries,
+        flexilims_session=None,
+    ):
+        """Create dataset from a flexilims dataseries
+
+        This function does not call flexilims, but uses the dataseries object directly.
+        Args:
+            dataseries (flexilims.DataSeries): flexilims dataseries object
+            flexilims_session (flexilims.Session, optional): authentication session to
+                access flexilims. Will be added to dataset object.
+        """
+        dataset_type = dataseries.dataset_type
 
-        kwargs = Dataset._format_series_to_kwargs(data_series)
+        kwargs = Dataset._format_series_to_kwargs(dataseries)
         name = kwargs.pop("name")
         kwargs["flexilims_session"] = flexilims_session
         if dataset_type in Dataset.SUBCLASSES:
@@ -204,7 +217,7 @@ def from_origin(
             )
         elif conflicts == "skip" or conflicts == "overwrite":
             if len(processed) == 1:
-                return Dataset.from_flexilims(data_series=processed.iloc[0])
+                return Dataset.from_dataseries(dataseries=processed.iloc[0])
             else:
                 raise flz.errors.NameNotUniqueError(
                     "{} {} datasets with name starting by {} exists for {}, "
diff --git a/flexiznam/utils.py b/flexiznam/utils.py
index 516f6a1..0effaaa 100644
--- a/flexiznam/utils.py
+++ b/flexiznam/utils.py
@@ -465,8 +465,8 @@ def _check_path(output, element, flexilims_session, recursive, error_only):
         output.append([element.name, element.type, "Folder found", " ".join(ok), 0])
     else:
        try:
-            ds = Dataset.from_flexilims(
-                flexilims_session=flexilims_session, data_series=element
+            ds = Dataset.from_dataseries(
+                flexilims_session=flexilims_session, dataseries=element
             )
             if not ds.path_full.exists():
                 output.append(
diff --git a/setup.py b/setup.py
index 799edf2..df05b7d 100755
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name="flexiznam",
-    version="v0.3.6",
+    version="v0.3.7",
     url="https://github.com/znamlab/flexznam",
     license="MIT",
     author="Antonin Blot",
diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml
index 04ed6d2..d6f2e8e 100644
--- a/tests/test-results/pytest_in_tests.xml
+++ b/tests/test-results/pytest_in_tests.xml
@@ -1 +1 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py
index 331e455..8544755 100644
--- a/tests/test_components/test_main.py
+++ b/tests/test_components/test_main.py
@@ -4,7 +4,7 @@
 import flexiznam.main as flz
 from flexiznam.config import PARAMETERS, get_password
 from flexiznam.errors import FlexilimsError, NameNotUniqueError
-from tests.tests_resources.data_for_testing import MOUSE_ID
+from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION
 
 # Test functions from main.py
 from flexiznam.schema import Dataset
@@ -180,6 +180,13 @@ def test_get_children(flm_sess):
     assert isinstance(res, pd.DataFrame)
     res = flz.get_children(parent_name="mouse_physio_2p", flexilims_session=flm_sess)
     assert len(res) == 1
+    res_all = flz.get_children(parent_name=SESSION, flexilims_session=flm_sess)
+    assert (res_all.type != "recording").sum() != 0
+    res_part = flz.get_children(
+        parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording"
+    )
+    assert (res_part.type != "recording").sum() == 0
+    assert res_all.shape[1] > res_part.shape[1]
 
 
 def test_add_entity(flm_sess):
@@ -251,7 +258,7 @@ def test_update_entity(flm_sess):
     assert dbval["attributes"]["acq_num"] is None
 
     # restore database state
-    ds = Dataset.from_flexilims(data_series=original_entity, flexilims_session=flm_sess)
+    ds = Dataset.from_dataseries(dataseries=original_entity, flexilims_session=flm_sess)
     ds.update_flexilims(mode="overwrite")
     new_entity = flz.get_entity(
         datatype="dataset", name=dataset_name, flexilims_session=flm_sess
diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py
index a898f40..8c58fc5 100644
--- a/tests/test_components/tests_schema/test_datasets.py
+++ b/tests/test_components/tests_schema/test_datasets.py
@@ -1,10 +1,10 @@
 import pytest
 import pathlib
 import pandas as pd
-from flexiznam.schema import Dataset
+from flexiznam.schema import Dataset, microscopy_data
 from flexiznam.config import PARAMETERS
-from flexiznam.errors import DatasetError, FlexilimsError
-from tests.tests_resources.data_for_testing import TEST_PROJECT
+from flexiznam.errors import DatasetError, FlexilimsError, NameNotUniqueError
+from tests.tests_resources.data_for_testing import TEST_PROJECT, PROJECT_ID
 from tests.test_components.test_main import MOUSE_ID
 
 # Test the generic dataset class.
@@ -197,19 +197,118 @@ def test_from_flexilims(flm_sess):
     assert ds_by_id.project == project
 
 
+def test_from_dataseries(flm_sess):
+    """This test requires the config file to include demo_project"""
+    series = pd.Series(
+        name="minimal_series",
+        data=dict(
+            genealogy=("minimal_series",),
+            path="/fake/path",
+            id="hexidonflexilims",
+            project=PROJECT_ID,
+            is_raw="no",
+            dataset_type="suite2p_rois",
+        ),
+    )
+    ds = Dataset.from_dataseries(
+        dataseries=series,
+        flexilims_session=flm_sess,
+    )
+    assert ds.flexilims_session == flm_sess
+    assert ds.full_name == "minimal_series"
+    assert type(ds) == Dataset
+    series = pd.Series(
+        name="test_microscopy",
+        data=dict(
+            genealogy=(
+                "test",
+                "microscopy",
+            ),
+            path="/fake/path",
+            id="hexidonflexilims",
+            project=PROJECT_ID,
+            is_raw="no",
+            dataset_type="microscopy",
+        ),
+    )
+
+    ds = Dataset.from_dataseries(
+        dataseries=series,
+        flexilims_session=flm_sess,
+    )
+    assert ds.flexilims_session == flm_sess
+    assert ds.full_name == "test_microscopy"
+    assert ds.dataset_name == "microscopy"
+    assert type(ds) == microscopy_data.MicroscopyData
+
+
 def test_from_origin(flm_sess):
     """This test requires the database to be up-to-date for the physio mouse"""
     origin_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube"
-    ds = Dataset.from_origin(
+    ds0 = Dataset.from_origin(
         origin_type="recording",
         origin_name=origin_name,
         dataset_type="suite2p_rois",
-        conflicts="skip",
+        conflicts="abort",
         flexilims_session=flm_sess,
     )
-    assert ds.flexilims_session == flm_sess
-    assert ds.genealogy[-1].startswith("suite2p_rois")
+    assert ds0.flexilims_session == flm_sess
+    assert ds0.genealogy[-1].startswith("suite2p_rois")
+    assert ds0.dataset_name == "suite2p_rois_0"
+    ds0.update_flexilims()
+    # now from_origin should raise an error if abort
+    with pytest.raises(DatasetError) as err:
+        Dataset.from_origin(
+            origin_type="recording",
+            origin_name=origin_name,
+            dataset_type="suite2p_rois",
+            conflicts="abort",
+            flexilims_session=flm_sess,
+        )
+    ds0_bis = Dataset.from_origin(
+        origin_type="recording",
+        origin_name=origin_name,
+        dataset_type="suite2p_rois",
+        conflicts="overwrite",
+        flexilims_session=flm_sess,
+    )
+    assert ds0.id == ds0_bis.id
+    ds1 = Dataset.from_origin(
+        origin_type="recording",
+        origin_name=origin_name,
+        dataset_type="suite2p_rois",
+        conflicts="append",
+        flexilims_session=flm_sess,
+    )
+    assert ds1.id is None
+    assert ds1.flexilims_session == flm_sess
+    assert ds1.dataset_name == "suite2p_rois_1"
+    ds1.update_flexilims()
+    assert ds1.id is not None
+    assert ds1.id != ds0.id
+    # now we have 2 datasets, skip and overwrite should raise an error
+    with pytest.raises(NameNotUniqueError) as err:
+        Dataset.from_origin(
+            origin_type="recording",
+            origin_name=origin_name,
+            dataset_type="suite2p_rois",
+            conflicts="skip",
+            flexilims_session=flm_sess,
+        )
+    with pytest.raises(NameNotUniqueError) as err:
+        Dataset.from_origin(
+            origin_type="recording",
+            origin_name=origin_name,
+            dataset_type="suite2p_rois",
+            conflicts="overwrite",
+            flexilims_session=flm_sess,
+        )
+    # clean up
+    for ds in (ds0, ds1):
+        flm_sess.delete(ds.id)
+
+
 
 
 def test_update_flexilims(flm_sess):
     """This test requires the database to be up-to-date for the physio mouse"""
diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py
index 72d32f9..78d5e34 100644
--- a/tests/tests_resources/data_for_testing.py
+++ b/tests/tests_resources/data_for_testing.py
@@ -6,6 +6,8 @@
 MOUSE_ID = "6437dcb13ded9c65df142a12"
 TEST_PROJECT = "demo_project"
+PROJECT_ID = "610989f9a651ff0b6237e0f6"
+SESSION = "mouse_physio_2p_S20211102"
 
 DATA_ROOT = Path(PARAMETERS["data_root"]["raw"]) / TEST_PROJECT
 PROCESSED_ROOT = Path(PARAMETERS["data_root"]["processed"]) / TEST_PROJECT
 