From a7bc2c3ba70f6d66858469baee96f5f103c990fd Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 4 Jul 2023 18:42:58 +0100 Subject: [PATCH 1/5] bump version number --- CHANGELOG.md | 7 +++++++ setup.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a52d97..0e2d897 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Change log +## v0.3.7 + +### Main changes + +### Minor + +### Bugfixes ## v0.3.6 ### Main changes diff --git a/setup.py b/setup.py index 799edf2..df05b7d 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="flexiznam", - version="v0.3.6", + version="v0.3.7", url="https://github.com/znamlab/flexznam", license="MIT", author="Antonin Blot", From 8edd8e077a829062f8661f72429d5b69e1d41701 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 5 Jul 2023 10:25:04 +0100 Subject: [PATCH 2/5] [doc] Improve docstring of from_flexilims --- flexiznam/schema/datasets.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index e5fc412..585fc7a 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -59,19 +59,24 @@ def from_flexilims( data_series=None, flexilims_session=None, ): - """Loads a dataset from flexilims. + """Loads a dataset from flexilims or flexilims data series. If the dataset_type attribute of the flexilims entry defined in Dataset.SUBCLASSES,this subclass will be used. Otherwise a generic Dataset is returned Args: - project: Name of the project or hexadecimal project_id - name: Unique name of the dataset on flexilims - id: Hexadecimal id of the dataset on flexilims - data_series: default to None. pd.Series as returned by flz.get_entities. - If provided, supersedes project, name and id. - flexilims_session: authentication session to access flexilims + project (str, optional): Name of the project or hexadecimal project_id. If + not provided, can be read from flexilims_session + name (str, optional): Unique name of the dataset on flexilims. Ignored if + `data_series` is provided + id (str, optional): Hexadecimal id of the dataset on flexilims. Ignored if + `data_series` is provided + data_series (pandas.Series, optional): default to None. Dataset info as + returned by flz.get_entities. If provided, supersedes project, name and + id. + flexilims_session (flexilims.Session, optional): authentication session to + access flexilims. """ if data_series is not None: if (project is not None) or (name is not None): From a1ad595c97877494bf97239df96d3ab4a146c712 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 6 Jul 2023 10:44:42 +0100 Subject: [PATCH 3/5] [feature] add from_dataseries --- CHANGELOG.md | 2 + flexiznam/main.py | 4 +- flexiznam/schema/datasets.py | 64 +++++++++++-------- flexiznam/utils.py | 4 +- tests/test_components/test_main.py | 2 +- .../tests_schema/test_datasets.py | 49 +++++++++++++- tests/tests_resources/data_for_testing.py | 1 + 7 files changed, 91 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e2d897..cb910a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Main changes +- Separate `Dataset.from_dataseries` and `Dataset.from_flexilims` to avoid confusion + ### Minor ### Bugfixes diff --git a/flexiznam/main.py b/flexiznam/main.py index e0a3813..a468d47 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -1046,8 +1046,8 @@ def get_datasets( datapath_dict[recording_id] = datapaths else: datapath_dict[recording_id] = [ - flexiznam.Dataset.from_flexilims( - data_series=ds, flexilims_session=flexilims_session + flexiznam.Dataset.from_dataseries( + dataseries=ds, flexilims_session=flexilims_session ) for _, ds in datasets.iterrows() ] diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 585fc7a..11e181e 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -56,49 +56,57 @@ def from_flexilims( project=None, name=None, id=None, - data_series=None, flexilims_session=None, ): - """Loads a dataset from flexilims or flexilims data series. + """Loads a dataset from flexilims If the dataset_type attribute of the flexilims entry defined in Dataset.SUBCLASSES,this subclass will be used. Otherwise a generic Dataset is returned Args: - project (str, optional): Name of the project or hexadecimal project_id. If + project (str, optional): Name of the project or hexadecimal project_id. If not provided, can be read from flexilims_session name (str, optional): Unique name of the dataset on flexilims. Ignored if - `data_series` is provided + `dataseries` is provided id (str, optional): Hexadecimal id of the dataset on flexilims. Ignored if - `data_series` is provided - data_series (pandas.Series, optional): default to None. Dataset info as - returned by flz.get_entities. If provided, supersedes project, name and - id. - flexilims_session (flexilims.Session, optional): authentication session to + `dataseries` is provided + flexilims_session (flexilims.Session, optional): authentication session to access flexilims. """ - if data_series is not None: - if (project is not None) or (name is not None): - raise AttributeError("Specify either data_series OR project + name/id") - else: - data_series = flz.get_entity( - project_id=project, - datatype="dataset", - name=name, - id=id, - flexilims_session=flexilims_session, + dataseries = flz.get_entity( + project_id=project, + datatype="dataset", + name=name, + id=id, + flexilims_session=flexilims_session, + ) + + if dataseries is None: + if project is None: + project = flexilims_session.project_id + raise FlexilimsError( + "No dataset named {} in project {}".format(name, project) ) + ds = Dataset.from_dataseries(dataseries, flexilims_session=flexilims_session) + return ds - if data_series is None: - if project is None: - project = flexilims_session.project_id - raise FlexilimsError( - "No dataset named {} in project {}".format(name, project) - ) - dataset_type = data_series.dataset_type + @staticmethod + def from_dataseries( + dataseries, + flexilims_session=None, + ): + """Create dataset from a flexilims dataseries + + This function does not call flexilims, but uses the dataseries object directly. + Args: + dataseries (flexilims.DataSeries): flexilims dataseries object + flexilims_session (flexilims.Session, optional): authentication session to + access flexilims. Will be added to dataset object. + """ + dataset_type = dataseries.dataset_type - kwargs = Dataset._format_series_to_kwargs(data_series) + kwargs = Dataset._format_series_to_kwargs(dataseries) name = kwargs.pop("name") kwargs["flexilims_session"] = flexilims_session if dataset_type in Dataset.SUBCLASSES: @@ -209,7 +217,7 @@ def from_origin( ) elif conflicts == "skip" or conflicts == "overwrite": if len(processed) == 1: - return Dataset.from_flexilims(data_series=processed.iloc[0]) + return Dataset.from_dataseries(dataseries=processed.iloc[0]) else: raise flz.errors.NameNotUniqueError( "{} {} datasets with name starting by {} exists for {}, " diff --git a/flexiznam/utils.py b/flexiznam/utils.py index 516f6a1..0effaaa 100644 --- a/flexiznam/utils.py +++ b/flexiznam/utils.py @@ -465,8 +465,8 @@ def _check_path(output, element, flexilims_session, recursive, error_only): output.append([element.name, element.type, "Folder found", " ".join(ok), 0]) else: try: - ds = Dataset.from_flexilims( - flexilims_session=flexilims_session, data_series=element + ds = Dataset.from_dataseries( + flexilims_session=flexilims_session, dataseries=element ) if not ds.path_full.exists(): output.append( diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 331e455..32a7d1d 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -251,7 +251,7 @@ def test_update_entity(flm_sess): assert dbval["attributes"]["acq_num"] is None # restore database state - ds = Dataset.from_flexilims(data_series=original_entity, flexilims_session=flm_sess) + ds = Dataset.from_dataseries(dataseries=original_entity, flexilims_session=flm_sess) ds.update_flexilims(mode="overwrite") new_entity = flz.get_entity( datatype="dataset", name=dataset_name, flexilims_session=flm_sess diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py index a898f40..68f7a77 100644 --- a/tests/test_components/tests_schema/test_datasets.py +++ b/tests/test_components/tests_schema/test_datasets.py @@ -1,10 +1,10 @@ import pytest import pathlib import pandas as pd -from flexiznam.schema import Dataset +from flexiznam.schema import Dataset, microscopy_data from flexiznam.config import PARAMETERS from flexiznam.errors import DatasetError, FlexilimsError -from tests.tests_resources.data_for_testing import TEST_PROJECT +from tests.tests_resources.data_for_testing import TEST_PROJECT, PROJECT_ID from tests.test_components.test_main import MOUSE_ID # Test the generic dataset class. @@ -197,6 +197,51 @@ def test_from_flexilims(flm_sess): assert ds_by_id.project == project +def test_from_dataseries(flm_sess): + """This test requires the config file to include demo_project""" + series = pd.Series( + name="minimal_series", + data=dict( + genealogy=("minimal_series",), + path="/fake/path", + id="hexidonflexilims", + project=PROJECT_ID, + is_raw="no", + dataset_type="suite2p_rois", + ), + ) + ds = Dataset.from_dataseries( + dataseries=series, + flexilims_session=flm_sess, + ) + assert ds.flexilims_session == flm_sess + assert ds.full_name == "minimal_series" + assert type(ds) == Dataset + series = pd.Series( + name="test_microscopy", + data=dict( + genealogy=( + "test", + "microscopy", + ), + path="/fake/path", + id="hexidonflexilims", + project=PROJECT_ID, + is_raw="no", + dataset_type="microscopy", + ), + ) + + ds = Dataset.from_dataseries( + dataseries=series, + flexilims_session=flm_sess, + ) + assert ds.flexilims_session == flm_sess + assert ds.full_name == "test_microscopy" + assert ds.dataset_name == "microscopy" + assert type(ds) == microscopy_data.MicroscopyData + + def test_from_origin(flm_sess): """This test requires the database to be up-to-date for the physio mouse""" origin_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube" diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py index 72d32f9..5fbf18d 100644 --- a/tests/tests_resources/data_for_testing.py +++ b/tests/tests_resources/data_for_testing.py @@ -6,6 +6,7 @@ MOUSE_ID = "6437dcb13ded9c65df142a12" TEST_PROJECT = "demo_project" +PROJECT_ID = "610989f9a651ff0b6237e0f6" DATA_ROOT = Path(PARAMETERS["data_root"]["raw"]) / TEST_PROJECT PROCESSED_ROOT = Path(PARAMETERS["data_root"]["processed"]) / TEST_PROJECT From 92183360441c64b33a6edd182923bba2c7f299b5 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 6 Jul 2023 11:07:37 +0100 Subject: [PATCH 4/5] [testing] Improve test Test conflicts options of from_origin --- tests/test-results/pytest_in_tests.xml | 2 +- .../tests_schema/test_datasets.py | 64 +++++++++++++++++-- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml index 04ed6d2..45e39d6 100644 --- a/tests/test-results/pytest_in_tests.xml +++ b/tests/test-results/pytest_in_tests.xml @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py index 68f7a77..8c58fc5 100644 --- a/tests/test_components/tests_schema/test_datasets.py +++ b/tests/test_components/tests_schema/test_datasets.py @@ -3,7 +3,7 @@ import pandas as pd from flexiznam.schema import Dataset, microscopy_data from flexiznam.config import PARAMETERS -from flexiznam.errors import DatasetError, FlexilimsError +from flexiznam.errors import DatasetError, FlexilimsError, NameNotUniqueError from tests.tests_resources.data_for_testing import TEST_PROJECT, PROJECT_ID from tests.test_components.test_main import MOUSE_ID @@ -245,16 +245,70 @@ def test_from_dataseries(flm_sess): def test_from_origin(flm_sess): """This test requires the database to be up-to-date for the physio mouse""" origin_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube" - ds = Dataset.from_origin( + ds0 = Dataset.from_origin( origin_type="recording", origin_name=origin_name, dataset_type="suite2p_rois", - conflicts="skip", + conflicts="abort", flexilims_session=flm_sess, ) - assert ds.flexilims_session == flm_sess - assert ds.genealogy[-1].startswith("suite2p_rois") + assert ds0.flexilims_session == flm_sess + assert ds0.genealogy[-1].startswith("suite2p_rois") + assert ds0.dataset_name == "suite2p_rois_0" + ds0.update_flexilims() + # now from_origin should raise an error if abort + with pytest.raises(DatasetError) as err: + Dataset.from_origin( + origin_type="recording", + origin_name=origin_name, + dataset_type="suite2p_rois", + conflicts="abort", + flexilims_session=flm_sess, + ) + ds0_bis = Dataset.from_origin( + origin_type="recording", + origin_name=origin_name, + dataset_type="suite2p_rois", + conflicts="overwrite", + flexilims_session=flm_sess, + ) + assert ds0.id == ds0_bis.id + ds1 = Dataset.from_origin( + origin_type="recording", + origin_name=origin_name, + dataset_type="suite2p_rois", + conflicts="append", + flexilims_session=flm_sess, + ) + assert ds1.id is None + assert ds1.flexilims_session == flm_sess + assert ds1.dataset_name == "suite2p_rois_1" + ds1.update_flexilims() + assert ds1.id is not None + assert ds1.id != ds0.id + # now we have 2 datasets, skip and overwrite should raise an error + with pytest.raises(NameNotUniqueError) as err: + Dataset.from_origin( + origin_type="recording", + origin_name=origin_name, + dataset_type="suite2p_rois", + conflicts="skip", + flexilims_session=flm_sess, + ) + with pytest.raises(NameNotUniqueError) as err: + Dataset.from_origin( + origin_type="recording", + origin_name=origin_name, + dataset_type="suite2p_rois", + conflicts="overwrite", + flexilims_session=flm_sess, + ) + # clean up + for ds in (ds0, ds1): + flm_sess.delete(ds.id) + + def test_update_flexilims(flm_sess): """This test requires the database to be up-to-date for the physio mouse""" From 4cb82a74cf9e3552a7d0fe65da4fa82d11b7c69a Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 6 Jul 2023 16:45:52 +0100 Subject: [PATCH 5/5] improve get_children output - `get_children` output is filtered to contain only relevant columns when `children_datatype` is not None --- CHANGELOG.md | 2 ++ flexiznam/main.py | 17 +++++++++++------ tests/test-results/pytest_in_tests.xml | 2 +- tests/test_components/test_main.py | 9 ++++++++- tests/tests_resources/data_for_testing.py | 1 + 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb910a3..09a632b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ ### Minor +- `get_children` output is filtered to contain only relevant columns when `children_datatype` is not None + ### Bugfixes ## v0.3.6 diff --git a/flexiznam/main.py b/flexiznam/main.py index a468d47..a663f4b 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -968,11 +968,14 @@ def get_children( if parent_id is None: assert parent_name is not None, "Must provide either parent_id or parent_name" parent_id = get_id(parent_name, flexilims_session=flexilims_session) - results = format_results(flexilims_session.get_children(parent_id)) + results = format_results( + flexilims_session.get_children(parent_id), return_list=True + ) if not len(results): - return results + return pd.DataFrame(results) if children_datatype is not None: - results = results.loc[results.type == children_datatype, :] + results = [r for r in results if r["type"] == children_datatype] + results = pd.DataFrame(results) results.set_index("name", drop=False, inplace=True) return results @@ -1085,7 +1088,7 @@ def generate_name(datatype, name, flexilims_session=None, project_id=None): return name -def format_results(results): +def format_results(results, return_list=False): """Make request output a nice DataFrame This will crash if any attribute is also present in the flexilims reply, @@ -1095,6 +1098,7 @@ def format_results(results): Args: results (:obj:`list` of :obj:`dict`): Flexilims reply + return_list (bool): if True, return a list of dicts instead of a DataFrame Returns: :py:class:`pandas.DataFrame`: Reply formatted as a DataFrame @@ -1108,5 +1112,6 @@ def format_results(results): ) result[attr_name] = attr_value result.pop("attributes") - df = pd.DataFrame(results) - return df + if return_list: + return results + return pd.DataFrame(results) diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml index 45e39d6..d6f2e8e 100644 --- a/tests/test-results/pytest_in_tests.xml +++ b/tests/test-results/pytest_in_tests.xml @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 32a7d1d..8544755 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -4,7 +4,7 @@ import flexiznam.main as flz from flexiznam.config import PARAMETERS, get_password from flexiznam.errors import FlexilimsError, NameNotUniqueError -from tests.tests_resources.data_for_testing import MOUSE_ID +from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION # Test functions from main.py from flexiznam.schema import Dataset @@ -180,6 +180,13 @@ def test_get_children(flm_sess): assert isinstance(res, pd.DataFrame) res = flz.get_children(parent_name="mouse_physio_2p", flexilims_session=flm_sess) assert len(res) == 1 + res_all = flz.get_children(parent_name=SESSION, flexilims_session=flm_sess) + assert (res_all.type != "recording").sum() != 0 + res_part = flz.get_children( + parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording" + ) + assert (res_part.type != "recording").sum() == 0 + assert res_all.shape[1] > res_part.shape[1] def test_add_entity(flm_sess): diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py index 5fbf18d..78d5e34 100644 --- a/tests/tests_resources/data_for_testing.py +++ b/tests/tests_resources/data_for_testing.py @@ -7,6 +7,7 @@ MOUSE_ID = "6437dcb13ded9c65df142a12" TEST_PROJECT = "demo_project" PROJECT_ID = "610989f9a651ff0b6237e0f6" +SESSION = "mouse_physio_2p_S20211102" DATA_ROOT = Path(PARAMETERS["data_root"]["raw"]) / TEST_PROJECT PROCESSED_ROOT = Path(PARAMETERS["data_root"]["processed"]) / TEST_PROJECT