From e66c0d951462c0e2464e2711a21cd5fdff6ec629 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 11 Jul 2023 13:33:43 +0100 Subject: [PATCH 01/18] Update version and exclude test package --- CHANGELOG.md | 4 ++++ setup.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09a632b..7caab39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Change log +## v0.3.8 + +### Main changes + ## v0.3.7 ### Main changes diff --git a/setup.py b/setup.py index df05b7d..afd0c32 100755 --- a/setup.py +++ b/setup.py @@ -2,13 +2,13 @@ setup( name="flexiznam", - version="v0.3.7", + version="v0.3.8", url="https://github.com/znamlab/flexznam", license="MIT", author="Antonin Blot", author_email="antonin.blot@gmail.com", description="Znamlab tool to interact with flexilims", - packages=find_packages(), + packages=find_packages(exclude=["tests"]), include_package_data=True, install_requires=[ "Click", From 9ded96ec025821e8361b4c07b1dd5cd62cb24877 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 17 Jul 2023 14:06:24 +0100 Subject: [PATCH 02/18] [feature] Add `get_data_root` function to get `raw` or `processed` root for a project --- CHANGELOG.md | 2 ++ flexiznam/main.py | 33 ++++++++++++++++++++++++++++++ tests/test_components/test_main.py | 21 +++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7caab39..8b94e86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ## v0.3.7 +- Add `get_data_root` function to get `raw` or `processed` root for a project + ### Main changes - Separate `Dataset.from_dataseries` and `Dataset.from_flexilims` to avoid confusion diff --git a/flexiznam/main.py b/flexiznam/main.py index a663f4b..2d6b7d6 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -22,6 +22,38 @@ def _format_project(project_id, prm): return project_id +def get_data_root(which, project=None, flexilims_session=None): + """Get raw or processed path for a project + + Args: + which (str): either "raw" or "processed" + project (str, optional): name or id of the project. Optional if + flexilims_session is provided + flexilims_session (:py:class:`flexilims.Flexilims`, optional): a flexilims + session with project set. Optional if project is provided. 
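+
+    Returns:
+        pathlib.Path: the root folder for `raw` or `processed` data
+
+    Example (illustrative; actual roots depend on the local flexiznam config)::
+
+        raw_root = get_data_root("raw", project="test")
+        processed_root = get_data_root("processed", project="test")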
+ """ + if which not in ["raw", "processed"]: + raise ValueError("which must be either 'raw' or 'processed'") + + if project is None: + assert ( + flexilims_session is not None, + "`flexilims_session` must be provided if `project` is None", + ) + project = flexilims_session.project_id + + if project not in PARAMETERS["project_ids"]: + project = lookup_project(project, prm=None) + assert project is not None, f"Invalid project {project}" + + if project in PARAMETERS["project_paths"]: + return Path(PARAMETERS["project_paths"][project][which]) + + if which == "raw": + return Path(PARAMETERS["data_root"]["raw"]) + return Path(PARAMETERS["data_root"]["processed"]) + + def lookup_project(project_id, prm=None): """ Look up project name by hexadecimal id @@ -975,6 +1007,7 @@ def get_children( return pd.DataFrame(results) if children_datatype is not None: results = [r for r in results if r["type"] == children_datatype] + results = pd.DataFrame(results) results.set_index("name", drop=False, inplace=True) return results diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 8544755..73ec8dd 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -1,4 +1,5 @@ import datetime +from pathlib import Path import pandas as pd import pytest import flexiznam.main as flz @@ -12,6 +13,26 @@ # this needs to change every time I reset flexlilims +def test_get_path(): + p = flz.get_data_root(which="raw", project="test", flexilims_session=None) + assert p == Path(PARAMETERS["data_root"]["raw"]) + p = flz.get_data_root(which="processed", project="test", flexilims_session=None) + assert p == Path(PARAMETERS["data_root"]["processed"]) + sess = flz.get_flexilims_session( + project_id=PARAMETERS["project_ids"]["test"], reuse_token=False + ) + p = flz.get_data_root(which="processed", project=None, flexilims_session=sess) + assert p == Path(PARAMETERS["data_root"]["processed"]) + p = flz.get_data_root(which="raw", project="example", flexilims_session=None) + assert p == Path("/camp/project/example_project/raw") + with pytest.raises(AttributeError): + flz.get_data_root(which="processed", project=None, flexilims_session=None) + with pytest.raises(ValueError): + flz.get_data_root(which="crap", project="test", flexilims_session=None) + with pytest.raises(AssertionError): + p = flz.get_data_root(which="raw", project="random", flexilims_session=None) + + def test_get_flexilims_session(): sess = flz.get_flexilims_session( project_id=PARAMETERS["project_ids"]["test"], reuse_token=False From c414376d5bd5671e2886397904b9abb1b8a48444 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 17 Jul 2023 14:22:36 +0100 Subject: [PATCH 03/18] [bugfix] harp dataset match csv even with short name --- CHANGELOG.md | 10 ++++++++-- flexiznam/schema/harp_data.py | 20 +++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b94e86..8e6487c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,16 @@ ### Main changes -## v0.3.7 - - Add `get_data_root` function to get `raw` or `processed` root for a project +### Minor + +- `harp_dataset.from_folder` will now match csv even if there is nothing before or after + `harpmessage` in the file name (i.e. 
the file is `harpmessage.bin`, and all csvs in + the folder will be matched) + +## v0.3.7 + ### Main changes - Separate `Dataset.from_dataseries` and `Dataset.from_flexilims` to avoid confusion diff --git a/flexiznam/schema/harp_data.py b/flexiznam/schema/harp_data.py index 3613cd1..70a9f4f 100644 --- a/flexiznam/schema/harp_data.py +++ b/flexiznam/schema/harp_data.py @@ -55,17 +55,15 @@ def from_folder( "file name." % bin_file ) continue - if any(m.groups()): # do not match csv if the bin is just "harpmessage" - pattern = "(.*)".join(m.groups()) + ".csv" - matches = [re.match(pattern, f) for f in csv_files] - associated_csv = { - m.groups()[0].strip("_"): f for f, m in zip(csv_files, matches) if m - } - if matched_files.intersection(associated_csv.values()): - raise IOError("A csv file matched with multiple binary files.") - matched_files.update(associated_csv.values()) - else: - associated_csv = {} + + pattern = "(.*)".join(m.groups()) + ".csv" + matches = [re.match(pattern, f) for f in csv_files] + associated_csv = { + m.groups()[0].strip("_"): f for f, m in zip(csv_files, matches) if m + } + if matched_files.intersection(associated_csv.values()): + raise IOError("A csv file matched with multiple binary files.") + matched_files.update(associated_csv.values()) bin_path = pathlib.Path(folder) / bin_file created = datetime.datetime.fromtimestamp(bin_path.stat().st_mtime) From 18450ef716a22f2d772785e05a06d586a02538ee Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 17 Jul 2023 14:31:00 +0100 Subject: [PATCH 04/18] [feature] `get_children` can filter by attributes --- CHANGELOG.md | 1 + flexiznam/main.py | 5 +++++ tests/test_components/test_main.py | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e6487c..0305fa0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Main changes - Add `get_data_root` function to get `raw` or `processed` root for a project +- `get_children` can filter children by attributes before returning results ### Minor diff --git a/flexiznam/main.py b/flexiznam/main.py index 2d6b7d6..cf45ecf 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -978,6 +978,7 @@ def get_children( children_datatype=None, project_id=None, flexilims_session=None, + filter=None, ): """ Get all entries belonging to a particular parent entity @@ -989,6 +990,7 @@ def get_children( types if None) project_id (str): text name of the project flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object + filter (dict, None): filter to apply to the extra_attributes of the children Returns: DataFrame: containing all the relevant child entitites @@ -1007,6 +1009,9 @@ def get_children( return pd.DataFrame(results) if children_datatype is not None: results = [r for r in results if r["type"] == children_datatype] + if filter is not None: + for key, value in filter.items(): + results = [r for r in results if r.get(key, None) == value] results = pd.DataFrame(results) results.set_index("name", drop=False, inplace=True) diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 73ec8dd..f40d4b3 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -208,6 +208,13 @@ def test_get_children(flm_sess): ) assert (res_part.type != "recording").sum() == 0 assert res_all.shape[1] > res_part.shape[1] + single_res = flz.get_children( + parent_name=SESSION, + flexilims_session=flm_sess, + children_datatype="dataset", + filter=dict(notes="Motion correction reference"), + ) 
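+    # the demo project is expected to contain exactly one dataset with this note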
+    assert single_res.shape[0] == 1
 
 
 def test_add_entity(flm_sess):

From a2efd1bc4ea18007b93c184c451980524f8a933 Mon Sep 17 00:00:00 2001
From: Antonin Blot
Date: Mon, 17 Jul 2023 15:46:12 +0100
Subject: [PATCH 05/18] [bugfix] if get_children `filter` filters everything

---
 CHANGELOG.md      | 5 +++++
 flexiznam/main.py | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0305fa0..9dd1258 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@
 - Add `get_data_root` function to get `raw` or `processed` root for a project
 - `get_children` can filter children by attributes before returning results
 
+### Bugfixes
+
+- return empty dataframe if `filter` in `get_children` filters out everything (instead
+  of crashing)
+
 ### Minor
 
 - `harp_dataset.from_folder` will now match csv even if there is nothing before or after
diff --git a/flexiznam/main.py b/flexiznam/main.py
index cf45ecf..4db9099 100755
--- a/flexiznam/main.py
+++ b/flexiznam/main.py
@@ -1014,7 +1014,8 @@ def get_children(
             results = [r for r in results if r.get(key, None) == value]
     results = pd.DataFrame(results)
 
-    results.set_index("name", drop=False, inplace=True)
+    if len(results):
+        results.set_index("name", drop=False, inplace=True)
     return results
 

From e37cb43bab653f2415645057fc7679e9ac655b48 Mon Sep 17 00:00:00 2001
From: Antonin Blot
Date: Mon, 17 Jul 2023 17:44:34 +0100
Subject: [PATCH 06/18] [feature] split get_datasets and recursive version

---
 CHANGELOG.md                       |   2 +
 flexiznam/main.py                  | 178 +++++++++++++++++++++--------
 tests/test_components/test_main.py | 115 ++++++++++++++++---
 3 files changed, 232 insertions(+), 63 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9dd1258..86f233e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@
 
 - Add `get_data_root` function to get `raw` or `processed` root for a project
 - `get_children` can filter children by attributes before returning results
+- refactor `get_datasets` to be non-recursive and add filtering options
+- add `get_datasets_recursively` to get all datasets below a given entity
 
 ### Bugfixes
 
diff --git a/flexiznam/main.py b/flexiznam/main.py
index 4db9099..3aa48a0 100755
--- a/flexiznam/main.py
+++ b/flexiznam/main.py
@@ -1019,20 +1019,125 @@ def get_children(
     return results
 
 
-def get_datasets(
-    origin_id,
-    recording_type=None,
+def get_child_dataset(flz_session, parent_name, dataset_type):
+    """
+    Get the last dataset of a given type for a given parent entity.
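+
+    If no dataset of the requested type exists, a ValueError is raised; if several
+    exist, a message is printed and the last one is returned.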
+ + Args: + flz_session (flexilims_session): flexilims session + parent_name (str): name of the parent entity + dataset_type (str): type of the dataset + + Returns: + Dataset: the last dataset of the given type for the given parent entity + + """ + all_children = get_children( + parent_name=parent_name, + children_datatype="dataset", + flexilims_session=flz_session, + ) + selected_datasets = all_children[all_children["dataset_type"] == dataset_type] + if len(selected_datasets) == 0: + raise ValueError(f"No {dataset_type} dataset found for session {parent_name}") + elif len(selected_datasets) > 1: + print( + f"{len(selected_datasets)} {dataset_type} datasets found for session {parent_name}" + ) + print("Will return the last one...") + return flexiznam.Dataset.from_dataseries( + selected_datasets.iloc[-1], flexilims_session=flz_session + ) + + +def get_datasets_recursively( + origin_id=None, + origin_name=None, + origin_series=None, dataset_type=None, + filter_datasets=None, + parent_type=None, + filter_parents=None, + return_paths=False, project_id=None, flexilims_session=None, - return_paths=True, + _output=None, ): - """ - Recurse into recordings and get paths to child datasets of a given type. + """Get datasets recursively from a parent entity For example, this is useful if you want to retrieve paths to all *scanimage* datasets associated with a given session. + Returns: + dict: Dictionary with direct parent id as keys and lists of associated + datasets, or dataset paths as values + + """ + if origin_series is None: + if origin_id is None: + origin_id = get_id(origin_name, flexilims_session=flexilims_session) + origin_series = get_entity(id=origin_id, flexilims_session=flexilims_session) + else: + origin_id = origin_series["id"] + origin_is_valid = True + + # initialize output if first call + if _output is None: + _output = {} + + # Before adding the datasets of this level, check if the parent is valid + if (parent_type is not None) and (origin_series["type"] != parent_type): + origin_is_valid = False + if filter_parents is not None: + for key, value in filter_parents.items(): + if origin_series.get(key, None) != value: + origin_is_valid = False + + if origin_is_valid: + ds = get_datasets( + origin_id=origin_id, + dataset_type=dataset_type, + project_id=project_id, + flexilims_session=flexilims_session, + return_paths=return_paths, + filter_datasets=filter_datasets, + ) + # add only if there are datasets + if len(ds): + _output[origin_id] = ds + + # now recurse on children + children = get_children( + parent_id=origin_id, + parent_name=origin_name, + flexilims_session=flexilims_session, + ) + for _, child in children.iterrows(): + if child.type == "dataset": + continue + get_datasets_recursively( + origin_series=child, + dataset_type=dataset_type, + project_id=project_id, + flexilims_session=flexilims_session, + return_paths=return_paths, + filter_datasets=filter_datasets, + filter_parents=filter_parents, + _output=_output, + ) + return _output + + +def get_datasets( + origin_id=None, + origin_name=None, + dataset_type=None, + project_id=None, + flexilims_session=None, + return_paths=True, + filter_datasets=None, +): + """ Args: origin_id (str): hexadecimal ID of the origin session. recording_type (str): type of the recording to filter by. If `None`, @@ -1044,9 +1149,8 @@ def get_datasets( flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object return_paths (bool): if True, return a list of paths. If False, return the dataset objects. 
+ _output (list): internal argument used for recursion. - Returns: - dict: Dictionary with recording names as keys containing lists of associated dataset paths. """ assert (project_id is not None) or (flexilims_session is not None) @@ -1054,46 +1158,30 @@ def get_datasets( flexilims_session = get_flexilims_session(project_id) else: project_id = lookup_project(flexilims_session.project_id, PARAMETERS) - recordings = get_entities( - datatype="recording", - origin_id=origin_id, - query_key="recording_type", - query_value=recording_type, + + if origin_id is None: + assert origin_name is not None, "Must provide either origin_id or origin_name" + if filter_datasets is None: + filter_datasets = {} + if dataset_type is not None: + filter_datasets.update({"dataset_type": dataset_type}) + datasets = get_children( + parent_id=origin_id, + parent_name=origin_name, + children_datatype="dataset", flexilims_session=flexilims_session, + filter=filter_datasets, ) - datapath_dict = {} - if len(recordings) < 1: - return datapath_dict - for recording_id in recordings["id"]: - datasets = get_entities( - datatype="dataset", - origin_id=recording_id, - query_key="dataset_type", - query_value=dataset_type, - flexilims_session=flexilims_session, + + datasets = [ + flexiznam.Dataset.from_dataseries( + dataseries=ds, flexilims_session=flexilims_session ) - if return_paths: - datapaths = [] - for dataset_path, is_raw in zip(datasets["path"], datasets["is_raw"]): - prefix = ( - PARAMETERS["data_root"]["raw"] - if is_raw == "yes" - else PARAMETERS["data_root"]["processed"] - ) - this_path = Path(prefix) / dataset_path - if this_path.exists(): - datapaths.append(str(this_path)) - else: - raise IOError("Dataset {} not found".format(this_path)) - datapath_dict[recording_id] = datapaths - else: - datapath_dict[recording_id] = [ - flexiznam.Dataset.from_dataseries( - dataseries=ds, flexilims_session=flexilims_session - ) - for _, ds in datasets.iterrows() - ] - return datapath_dict + for _, ds in datasets.iterrows() + ] + if return_paths: + datasets = [ds.path_full for ds in datasets] + return datasets def generate_name(datatype, name, flexilims_session=None, project_id=None): diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index f40d4b3..ad2314a 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -1,4 +1,5 @@ import datetime +import pathlib from pathlib import Path import pandas as pd import pytest @@ -8,7 +9,7 @@ from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION # Test functions from main.py -from flexiznam.schema import Dataset +from flexiznam.schema import Dataset, HarpData # this needs to change every time I reset flexlilims @@ -127,28 +128,106 @@ def test_get_mouse_id(flm_sess): def test_get_datasets(flm_sess): - sess = flz.get_entity( - name="mouse_physio_2p_S20211102", datatype="session", flexilims_session=flm_sess + ds = flz.get_datasets( + origin_id=MOUSE_ID, + flexilims_session=flm_sess, + ) + assert len(ds) == 0 + ds = flz.get_datasets( + origin_name=SESSION, + flexilims_session=flm_sess, + ) + assert len(ds) == 3 + assert all([isinstance(d, pathlib.PosixPath) for d in ds]) + ds = flz.get_datasets( + origin_name=SESSION, + flexilims_session=flm_sess, + return_paths=False, ) + assert len(ds) == 3 + assert all([hasattr(d, "path_full") for d in ds]) ds = flz.get_datasets( - flexilims_session=flm_sess, origin_id=sess.id, return_paths=True + origin_name=SESSION, + flexilims_session=flm_sess, + return_paths=False, + 
filter_datasets=dict(acq_uid="overview_zoom1_00001"),
+    )
+    assert len(ds) == 1
+
+    rec = flz.get_children(
+        parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording"
+    ).iloc[0]
+    ds_all = flz.get_datasets(
+        origin_id=rec.id,
+        flexilims_session=flm_sess,
+        return_paths=False,
+    )
+    ds_cam = flz.get_datasets(
+        origin_id=rec.id,
+        dataset_type="camera",
+        flexilims_session=flm_sess,
+        return_paths=False,
+    )
+    assert len(ds_all) > len(ds_cam)
+    ds = flz.get_datasets(
+        origin_id=rec.id,
+        dataset_type="camera",
+        filter_datasets=dict(timestamp_file="face_camera_timestamps.csv"),
+        flexilims_session=flm_sess,
+        return_paths=True,
+    )
+    assert len(ds) == 1
+    ds2 = flz.get_datasets(
+        origin_id=rec.id,
+        dataset_type="camera",
+        filter_datasets=dict(timestamp_file="face_camera_timestamps.csv"),
+        project_id=flm_sess.project_id,
+        return_paths=True,
+    )
+    assert ds == ds2
+
+
+def test_get_datasets_recursively(flm_sess):
+    ds_dict = flz.get_datasets_recursively(
+        flexilims_session=flm_sess, origin_name=SESSION, return_paths=True
+    )
+    assert len(ds_dict) == 3
+    ds_dict = flz.get_datasets_recursively(
+        flexilims_session=flm_sess,
+        origin_name=SESSION,
+        return_paths=False,
+        dataset_type="harp",
+    )
+    assert len(ds_dict) == 2
+    ds = []
+    for d in ds_dict.values():
+        ds.extend(d)
+    assert all([isinstance(d, HarpData) for d in ds])
+    ds_dict = flz.get_datasets_recursively(
+        flexilims_session=flm_sess,
+        origin_name=SESSION,
+        return_paths=False,
+        dataset_type="harp",
+        filter_datasets=dict(binary_file="harpmessage.bin"),
+    )
+    assert len(ds_dict) == 1
+    assert ds_dict.values().__iter__().__next__()[0].dataset_name == "harpmessage"
+
+    ds_dict = flz.get_datasets_recursively(
+        flexilims_session=flm_sess,
+        origin_name=SESSION,
+        return_paths=False,
+        filter_parents={"timestamp": "165821"},
+    )
+    assert len(ds_dict) == 1
+    ds_dict = flz.get_datasets_recursively(
+        flexilims_session=flm_sess,
+        origin_name=SESSION,
+        return_paths=False,
+        parent_type="recording",
+    )
+    assert len(ds_dict) == 2
 
 
 def test_add_mouse(flm_sess):

From 591aa238e623899d2461a5830f6c2a201e4589a7 Mon Sep 17 00:00:00 2001
From: Antonin Blot
Date: Mon, 17 Jul 2023 17:45:01 +0100
Subject: [PATCH 07/18] [minor] add `project_paths` to default config

---
 flexiznam/config/default_config.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/flexiznam/config/default_config.py b/flexiznam/config/default_config.py
index 06b5e62..5e0d4ae 100644
--- a/flexiznam/config/default_config.py
+++ b/flexiznam/config/default_config.py
@@ -14,6 +14,12 @@
         "test": "606df1ac08df4d77c72c9aa4",
         "AAVRKO_retina_hva": "60a7757a8901a2357f29080a",
     },
+    project_paths={
+        "example": dict(
+            raw="/camp/project/example_project/raw",
+            processed="/camp/project/example_project/processed",
+        )
+    },
     # a default username can be specified
     flexilims_username="yourusername",
     # Use for data dataset detection

From d644f0836a6076f1f1a64cdbdb9a601c842c6453 Mon Sep 17 00:00:00 2001
From: Antonin Blot
Date: Mon, 17 Jul 2023 17:51:24 +0100
Subject: [PATCH 08/18] [paths] adapt datasets to use
`get_data_root` --- flexiznam/schema/datasets.py | 9 +++---- .../tests_schema/test_datasets.py | 26 +++++++++++++------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 11e181e..09f54af 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -711,12 +711,11 @@ def is_raw(self, value): @property def path_root(self): """Get CAMP root path that should apply to this dataset""" - if self.is_raw: - return Path(flz.config.PARAMETERS["data_root"]["raw"]) - elif self.is_raw is None: + if self.is_raw is None: raise AttributeError("`is_raw` must be set to find path.") - else: - return Path(flz.config.PARAMETERS["data_root"]["processed"]) + return flz.get_data_root( + which="raw" if self.is_raw else "processed", project=self.project + ) @property def path_full(self): diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py index 8c58fc5..95fd6d7 100644 --- a/tests/test_components/tests_schema/test_datasets.py +++ b/tests/test_components/tests_schema/test_datasets.py @@ -1,6 +1,7 @@ import pytest import pathlib import pandas as pd +import flexiznam from flexiznam.schema import Dataset, microscopy_data from flexiznam.config import PARAMETERS from flexiznam.errors import DatasetError, FlexilimsError, NameNotUniqueError @@ -266,12 +267,12 @@ def test_from_origin(flm_sess): flexilims_session=flm_sess, ) ds0_bis = Dataset.from_origin( - origin_type="recording", - origin_name=origin_name, - dataset_type="suite2p_rois", - conflicts="overwrite", - flexilims_session=flm_sess, - ) + origin_type="recording", + origin_name=origin_name, + dataset_type="suite2p_rois", + conflicts="overwrite", + flexilims_session=flm_sess, + ) assert ds0.id == ds0_bis.id ds1 = Dataset.from_origin( @@ -307,8 +308,7 @@ def test_from_origin(flm_sess): # clean up for ds in (ds0, ds1): flm_sess.delete(ds.id) - - + def test_update_flexilims(flm_sess): """This test requires the database to be up-to-date for the physio mouse""" @@ -353,6 +353,16 @@ def test_dataset_paths(flm_sess): assert str(ds.path_full) == str( pathlib.Path(PARAMETERS["data_root"]["raw"] / ds.path) ) + ds = Dataset( + path="test_project", + is_raw="no", + dataset_type="camera", + extra_attributes={}, + created="", + project="example", + ) + assert ds.path_root != pathlib.Path(PARAMETERS["data_root"]["processed"]) + assert ds.path_root == flexiznam.get_data_root(which="processed", project="example") def test_project_project_id(flm_sess): From 75246fb10548f41e1efb38c3e7d8a13fe9dae4f6 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 17 Jul 2023 17:52:54 +0100 Subject: [PATCH 09/18] [tests] fix test to change in demo data --- tests/test_components/tests_schema/test_harp.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_components/tests_schema/test_harp.py b/tests/test_components/tests_schema/test_harp.py index ed13fa4..e31679f 100644 --- a/tests/test_components/tests_schema/test_harp.py +++ b/tests/test_components/tests_schema/test_harp.py @@ -1,14 +1,13 @@ import pytest from flexiznam.schema.harp_data import HarpData from tests.tests_resources.data_for_testing import DATA_ROOT -from pathlib import Path def test_harp(): folder_genealogy = ["mouse_physio_2p", "S20211102", "R165821_SpheresPermTube"] data_dir = DATA_ROOT.joinpath(*folder_genealogy) ds = HarpData.from_folder(data_dir, verbose=False) - assert len(ds) == 2 + assert len(ds) == 1 ds_name = 
"PZAD9.4d_S20211102_R165821_SpheresPermTube_harpmessage" d = ds[ds_name] assert d.full_name == folder_genealogy[-1] + "_" + ds_name From a02e97b7113cb7901931ba0965fe59cd07d5fb02 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 17 Jul 2023 18:46:31 +0100 Subject: [PATCH 10/18] [feature] add allow_multiple and return_dataseries --- CHANGELOG.md | 3 ++- flexiznam/main.py | 41 ++++++++++++++++++++--------- tests/test_components/test_main.py | 42 ++++++++++++++++++++++++++++-- 3 files changed, 71 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 86f233e..cf4abd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ - Add `get_data_root` function to get `raw` or `processed` root for a project - `get_children` can filter children by attributes before returning results -- refactor `get_datasets` to be non recursive and add filtering options +- refactor `get_datasets` to be non recursive and add filtering options. Also add + multiple options to filter datasets and format output - add `get_datasets_recursively` to get all datasets below a given entity ### Bugfixes diff --git a/flexiznam/main.py b/flexiznam/main.py index 3aa48a0..814706c 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -1134,20 +1134,28 @@ def get_datasets( dataset_type=None, project_id=None, flexilims_session=None, - return_paths=True, filter_datasets=None, + allow_multiple=True, + return_paths=False, + return_dataseries=False, ): """ Args: - origin_id (str): hexadecimal ID of the origin session. - recording_type (str): type of the recording to filter by. If `None`, - will return datasets for all recordings. + origin_id (str): hexadecimal ID of the origin session. Not required if + origin_name is provided. + origin_name (str): text name of the origin session. Not required if origin_id + is provided. dataset_type (str): type of the dataseet to filter by. If `None`, will return all datasets. project_id (str): text name of the project. Not required if `flexilims_session` is provided. flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object + filter_datasets (dict): dictionary of key-value pairs to filter datasets by. + allow_multiple (bool): if True, allow multiple datasets to be returned, + otherwise ensure that only one dataset exists online and return it. return_paths (bool): if True, return a list of paths. If False, return the + dataset objects or dataseries. + return_dataseries (bool): if True, return the dataseries instead of the dataset objects. _output (list): internal argument used for recursion. @@ -1165,6 +1173,7 @@ def get_datasets( filter_datasets = {} if dataset_type is not None: filter_datasets.update({"dataset_type": dataset_type}) + datasets = get_children( parent_id=origin_id, parent_name=origin_name, @@ -1173,14 +1182,22 @@ def get_datasets( filter=filter_datasets, ) - datasets = [ - flexiznam.Dataset.from_dataseries( - dataseries=ds, flexilims_session=flexilims_session - ) - for _, ds in datasets.iterrows() - ] - if return_paths: - datasets = [ds.path_full for ds in datasets] + if not return_dataseries: + datasets = [ + flexiznam.Dataset.from_dataseries( + dataseries=ds, flexilims_session=flexilims_session + ) + for _, ds in datasets.iterrows() + ] + if return_paths: + datasets = [ds.path_full for ds in datasets] + + if not allow_multiple: + assert len(datasets) <= 1, f"Fount {len(datasets)} datasets. Expected 1." 
+ if len(datasets) == 1: + datasets = datasets[0] if not return_dataseries else datasets.iloc[0] + else: + datasets = None return datasets diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index ad2314a..c6aa785 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -9,7 +9,7 @@ from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION # Test functions from main.py -from flexiznam.schema import Dataset, HarpData +from flexiznam.schema import Dataset, HarpData, ScanimageData # this needs to change every time I reset flexlilims @@ -136,6 +136,7 @@ def test_get_datasets(flm_sess): ds = flz.get_datasets( origin_name=SESSION, flexilims_session=flm_sess, + return_paths=True, ) assert len(ds) == 3 assert all([isinstance(d, pathlib.PosixPath) for d in ds]) @@ -153,11 +154,42 @@ def test_get_datasets(flm_sess): filter_datasets=dict(acq_uid="overview_zoom1_00001"), ) assert len(ds) == 1 + ds = flz.get_datasets( + origin_name=SESSION, + flexilims_session=flm_sess, + return_paths=False, + filter_datasets=dict(acq_uid="overview_zoom1_00001"), + allow_multiple=False, + ) + assert isinstance(ds, ScanimageData) + ds = flz.get_datasets( + origin_name=SESSION, + flexilims_session=flm_sess, + return_paths=True, + filter_datasets=dict(acq_uid="overview_zoom1_00001"), + allow_multiple=False, + ) + assert isinstance(ds, pathlib.PosixPath) + ds = flz.get_datasets( + origin_name=SESSION, + flexilims_session=flm_sess, + return_dataseries=True, + filter_datasets=dict(acq_uid="overview_zoom1_00001"), + allow_multiple=True, + ) + assert isinstance(ds, pd.DataFrame) + ds = flz.get_datasets( + origin_name=SESSION, + flexilims_session=flm_sess, + return_dataseries=True, + filter_datasets=dict(acq_uid="overview_zoom1_00001"), + allow_multiple=False, + ) + assert isinstance(ds, pd.Series) rec = flz.get_children( parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording" ).iloc[0] - ds_all = flz.get_datasets( origin_id=rec.id, flexilims_session=flm_sess, @@ -186,6 +218,12 @@ def test_get_datasets(flm_sess): return_paths=True, ) assert ds == ds2 + with pytest.raises(AssertionError): + flz.get_datasets( + origin_id=rec.id, + project_id=flm_sess.project_id, + allow_multiple=False, + ) def test_get_datasets_recursively(flm_sess): From af690d6a28eeed144227add40c4433d515862589 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 18 Jul 2023 10:05:55 +0100 Subject: [PATCH 11/18] [minor] docstrings and tests update --- flexiznam/main.py | 8 +++----- flexiznam/schema/sequencing_data.py | 2 +- tests/test-results/pytest_in_tests.xml | 2 +- .../test_components/tests_schema/test_sequencing_data.py | 5 ++--- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index 814706c..e8edb41 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -1153,10 +1153,8 @@ def get_datasets( filter_datasets (dict): dictionary of key-value pairs to filter datasets by. allow_multiple (bool): if True, allow multiple datasets to be returned, otherwise ensure that only one dataset exists online and return it. - return_paths (bool): if True, return a list of paths. If False, return the - dataset objects or dataseries. - return_dataseries (bool): if True, return the dataseries instead of the - dataset objects. + return_paths (bool): if True, return a list of paths + return_dataseries (bool): if True, a dataframe or a dataseries _output (list): internal argument used for recursion. 
@@ -1173,7 +1171,7 @@ def get_datasets( filter_datasets = {} if dataset_type is not None: filter_datasets.update({"dataset_type": dataset_type}) - + datasets = get_children( parent_id=origin_id, parent_name=origin_name, diff --git a/flexiznam/schema/sequencing_data.py b/flexiznam/schema/sequencing_data.py index d226b68..4f4ab0f 100644 --- a/flexiznam/schema/sequencing_data.py +++ b/flexiznam/schema/sequencing_data.py @@ -48,7 +48,7 @@ def from_folder( is_raw (bool): does this folder contain raw data? verbose (bool=True): print info about what is found flexilims_session (flm.Session): session to interact with flexilims - project (str): project ID or name + project (str): project name Returns: dict of datasets (fzm.schema.sequencing_data.SequencingData) diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml index d6f2e8e..e3838f1 100644 --- a/tests/test-results/pytest_in_tests.xml +++ b/tests/test-results/pytest_in_tests.xml @@ -1 +1 @@ - \ No newline at end of file + \ No newline at end of file diff --git a/tests/test_components/tests_schema/test_sequencing_data.py b/tests/test_components/tests_schema/test_sequencing_data.py index b4983c9..e066926 100644 --- a/tests/test_components/tests_schema/test_sequencing_data.py +++ b/tests/test_components/tests_schema/test_sequencing_data.py @@ -1,6 +1,6 @@ import pytest from flexiznam.schema.sequencing_data import SequencingData -from tests.tests_resources.data_for_testing import DATA_ROOT +from tests.tests_resources.data_for_testing import DATA_ROOT, PROJECT_ID # Test creation of all dataset types. # @@ -13,10 +13,9 @@ def test_from_folder(): raw_folder = DATA_ROOT / "mouse_mapseq_lcm" / "Raw_sequencing" - ds = SequencingData.from_folder(raw_folder, verbose=False) + ds = SequencingData.from_folder(raw_folder, verbose=False, project="demo_project") assert len(ds) == 2 d = ds["TUR675_S896_l769_R2_001"] assert d.full_name == "Raw_sequencing_TUR675_S896_l769_R2_001" assert d.dataset_name == "TUR675_S896_l769_R2_001" assert d.is_valid() - From f53a89d5c477d4e4273c3bbd8889dd4547a1dbbc Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 18 Jul 2023 10:21:54 +0100 Subject: [PATCH 12/18] `update_flexilims` correctly uploads tuples parameters --- CHANGELOG.md | 1 + flexiznam/schema/datasets.py | 2 +- tests/test_components/tests_schema/test_datasets.py | 13 +++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf4abd4..173bfee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - return empty dataframe if `filter` in `get_children` filters out everything (instead of crashing) +- `update_flexilims` correctly uploads tuples parameters ### Minor diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 09f54af..3aa129d 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -397,7 +397,7 @@ def update_flexilims(self, mode="safe"): if isinstance(attributes[attribute], np.bool_): attributes[attribute] = bool(attributes[attribute]) if isinstance(attributes[attribute], tuple): - attributes[attribute] = list(attribute) + attributes[attribute] = list(attributes[attribute]) flz.utils.clean_recursively(attributes) if status == "different": diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py index 95fd6d7..56358d8 100644 --- a/tests/test_components/tests_schema/test_datasets.py +++ b/tests/test_components/tests_schema/test_datasets.py @@ -341,6 +341,19 @@ 
def test_update_flexilims(flm_sess): ds.origin_id = None ds.update_flexilims(mode="overwrite") assert err.value.args[0] == "Cannot set origin_id to null" + params = dict( + harp_bin="some/random/path", + di_names=("frame_triggers", "lick_detection", "di2_encoder_initial_state"), + verbose=False, + ) + ds.extra_attributes = params + ds.origin_id = MOUSE_ID + ds.update_flexilims(mode="overwrite") + reloaded_ds = Dataset.from_flexilims( + project, name=ds_name, flexilims_session=flm_sess + ) + assert reloaded_ds.extra_attributes["di_names"] == list(params["di_names"]) + assert reloaded_ds.extra_attributes["verbose"] == params["verbose"] def test_dataset_paths(flm_sess): From 8137c9f0bf8c5cf9ce789d8156f8859838a64d6b Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 18 Jul 2023 10:44:36 +0100 Subject: [PATCH 13/18] [minor] assertion was always true --- flexiznam/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index e8edb41..b5e6544 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -37,9 +37,9 @@ def get_data_root(which, project=None, flexilims_session=None): if project is None: assert ( - flexilims_session is not None, - "`flexilims_session` must be provided if `project` is None", - ) + flexilims_session is not None + ), "`flexilims_session` must be provided if `project` is None" + project = flexilims_session.project_id if project not in PARAMETERS["project_ids"]: From 3d86365928eda78ee9dfac570a9aec67309a7996 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 18 Jul 2023 13:42:09 +0100 Subject: [PATCH 14/18] [bugfix] json compatibility for np.float/np.int --- CHANGELOG.md | 1 + flexiznam/schema/datasets.py | 12 +------- flexiznam/utils.py | 12 ++++++-- .../tests_schema/test_datasets.py | 28 +++++++++++++++++++ 4 files changed, 40 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 173bfee..cc28e69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - return empty dataframe if `filter` in `get_children` filters out everything (instead of crashing) - `update_flexilims` correctly uploads tuples parameters +- `update_flexilims` correctly uploads floats and np.float/np.int parameters ### Minor diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 3aa129d..c154f96 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -389,16 +389,6 @@ def update_flexilims(self, mode="safe"): status = self.flexilims_status() attributes = self.extra_attributes.copy() - # the following lines are necessary because pandas converts python types to numpy - # types, which JSON does not understand and because JSON doesn't like tuples - for attribute in attributes: - if isinstance(attributes[attribute], np.integer): - attributes[attribute] = int(attributes[attribute]) - if isinstance(attributes[attribute], np.bool_): - attributes[attribute] = bool(attributes[attribute]) - if isinstance(attributes[attribute], tuple): - attributes[attribute] = list(attributes[attribute]) - flz.utils.clean_recursively(attributes) if status == "different": if mode == "safe": @@ -417,7 +407,7 @@ def update_flexilims(self, mode="safe"): if self.origin_id is None: if self.get_flexilims_entry().get("origin_id", None) is not None: raise FlexilimsError("Cannot set origin_id to null") - + utils.clean_recursively(attributes) resp = flz.update_entity( datatype="dataset", id=self.id, diff --git a/flexiznam/utils.py b/flexiznam/utils.py index 0effaaa..1e13483 100644 --- 
a/flexiznam/utils.py +++ b/flexiznam/utils.py @@ -150,6 +150,8 @@ def clean_recursively( # we don't have a dictionary ds_classes = set(Dataset.SUBCLASSES.values()) ds_classes.add(Dataset) + floats = (float, np.float32, np.float64) + ints = (int, np.int32, np.int64) if ( (element is None) or isinstance(element, str) @@ -165,8 +167,14 @@ def clean_recursively( element = element.tolist() elif isinstance(element, pathlib.Path): element = str(PurePosixPath(element)) - elif isinstance(element, float) and (not np.isfinite(element)): - element = str(element) + elif isinstance(element, floats): + if not np.isfinite(element): + # nan and inf must be uploaded as string + element = str(element) + else: + element = float(element) + elif isinstance(element, ints): + element = int(element) elif isinstance(element, pd.Series or pd.DataFrame): raise IOError("Cannot make a pandas object json compatible") else: diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py index 56358d8..2bc65b1 100644 --- a/tests/test_components/tests_schema/test_datasets.py +++ b/tests/test_components/tests_schema/test_datasets.py @@ -1,6 +1,7 @@ import pytest import pathlib import pandas as pd +import numpy as np import flexiznam from flexiznam.schema import Dataset, microscopy_data from flexiznam.config import PARAMETERS @@ -315,6 +316,33 @@ def test_update_flexilims(flm_sess): project = "demo_project" ds_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube_wf_camera" ds = Dataset.from_flexilims(project, name=ds_name, flexilims_session=flm_sess) + + # test weirded datatypes + ds.extra_attributes = dict( + nf64=np.float64(1), + nf32=np.float32(1), + ni64=np.int64(1), + ni32=np.int32(1), + pathobj=pathlib.Path("some/path"), + list=[1, 2, 3], + tuple=(1, 2, 3), + f=float(1.0), + i=int(34), + ) + ds.update_flexilims(mode="overwrite") + reloaded_ds = Dataset.from_flexilims( + project, name=ds_name, flexilims_session=flm_sess + ) + assert reloaded_ds.extra_attributes["nf64"] == 1.0 + assert reloaded_ds.extra_attributes["nf32"] == 1.0 + assert reloaded_ds.extra_attributes["ni64"] == 1 + assert reloaded_ds.extra_attributes["ni32"] == 1 + assert reloaded_ds.extra_attributes["pathobj"] == str(pathlib.Path("some/path")) + assert reloaded_ds.extra_attributes["list"] == [1, 2, 3] + assert reloaded_ds.extra_attributes["tuple"] == list((1, 2, 3)) + assert reloaded_ds.extra_attributes["f"] == 1.0 + assert reloaded_ds.extra_attributes["i"] == 34 + original_path = ds.path ds.path = "new/test/path" with pytest.raises(FlexilimsError) as err: From 2110424cda71324010f11b5bc54e811cf7f11f4d Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 18 Jul 2023 13:50:54 +0100 Subject: [PATCH 15/18] [test] update test Tests should not change entity uploaded in 2p tests --- .../tests_schema/test_datasets.py | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py index 2bc65b1..15e1420 100644 --- a/tests/test_components/tests_schema/test_datasets.py +++ b/tests/test_components/tests_schema/test_datasets.py @@ -317,32 +317,6 @@ def test_update_flexilims(flm_sess): ds_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube_wf_camera" ds = Dataset.from_flexilims(project, name=ds_name, flexilims_session=flm_sess) - # test weirded datatypes - ds.extra_attributes = dict( - nf64=np.float64(1), - nf32=np.float32(1), - ni64=np.int64(1), - ni32=np.int32(1), - 
pathobj=pathlib.Path("some/path"),
-        list=[1, 2, 3],
-        tuple=(1, 2, 3),
-        f=float(1.0),
-        i=int(34),
-    )
-    ds.update_flexilims(mode="overwrite")
-    reloaded_ds = Dataset.from_flexilims(
-        project, name=ds_name, flexilims_session=flm_sess
-    )
-    assert reloaded_ds.extra_attributes["nf64"] == 1.0
-    assert reloaded_ds.extra_attributes["nf32"] == 1.0
-    assert reloaded_ds.extra_attributes["ni64"] == 1
-    assert reloaded_ds.extra_attributes["ni32"] == 1
-    assert reloaded_ds.extra_attributes["pathobj"] == str(pathlib.Path("some/path"))
-    assert reloaded_ds.extra_attributes["list"] == [1, 2, 3]
-    assert reloaded_ds.extra_attributes["tuple"] == list((1, 2, 3))
-    assert reloaded_ds.extra_attributes["f"] == 1.0
-    assert reloaded_ds.extra_attributes["i"] == 34
-
     original_path = ds.path
     ds.path = "new/test/path"
     with pytest.raises(FlexilimsError) as err:
@@ -358,6 +356,36 @@ def test_update_flexilims(flm_sess):
     )
     assert reloaded_ds.extra_attributes["di_names"] == list(params["di_names"])
     assert reloaded_ds.extra_attributes["verbose"] == params["verbose"]
+    # test weirded datatypes
+    ds = Dataset.from_origin(
+        project, origin_id=ds.origin_id, flexilims_session=flm_sess,
+        dataset_type='microscopy'
+    )
+    ds.extra_attributes = dict(
+        nf64=np.float64(1),
+        nf32=np.float32(1),
+        ni64=np.int64(1),
+        ni32=np.int32(1),
+        pathobj=pathlib.Path("some/path"),
+        list=[1, 2, 3],
+        tuple=(1, 2, 3),
+        f=float(1.0),
+        i=int(34),
+    )
+    ds.update_flexilims(mode="overwrite")
+    reloaded_ds = Dataset.from_flexilims(
+        project, name=ds.full_name, flexilims_session=flm_sess
+    )
+    assert reloaded_ds.extra_attributes["nf64"] == 1.0
+    assert reloaded_ds.extra_attributes["nf32"] == 1.0
+    assert reloaded_ds.extra_attributes["ni64"] == 1
+    assert reloaded_ds.extra_attributes["ni32"] == 1
+    assert reloaded_ds.extra_attributes["pathobj"] == str(pathlib.Path("some/path"))
+    assert reloaded_ds.extra_attributes["list"] == [1, 2, 3]
+    assert reloaded_ds.extra_attributes["tuple"] == list((1, 2, 3))
+    assert reloaded_ds.extra_attributes["f"] == 1.0
+    assert reloaded_ds.extra_attributes["i"] == 34
+    flm_sess.delete(ds.id)
 
 
 def test_dataset_paths(flm_sess):

From e8c002e82882afe43af9e1b9b88a9a9ec26aff7a Mon Sep 17 00:00:00 2001
From: yiran25
Date: Fri, 21 Jul 2023 11:51:56 +0100
Subject: [PATCH 16/18] change update_flexilims to overwrite if conflicts is overwrite

---
 flexiznam/camp/sync_data.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py
index 7051587..d7d331e 100644
--- a/flexiznam/camp/sync_data.py
+++ b/flexiznam/camp/sync_data.py
@@ -171,6 +171,7 @@ def upload_yaml(
         list of names of entities created/updated
 
     """
+
     output = []
     # if there are errors, I cannot safely parse the yaml
     errors = find_xxerrorxx(yml_file=source_yaml)
@@ -232,12 +233,17 @@ def upload_yaml(
         root_id = mouse["id"]
 
     # session datasets
+    # use "overwrite" as mode if conflicts is "overwrite", otherwise use "safe" mode
+    if conflicts == "overwrite":
+        mode = "overwrite"
+    else:
+        mode = "safe"
     for ds_name, ds in session_data.get("datasets", {}).items():
         ds.genealogy = [mouse["name"], session_data["session"], ds_name]
         ds.project = session_data["project"]
         ds.origin_id = root_id
         ds.flexilims_session = flexilims_session
-        ds.update_flexilims(mode="safe")
+        ds.update_flexilims(mode=mode)
         output.append(ds.full_name)
 
     # now deal with recordings
@@ -276,7 +282,7 @@ def upload_yaml(
             ds.project = session_data["project"]
             ds.origin_id = rec_rep["id"]
             ds.flexilims_session = flexilims_session
-
ds.update_flexilims(mode="safe") + ds.update_flexilims(mode=mode) output.append(ds.full_name) # now deal with samples From 68a694fa2568b34bcc5bdf725eae13b3967052ed Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 26 Jul 2023 19:31:52 +0100 Subject: [PATCH 17/18] [bugfix] Add lock file to token creation This should prevent race condition when running a bunch of jobs at once. --- CHANGELOG.md | 4 ++ flexiznam/config/config_tools.py | 42 ++++++++----- flexiznam/main.py | 35 +++++++---- setup.py | 1 + tests/test_components/test_main.py | 34 +++++++--- .../tests_schema/test_datasets.py | 62 ++++++++----------- tests/tests_resources/data_for_testing.py | 3 +- 7 files changed, 110 insertions(+), 71 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc28e69..9de84ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,12 +16,16 @@ of crashing) - `update_flexilims` correctly uploads tuples parameters - `update_flexilims` correctly uploads floats and np.float/np.int parameters +- Add filelock for token creation to avoid concurrent access and move token to their own + file ### Minor - `harp_dataset.from_folder` will now match csv even if there is nothing before or after `harpmessage` in the file name (i.e. the file is `harpmessage.bin`, and all csvs in the folder will be matched) +- private function `config_tools._find_files` has now a `create_if_missing` argument to + create the file if it does not exist ## v0.3.7 diff --git a/flexiznam/config/config_tools.py b/flexiznam/config/config_tools.py index d1d4357..bf2d373 100644 --- a/flexiznam/config/config_tools.py +++ b/flexiznam/config/config_tools.py @@ -10,7 +10,7 @@ from getpass import getpass -def _find_file(file_name, config_folder=None): +def _find_file(file_name, config_folder=None, create_if_missing=False): """Find a file by looking at various places Only in config_folder (if provided) @@ -19,25 +19,37 @@ def _find_file(file_name, config_folder=None): - then in the ~/.config folder - then in the folder contain the file defining this function - then in sys.path + + Args: + file_name (str): name of the file to find + config_folder (str): folder to look for the file + create_if_missing (bool): if True, create the file in the config folder if it + does not exist. 
If False, raise an error if the file is not found.
     """
     if config_folder is not None:
         in_config_folder = Path(config_folder) / file_name
         if in_config_folder.is_file():
             return in_config_folder
-        raise ConfigurationError("Cannot find %s in %s" % (file_name, config_folder))
-    local = Path.cwd() / file_name
-    if local.is_file():
-        return local
-    config = Path(__file__).parent.absolute() / "config" / file_name
-    home = Path.home() / ".flexiznam"
-    if home.is_dir() and (home / file_name).is_file():
-        return home / file_name
-    if config.is_file():
-        return config
-    for directory in sys.path:
-        fname = Path(directory) / file_name
-        if fname.is_file():
-            return fname
+        missing_file = in_config_folder
+    else:
+        local = Path.cwd() / file_name
+        if local.is_file():
+            return local
+        config = Path(__file__).parent.absolute() / "config" / file_name
+        home = Path.home() / ".flexiznam"
+        if home.is_dir() and (home / file_name).is_file():
+            return home / file_name
+        if config.is_file():
+            return config
+        for directory in sys.path:
+            fname = Path(directory) / file_name
+            if fname.is_file():
+                return fname
+        missing_file = home / file_name
+    if create_if_missing:
+        with open(missing_file, "w") as f:
+            f.write("")
+        return missing_file
     raise ConfigurationError("Cannot find %s" % file_name)
diff --git a/flexiznam/main.py b/flexiznam/main.py
index b5e6544..2ec0867 100755
--- a/flexiznam/main.py
+++ b/flexiznam/main.py
@@ -1,11 +1,13 @@
 import datetime
 import re
+import portalocker
 import warnings
 import pandas as pd
 import flexilims as flm
 from pathlib import Path
 from flexilims.utils import SPECIAL_CHARACTERS
 import flexiznam
+import yaml
 from flexiznam import mcms
 from flexiznam.config import PARAMETERS, get_password, add_password
 from flexiznam.errors import NameNotUniqueError, FlexilimsError, ConfigurationError
@@ -68,7 +70,11 @@ def lookup_project(project_id, prm=None):
 
 
 def get_flexilims_session(
-    project_id=None, username=None, password=None, reuse_token=True
+    project_id=None,
+    username=None,
+    password=None,
+    reuse_token=True,
+    timeout=10,
 ):
     """Open a new flexilims session by creating a new authentication token.
 
@@ -80,6 +86,8 @@ def get_flexilims_session(
         password (str): (optional) flexilims password. If not provided, it is read
             from the secrets file, or failing that triggers an input prompt.
         reuse_token (bool): (optional) if True, try to reuse an existing token
+        timeout (int): (optional) timeout in seconds for the portalocker lock.
+            Defaults to 10.
 
     Returns:
         :py:class:`flexilims.Flexilims`: Flexilims session object.
@@ -96,21 +104,26 @@ def get_flexilims_session(
     if reuse_token:
         today = datetime.datetime.now().strftime("%Y-%m-%d")
-        try:
-            token = get_password(app="flexilims", username="token", allow_input=False)
-            token, date = token.split("_")
+        token_file = flexiznam.config.config_tools._find_file(
+            "flexilims_token.yml", create_if_missing=True
+        )
+        with portalocker.Lock(token_file, "r+", timeout=timeout) as file_handle:
+            tokinfo = yaml.safe_load(file_handle) or {}
+            token = tokinfo.get("token", None)
+            date = tokinfo.get("date", None)
             if date != today:
                 token = None
             else:
                 token = dict(Authorization=f"Bearer {token}")
-        except ConfigurationError:
-            token = None
+            session = flm.Flexilims(
+                username, password, project_id=project_id, token=token
+            )
+            if token is None:
+                # we need to update the token
+                token = session.session.headers["Authorization"].split(" ")[-1]
+                yaml.dump(dict(token=token, date=today), file_handle)
     else:
-        token = None
-    session = flm.Flexilims(username, password, project_id=project_id, token=token)
-    if reuse_token:
-        token = session.session.headers["Authorization"].split(" ")[-1]
-        add_password("flexilims", "token", f"{token}_{today}")
+        session = flm.Flexilims(username, password, project_id=project_id, token=None)
     return session
diff --git a/setup.py b/setup.py
index afd0c32..003ab18 100755
--- a/setup.py
+++ b/setup.py
@@ -13,6 +13,7 @@
     install_requires=[
         "Click",
         "pandas",
+        "portalocker",
         "webbot",
         "pyyaml",
         "flexilims @ git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims",
diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py
index c6aa785..252a588 100644
--- a/tests/test_components/test_main.py
+++ b/tests/test_components/test_main.py
@@ -2,8 +2,10 @@
 import pathlib
 from pathlib import Path
 import pandas as pd
+import portalocker
 import pytest
-import flexiznam.main as flz
+import flexiznam as flz
+import yaml
 from flexiznam.config import PARAMETERS, get_password
 from flexiznam.errors import FlexilimsError, NameNotUniqueError
 from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION
@@ -26,7 +28,7 @@ def test_get_path():
     p = flz.get_data_root(which="raw", project="example", flexilims_session=None)
     assert p == Path("/camp/project/example_project/raw")
-    with pytest.raises(AttributeError):
+    with pytest.raises(AssertionError):
         flz.get_data_root(which="processed", project=None, flexilims_session=None)
     with pytest.raises(ValueError):
         flz.get_data_root(which="crap", project="test", flexilims_session=None)
     with pytest.raises(AssertionError):
@@ -41,13 +43,28 @@ def test_get_flexilims_session():
     assert sess.username == PARAMETERS["flexilims_username"]
     sess = flz.get_flexilims_session(project_id=None, reuse_token=True)
     assert sess.username == PARAMETERS["flexilims_username"]
-    token, date = get_password("flexilims", "token").split("_")
+    token_file = flz.config.config_tools._find_file("flexilims_token.yml")
+    tokinfo = yaml.safe_load(token_file.read_text())
+    token = tokinfo.get("token", None)
+    date = tokinfo.get("date", None)
     today = datetime.datetime.now().strftime("%Y-%m-%d")
     assert date == today
     assert sess.session.headers["Authorization"].split(" ")[1] == token
     sess = flz.get_flexilims_session(project_id=None, reuse_token=True)
     assert sess.session.headers["Authorization"].split(" ")[1] == token
 
+    # manually lock the token file to test timeout
+    with portalocker.Lock(token_file, "r+", timeout=10) as file_handle:
+        with pytest.raises(portalocker.exceptions.LockException):
+            sess =
flz.get_flexilims_session( + project_id=None, reuse_token=True, timeout=0.1 + ) + # but fine without the reuse_token flag + sess = flz.get_flexilims_session(project_id=None, reuse_token=False) + assert sess.session.headers["Authorization"].split(" ")[1] != token + sess = flz.get_flexilims_session(project_id=None, reuse_token=True, timeout=0.1) + assert sess.session.headers["Authorization"].split(" ")[1] == token + def test_format_results(): exmple_res = { @@ -177,7 +194,7 @@ def test_get_datasets(flm_sess): filter_datasets=dict(acq_uid="overview_zoom1_00001"), allow_multiple=True, ) - assert isinstance(ds, pd.DataFrame) + assert isinstance(ds, pd.DataFrame) ds = flz.get_datasets( origin_name=SESSION, flexilims_session=flm_sess, @@ -185,7 +202,7 @@ def test_get_datasets(flm_sess): filter_datasets=dict(acq_uid="overview_zoom1_00001"), allow_multiple=False, ) - assert isinstance(ds, pd.Series) + assert isinstance(ds, pd.Series) rec = flz.get_children( parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording" @@ -247,10 +264,13 @@ def test_get_datasets_recursively(flm_sess): origin_name=SESSION, return_paths=False, dataset_type="harp", - filter_datasets=dict(binary_file="harpmessage.bin"), + filter_datasets=dict( + binary_file="PZAD9.4d_S20211102_R173917_SpheresPermTube_harpmessage.bin" + ), ) assert len(ds_dict) == 1 - assert ds_dict.values().__iter__().__next__()[0].dataset_name == "harpmessage" + ds = ds_dict.values().__iter__().__next__()[0] + assert isinstance(ds, HarpData) ds_dict = flz.get_datasets_recursively( flexilims_session=flm_sess, diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py index 15e1420..38c5ab9 100644 --- a/tests/test_components/tests_schema/test_datasets.py +++ b/tests/test_components/tests_schema/test_datasets.py @@ -101,64 +101,47 @@ def test_dataset_flexilims_integration(flm_sess): extra_attributes={}, created="", flexilims_session=flm_sess, - genealogy=( - "mouse_physio_2p", - "S20211102", - "R165821_SpheresPermTube", - "wf_camera", - ), + genealogy=("PZAA15.1a", "temporary_dataset"), ) st = ds.flexilims_status() + assert st == "not online" + ds.update_flexilims() + st = ds.flexilims_status() + assert st == "up-to-date" + ds.extra_attributes = dict(test="test") + ds.path = "new/path" + st = ds.flexilims_status() assert st == "different" rep = ds.flexilims_report() expected = pd.DataFrame( dict( - offline={ - "is_raw": "no", - "path": "fake/path", - "created": "", - "metadata_file": "NA", - "timestamp_file": "NA", - "video_file": "NA", - }, - flexilims={ - "is_raw": "yes", - "created": "2021-11-02 17:03:17", - "path": "demo_project/mouse_physio_2p/" - "S20211102/R165821_SpheresPermTube", - "origin_id": "61ebf94120d82a35f724490d", - "timestamp_file": "wf_camera_timestamps.csv", - "video_file": "wf_camera_data.bin", - "metadata_file": "wf_camera_metadata.txt", - }, + offline={"path": "fake/path", "test": "test"}, + flexilims={"path": "new/path", "test": "NA"}, ) ) assert all(rep.sort_index() == expected.sort_index()) - ds_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube_wf_camera" + ds_name = "PZAA15.1a_temporary_dataset" + assert ds.format().name == ds_name fmt = { - "path": "fake/path", "created": "", "dataset_type": "camera", + "genealogy": ("PZAA15.1a", "temporary_dataset"), "is_raw": "no", - "name": ds_name, + "name": "PZAA15.1a_temporary_dataset", + "path": "new/path", "project": "610989f9a651ff0b6237e0f6", + "test": "test", "type": "dataset", - "genealogy": ( - 
"mouse_physio_2p", - "S20211102", - "R165821_SpheresPermTube", - "wf_camera", - ), } - assert ds.format().name == ds_name assert all( ds.format().drop("origin_id").sort_index() == pd.Series(data=fmt, name=ds_name).sort_index() ) # same with yaml mode - fmt["extra_attributes"] = {} + fmt["extra_attributes"] = {"test": "test"} fmt["genealogy"] = list(fmt["genealogy"]) + fmt.pop("test") ds_yaml = ds.format(mode="yaml") try: del ds_yaml["origin_id"] @@ -166,6 +149,9 @@ def test_dataset_flexilims_integration(flm_sess): pass assert ds_yaml == fmt + flm_sess.delete(ds.id) + assert ds.flexilims_status() == "not online" + ds = Dataset( path="fake/path", is_raw="no", @@ -358,8 +344,10 @@ def test_update_flexilims(flm_sess): assert reloaded_ds.extra_attributes["verbose"] == params["verbose"] # test weirded datatypes ds = Dataset.from_origin( - project, origin_id=ds.origin_id, flexilims_session=flm_sess, - dataset_type='microscopy' + project, + origin_id=ds.origin_id, + flexilims_session=flm_sess, + dataset_type="microscopy", ) ds.extra_attributes = dict( nf64=np.float64(1), diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py index 78d5e34..dd6bb0a 100644 --- a/tests/tests_resources/data_for_testing.py +++ b/tests/tests_resources/data_for_testing.py @@ -4,7 +4,8 @@ from flexiznam.config import PARAMETERS -MOUSE_ID = "6437dcb13ded9c65df142a12" +MOUSE_ID = "6437dcb13ded9c65df142a12" # actual physio2p mouse +MOUSE_TEMP = "647a1aec7ddb34517470d3e6" # some random mouse where I can change data TEST_PROJECT = "demo_project" PROJECT_ID = "610989f9a651ff0b6237e0f6" SESSION = "mouse_physio_2p_S20211102" From d8cbad702096dbb07416feb46c230b3da2fdde59 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 27 Jul 2023 18:11:52 +0100 Subject: [PATCH 18/18] [docs] Add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9de84ce..053d5f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ of crashing) - `update_flexilims` correctly uploads tuples parameters - `update_flexilims` correctly uploads floats and np.float/np.int parameters +- `update_flexilims` can overwrite existing datasets (if `conflicts='overwrite'`) - Add filelock for token creation to avoid concurrent access and move token to their own file