diff --git a/CHANGELOG.md b/CHANGELOG.md
index 09a632b..053d5f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,33 @@
# Change log
+## v0.3.8
+
+### Main changes
+
+- Add `get_data_root` function to get the `raw` or `processed` root for a project
+- `get_children` can filter children by attributes before returning results
+- Refactor `get_datasets` to be non-recursive, with new options to filter datasets
+  and format the output (see the sketch below)
+- Add `get_datasets_recursively` to get all datasets below a given entity
+
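+A minimal usage sketch of the new helpers; the project names come from the example
+config, and the session name and filter value are placeholders:
+
+```python
+import flexiznam as flz
+from flexiznam.config import PARAMETERS
+
+# root folder for a project (uses `project_paths` if defined, else `data_root`)
+raw_root = flz.get_data_root(which="raw", project="example")
+
+# datasets directly attached to an entity, filtered by their attributes
+flm_sess = flz.get_flexilims_session(project_id=PARAMETERS["project_ids"]["test"])
+camera_paths = flz.get_datasets(
+    origin_name="mouse_physio_2p_S20211102",
+    dataset_type="camera",
+    filter_datasets=dict(timestamp_file="face_camera_timestamps.csv"),
+    flexilims_session=flm_sess,
+    return_paths=True,
+)
+
+# all datasets below a session, grouped by direct parent id
+ds_dict = flz.get_datasets_recursively(
+    origin_name="mouse_physio_2p_S20211102",
+    dataset_type="harp",
+    flexilims_session=flm_sess,
+)
+```
+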
+### Bugfixes
+
+- Return an empty dataframe if `filter` in `get_children` filters out everything
+  (instead of crashing)
+- `update_flexilims` correctly uploads tuple parameters
+- `update_flexilims` correctly uploads float and `np.float`/`np.int` parameters (see
+  the sketch below)
+- `update_flexilims` can overwrite existing datasets (if `conflicts='overwrite'`)
+- Add a file lock around token creation to avoid concurrent access, and move tokens
+  to their own file
+
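+A sketch of attribute types that now upload cleanly, assuming `ds` is an existing
+`Dataset` with its flexilims session set (names and values are illustrative):
+
+```python
+import numpy as np
+
+ds.extra_attributes = dict(
+    di_names=("frame_triggers", "lick_detection"),  # tuple, stored as a list
+    n_frames=np.int64(1000),  # numpy scalars become plain int/float
+    rate=np.float32(30.0),
+)
+ds.update_flexilims(mode="overwrite")  # can now overwrite the existing entry
+```
+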
+### Minor
+
+- `harp_dataset.from_folder` will now match CSVs even if there is nothing before or
+  after `harpmessage` in the file name (i.e. if the binary file is just
+  `harpmessage.bin`, all CSVs in the folder will be matched)
+- Private function `config_tools._find_file` now has a `create_if_missing` argument
+  to create the file if it does not exist
+
## v0.3.7
### Main changes
diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py
index 7051587..d7d331e 100644
--- a/flexiznam/camp/sync_data.py
+++ b/flexiznam/camp/sync_data.py
@@ -171,6 +171,7 @@ def upload_yaml(
list of names of entities created/updated
"""
+
output = []
# if there are errors, I cannot safely parse the yaml
errors = find_xxerrorxx(yml_file=source_yaml)
@@ -232,12 +233,17 @@ def upload_yaml(
root_id = mouse["id"]
# session datasets
+ # use "overwrite" as mode if conflict is "overwrite", otherwise use "safe" mode
+ if conflicts == "overwrite":
+ mode = "overwrite"
+ else:
+ mode = "safe"
for ds_name, ds in session_data.get("datasets", {}).items():
ds.genealogy = [mouse["name"], session_data["session"], ds_name]
ds.project = session_data["project"]
ds.origin_id = root_id
ds.flexilims_session = flexilims_session
- ds.update_flexilims(mode="safe")
+ ds.update_flexilims(mode=mode)
output.append(ds.full_name)
# now deal with recordings
@@ -276,7 +282,7 @@ def upload_yaml(
ds.project = session_data["project"]
ds.origin_id = rec_rep["id"]
ds.flexilims_session = flexilims_session
- ds.update_flexilims(mode="safe")
+ ds.update_flexilims(mode=mode)
output.append(ds.full_name)
# now deal with samples
diff --git a/flexiznam/config/config_tools.py b/flexiznam/config/config_tools.py
index d1d4357..bf2d373 100644
--- a/flexiznam/config/config_tools.py
+++ b/flexiznam/config/config_tools.py
@@ -10,7 +10,7 @@
from getpass import getpass
-def _find_file(file_name, config_folder=None):
+def _find_file(file_name, config_folder=None, create_if_missing=False):
"""Find a file by looking at various places
Only in config_folder (if provided)
@@ -19,25 +19,37 @@ def _find_file(file_name, config_folder=None):
     - then in the current working directory
     - then in the ~/.flexiznam folder
     - then in the `config` folder next to the file defining this function
- then in sys.path
+
+ Args:
+ file_name (str): name of the file to find
+ config_folder (str): folder to look for the file
+        create_if_missing (bool): if True, create an empty file if none is found
+            (in `config_folder` if provided, otherwise in `~/.flexiznam`). If
+            False, raise a ConfigurationError when the file is not found
"""
if config_folder is not None:
in_config_folder = Path(config_folder) / file_name
if in_config_folder.is_file():
return in_config_folder
- raise ConfigurationError("Cannot find %s in %s" % (file_name, config_folder))
- local = Path.cwd() / file_name
- if local.is_file():
- return local
- config = Path(__file__).parent.absolute() / "config" / file_name
- home = Path.home() / ".flexiznam"
- if home.is_dir() and (home / file_name).is_file():
- return home / file_name
- if config.is_file():
- return config
- for directory in sys.path:
- fname = Path(directory) / file_name
- if fname.is_file():
- return fname
+ missing_file = in_config_folder
+ else:
+ local = Path.cwd() / file_name
+ if local.is_file():
+ return local
+ config = Path(__file__).parent.absolute() / "config" / file_name
+ home = Path.home() / ".flexiznam"
+ if home.is_dir() and (home / file_name).is_file():
+ return home / file_name
+ if config.is_file():
+ return config
+ for directory in sys.path:
+ fname = Path(directory) / file_name
+ if fname.is_file():
+ return fname
+ missing_file = home / file_name
+    if create_if_missing:
+        # make sure the parent folder exists before creating an empty file
+        missing_file.parent.mkdir(parents=True, exist_ok=True)
+        missing_file.touch()
+        return missing_file
raise ConfigurationError("Cannot find %s" % file_name)
diff --git a/flexiznam/config/default_config.py b/flexiznam/config/default_config.py
index 06b5e62..5e0d4ae 100644
--- a/flexiznam/config/default_config.py
+++ b/flexiznam/config/default_config.py
@@ -14,6 +14,12 @@
"test": "606df1ac08df4d77c72c9aa4",
"AAVRKO_retina_hva": "60a7757a8901a2357f29080a",
},
+ project_paths={
+ "example": dict(
+ raw="/camp/project/example_project/raw",
+ processed="/camp/project/example_project/processed",
+ )
+ },
# a default username can be specified
flexilims_username="yourusername",
# Use for data dataset detection
diff --git a/flexiznam/main.py b/flexiznam/main.py
index a663f4b..2ec0867 100755
--- a/flexiznam/main.py
+++ b/flexiznam/main.py
@@ -1,11 +1,13 @@
import datetime
import re
+import portalocker
import warnings
import pandas as pd
import flexilims as flm
from pathlib import Path
from flexilims.utils import SPECIAL_CHARACTERS
import flexiznam
+import yaml
from flexiznam import mcms
from flexiznam.config import PARAMETERS, get_password, add_password
from flexiznam.errors import NameNotUniqueError, FlexilimsError, ConfigurationError
@@ -22,6 +24,38 @@ def _format_project(project_id, prm):
return project_id
+def get_data_root(which, project=None, flexilims_session=None):
+ """Get raw or processed path for a project
+
+ Args:
+ which (str): either "raw" or "processed"
+ project (str, optional): name or id of the project. Optional if
+ flexilims_session is provided
+ flexilims_session (:py:class:`flexilims.Flexilims`, optional): a flexilims
+ session with project set. Optional if project is provided.
+ """
+ if which not in ["raw", "processed"]:
+ raise ValueError("which must be either 'raw' or 'processed'")
+
+ if project is None:
+ assert (
+ flexilims_session is not None
+ ), "`flexilims_session` must be provided if `project` is None"
+
+ project = flexilims_session.project_id
+
+    if project not in PARAMETERS["project_ids"]:
+        # we might have been given a hexadecimal id rather than a name
+        project_name = lookup_project(project, prm=None)
+        assert project_name is not None, f"Invalid project {project}"
+        project = project_name
+
+    if project in PARAMETERS.get("project_paths", {}):
+ return Path(PARAMETERS["project_paths"][project][which])
+
+    return Path(PARAMETERS["data_root"][which])
+
+
def lookup_project(project_id, prm=None):
"""
Look up project name by hexadecimal id
@@ -36,7 +70,11 @@ def lookup_project(project_id, prm=None):
def get_flexilims_session(
- project_id=None, username=None, password=None, reuse_token=True
+ project_id=None,
+ username=None,
+ password=None,
+ reuse_token=True,
+ timeout=10,
):
"""Open a new flexilims session by creating a new authentication token.
@@ -48,6 +86,8 @@ def get_flexilims_session(
password (str): (optional) flexilims password. If not provided, it is
read from the secrets file, or failing that triggers an input prompt.
reuse_token (bool): (optional) if True, try to reuse an existing token
+        timeout (float): (optional) timeout in seconds to acquire the lock on the
+            token file. Defaults to 10.
Returns:
:py:class:`flexilims.Flexilims`: Flexilims session object.
@@ -64,21 +104,26 @@ def get_flexilims_session(
if reuse_token:
today = datetime.datetime.now().strftime("%Y-%m-%d")
- try:
- token = get_password(app="flexilims", username="token", allow_input=False)
- token, date = token.split("_")
+        # the token is cached in its own file; lock it to avoid concurrent access
+        token_file = flexiznam.config.config_tools._find_file(
+            "flexilims_token.yml", create_if_missing=True
+        )
+        with portalocker.Lock(token_file, "r+", timeout=timeout) as file_handle:
+ tokinfo = yaml.safe_load(file_handle) or {}
+ token = tokinfo.get("token", None)
+ date = tokinfo.get("date", None)
if date != today:
token = None
else:
token = dict(Authorization=f"Bearer {token}")
- except ConfigurationError:
- token = None
+ session = flm.Flexilims(
+ username, password, project_id=project_id, token=token
+ )
+            if token is None:
+                # we need to update the token; overwrite the file, do not append
+                token = session.session.headers["Authorization"].split(" ")[-1]
+                file_handle.seek(0)
+                file_handle.truncate()
+                yaml.dump(dict(token=token, date=today), file_handle)
else:
- token = None
- session = flm.Flexilims(username, password, project_id=project_id, token=token)
- if reuse_token:
- token = session.session.headers["Authorization"].split(" ")[-1]
- add_password("flexilims", "token", f"{token}_{today}")
+ session = flm.Flexilims(username, password, project_id=project_id, token=None)
return session
@@ -946,6 +991,7 @@ def get_children(
children_datatype=None,
project_id=None,
flexilims_session=None,
+ filter=None,
):
"""
Get all entries belonging to a particular parent entity
@@ -957,6 +1003,7 @@ def get_children(
types if None)
project_id (str): text name of the project
flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object
+ filter (dict, None): filter to apply to the extra_attributes of the children
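+            (e.g. `filter=dict(notes="Motion correction reference")`)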
Returns:
DataFrame: containing all the relevant child entitites
@@ -975,39 +1022,154 @@ def get_children(
return pd.DataFrame(results)
if children_datatype is not None:
results = [r for r in results if r["type"] == children_datatype]
+ if filter is not None:
+ for key, value in filter.items():
+ results = [r for r in results if r.get(key, None) == value]
+
results = pd.DataFrame(results)
- results.set_index("name", drop=False, inplace=True)
+ if len(results):
+ results.set_index("name", drop=False, inplace=True)
return results
-def get_datasets(
- origin_id,
- recording_type=None,
+def get_child_dataset(flz_session, parent_name, dataset_type):
+ """
+ Get the last dataset of a given type for a given parent entity.
+
+ Args:
+ flz_session (flexilims_session): flexilims session
+ parent_name (str): name of the parent entity
+ dataset_type (str): type of the dataset
+
+ Returns:
+ Dataset: the last dataset of the given type for the given parent entity
+
+ """
+ all_children = get_children(
+ parent_name=parent_name,
+ children_datatype="dataset",
+ flexilims_session=flz_session,
+ )
+ selected_datasets = all_children[all_children["dataset_type"] == dataset_type]
+    if len(selected_datasets) == 0:
+        raise ValueError(f"No {dataset_type} dataset found for {parent_name}")
+    elif len(selected_datasets) > 1:
+        print(
+            f"{len(selected_datasets)} {dataset_type} datasets found for "
+            f"{parent_name}. Will return the last one."
+        )
+ return flexiznam.Dataset.from_dataseries(
+ selected_datasets.iloc[-1], flexilims_session=flz_session
+ )
+
+
+def get_datasets_recursively(
+ origin_id=None,
+ origin_name=None,
+ origin_series=None,
dataset_type=None,
+ filter_datasets=None,
+ parent_type=None,
+ filter_parents=None,
+ return_paths=False,
project_id=None,
flexilims_session=None,
- return_paths=True,
+ _output=None,
):
- """
- Recurse into recordings and get paths to child datasets of a given type.
+ """Get datasets recursively from a parent entity
For example, this is useful if you want to retrieve paths to all *scanimage*
datasets associated with a given session.
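+
+    Args:
+        origin_id (str): hexadecimal ID of the origin entity. Not required if
+            `origin_name` or `origin_series` is provided.
+        origin_name (str): text name of the origin entity.
+        origin_series (pandas.Series): series of the origin entity, used internally
+            for recursion.
+        dataset_type (str): type of the datasets to return. If `None`, return all
+            datasets.
+        filter_datasets (dict): attributes to filter datasets by.
+        parent_type (str): if not `None`, keep only datasets whose direct parent is
+            of this type.
+        filter_parents (dict): attributes to filter direct parents by.
+        return_paths (bool): if True, return paths instead of Dataset objects.
+        project_id (str): text name of the project. Not required if
+            `flexilims_session` is provided.
+        flexilims_session (:py:class:`flexilims.Flexilims`): Flexilims session
+            object.
+        _output (dict): internal argument used for recursion.
+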
+ Returns:
+        dict: dictionary with direct parent ids as keys and lists of associated
+            datasets (or of dataset paths if `return_paths` is True) as values
+
+ """
+ if origin_series is None:
+ if origin_id is None:
+ origin_id = get_id(origin_name, flexilims_session=flexilims_session)
+ origin_series = get_entity(id=origin_id, flexilims_session=flexilims_session)
+ else:
+ origin_id = origin_series["id"]
+ origin_is_valid = True
+
+ # initialize output if first call
+ if _output is None:
+ _output = {}
+
+ # Before adding the datasets of this level, check if the parent is valid
+ if (parent_type is not None) and (origin_series["type"] != parent_type):
+ origin_is_valid = False
+ if filter_parents is not None:
+ for key, value in filter_parents.items():
+ if origin_series.get(key, None) != value:
+ origin_is_valid = False
+
+ if origin_is_valid:
+ ds = get_datasets(
+ origin_id=origin_id,
+ dataset_type=dataset_type,
+ project_id=project_id,
+ flexilims_session=flexilims_session,
+ return_paths=return_paths,
+ filter_datasets=filter_datasets,
+ )
+ # add only if there are datasets
+ if len(ds):
+ _output[origin_id] = ds
+
+ # now recurse on children
+ children = get_children(
+ parent_id=origin_id,
+ parent_name=origin_name,
+ flexilims_session=flexilims_session,
+ )
+ for _, child in children.iterrows():
+ if child.type == "dataset":
+ continue
+ get_datasets_recursively(
+ origin_series=child,
+ dataset_type=dataset_type,
+ project_id=project_id,
+ flexilims_session=flexilims_session,
+ return_paths=return_paths,
+ filter_datasets=filter_datasets,
+ filter_parents=filter_parents,
+ _output=_output,
+ )
+ return _output
+
+
+def get_datasets(
+ origin_id=None,
+ origin_name=None,
+ dataset_type=None,
+ project_id=None,
+ flexilims_session=None,
+ filter_datasets=None,
+ allow_multiple=True,
+ return_paths=False,
+ return_dataseries=False,
+):
+ """
Args:
- origin_id (str): hexadecimal ID of the origin session.
- recording_type (str): type of the recording to filter by. If `None`,
- will return datasets for all recordings.
+ origin_id (str): hexadecimal ID of the origin session. Not required if
+ origin_name is provided.
+ origin_name (str): text name of the origin session. Not required if origin_id
+ is provided.
         dataset_type (str): type of the dataset to filter by. If `None`,
will return all datasets.
project_id (str): text name of the project. Not required if
`flexilims_session` is provided.
flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object
- return_paths (bool): if True, return a list of paths. If False, return the
- dataset objects.
+ filter_datasets (dict): dictionary of key-value pairs to filter datasets by.
+ allow_multiple (bool): if True, allow multiple datasets to be returned,
+ otherwise ensure that only one dataset exists online and return it.
+        return_paths (bool): if True, return paths instead of Dataset objects.
+        return_dataseries (bool): if True, return a DataFrame (or a Series if
+            `allow_multiple` is False) instead of Dataset objects.
+
+    Returns:
+        list, DataFrame, Dataset, Series, Path, or None: the matching datasets,
+        formatted according to `return_paths`, `return_dataseries`, and
+        `allow_multiple` (None if `allow_multiple` is False and nothing matches).
- Returns:
- dict: Dictionary with recording names as keys containing lists of associated dataset paths.
"""
assert (project_id is not None) or (flexilims_session is not None)
@@ -1015,46 +1177,39 @@ def get_datasets(
flexilims_session = get_flexilims_session(project_id)
else:
project_id = lookup_project(flexilims_session.project_id, PARAMETERS)
- recordings = get_entities(
- datatype="recording",
- origin_id=origin_id,
- query_key="recording_type",
- query_value=recording_type,
+
+ if origin_id is None:
+ assert origin_name is not None, "Must provide either origin_id or origin_name"
+ if filter_datasets is None:
+ filter_datasets = {}
+ if dataset_type is not None:
+ filter_datasets.update({"dataset_type": dataset_type})
+
+ datasets = get_children(
+ parent_id=origin_id,
+ parent_name=origin_name,
+ children_datatype="dataset",
flexilims_session=flexilims_session,
+ filter=filter_datasets,
)
- datapath_dict = {}
- if len(recordings) < 1:
- return datapath_dict
- for recording_id in recordings["id"]:
- datasets = get_entities(
- datatype="dataset",
- origin_id=recording_id,
- query_key="dataset_type",
- query_value=dataset_type,
- flexilims_session=flexilims_session,
- )
+
+ if not return_dataseries:
+ datasets = [
+ flexiznam.Dataset.from_dataseries(
+ dataseries=ds, flexilims_session=flexilims_session
+ )
+ for _, ds in datasets.iterrows()
+ ]
if return_paths:
- datapaths = []
- for dataset_path, is_raw in zip(datasets["path"], datasets["is_raw"]):
- prefix = (
- PARAMETERS["data_root"]["raw"]
- if is_raw == "yes"
- else PARAMETERS["data_root"]["processed"]
- )
- this_path = Path(prefix) / dataset_path
- if this_path.exists():
- datapaths.append(str(this_path))
- else:
- raise IOError("Dataset {} not found".format(this_path))
- datapath_dict[recording_id] = datapaths
+ datasets = [ds.path_full for ds in datasets]
+
+ if not allow_multiple:
+        assert len(datasets) <= 1, f"Found {len(datasets)} datasets. Expected 1."
+ if len(datasets) == 1:
+ datasets = datasets[0] if not return_dataseries else datasets.iloc[0]
else:
- datapath_dict[recording_id] = [
- flexiznam.Dataset.from_dataseries(
- dataseries=ds, flexilims_session=flexilims_session
- )
- for _, ds in datasets.iterrows()
- ]
- return datapath_dict
+ datasets = None
+ return datasets
def generate_name(datatype, name, flexilims_session=None, project_id=None):
diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py
index 11e181e..c154f96 100644
--- a/flexiznam/schema/datasets.py
+++ b/flexiznam/schema/datasets.py
@@ -389,16 +389,6 @@ def update_flexilims(self, mode="safe"):
status = self.flexilims_status()
attributes = self.extra_attributes.copy()
- # the following lines are necessary because pandas converts python types to numpy
- # types, which JSON does not understand and because JSON doesn't like tuples
- for attribute in attributes:
- if isinstance(attributes[attribute], np.integer):
- attributes[attribute] = int(attributes[attribute])
- if isinstance(attributes[attribute], np.bool_):
- attributes[attribute] = bool(attributes[attribute])
- if isinstance(attributes[attribute], tuple):
- attributes[attribute] = list(attribute)
- flz.utils.clean_recursively(attributes)
if status == "different":
if mode == "safe":
@@ -417,7 +407,7 @@ def update_flexilims(self, mode="safe"):
if self.origin_id is None:
if self.get_flexilims_entry().get("origin_id", None) is not None:
raise FlexilimsError("Cannot set origin_id to null")
-
+        flz.utils.clean_recursively(attributes)
resp = flz.update_entity(
datatype="dataset",
id=self.id,
@@ -711,12 +701,11 @@ def is_raw(self, value):
@property
def path_root(self):
"""Get CAMP root path that should apply to this dataset"""
- if self.is_raw:
- return Path(flz.config.PARAMETERS["data_root"]["raw"])
- elif self.is_raw is None:
+ if self.is_raw is None:
raise AttributeError("`is_raw` must be set to find path.")
- else:
- return Path(flz.config.PARAMETERS["data_root"]["processed"])
+ return flz.get_data_root(
+ which="raw" if self.is_raw else "processed", project=self.project
+ )
@property
def path_full(self):
diff --git a/flexiznam/schema/harp_data.py b/flexiznam/schema/harp_data.py
index 3613cd1..70a9f4f 100644
--- a/flexiznam/schema/harp_data.py
+++ b/flexiznam/schema/harp_data.py
@@ -55,17 +55,15 @@ def from_folder(
"file name." % bin_file
)
continue
- if any(m.groups()): # do not match csv if the bin is just "harpmessage"
- pattern = "(.*)".join(m.groups()) + ".csv"
- matches = [re.match(pattern, f) for f in csv_files]
- associated_csv = {
- m.groups()[0].strip("_"): f for f, m in zip(csv_files, matches) if m
- }
- if matched_files.intersection(associated_csv.values()):
- raise IOError("A csv file matched with multiple binary files.")
- matched_files.update(associated_csv.values())
- else:
- associated_csv = {}
+
+ pattern = "(.*)".join(m.groups()) + ".csv"
+ matches = [re.match(pattern, f) for f in csv_files]
+ associated_csv = {
+ m.groups()[0].strip("_"): f for f, m in zip(csv_files, matches) if m
+ }
+ if matched_files.intersection(associated_csv.values()):
+ raise IOError("A csv file matched with multiple binary files.")
+ matched_files.update(associated_csv.values())
bin_path = pathlib.Path(folder) / bin_file
created = datetime.datetime.fromtimestamp(bin_path.stat().st_mtime)
diff --git a/flexiznam/schema/sequencing_data.py b/flexiznam/schema/sequencing_data.py
index d226b68..4f4ab0f 100644
--- a/flexiznam/schema/sequencing_data.py
+++ b/flexiznam/schema/sequencing_data.py
@@ -48,7 +48,7 @@ def from_folder(
is_raw (bool): does this folder contain raw data?
verbose (bool=True): print info about what is found
flexilims_session (flm.Session): session to interact with flexilims
- project (str): project ID or name
+ project (str): project name
Returns:
dict of datasets (fzm.schema.sequencing_data.SequencingData)
diff --git a/flexiznam/utils.py b/flexiznam/utils.py
index 0effaaa..1e13483 100644
--- a/flexiznam/utils.py
+++ b/flexiznam/utils.py
@@ -150,6 +150,8 @@ def clean_recursively(
# we don't have a dictionary
ds_classes = set(Dataset.SUBCLASSES.values())
ds_classes.add(Dataset)
+    # the numpy abstract base classes catch all widths (float16/32/64, int8/.../64)
+    floats = (float, np.floating)
+    ints = (int, np.integer)
if (
(element is None)
or isinstance(element, str)
@@ -165,8 +167,14 @@ def clean_recursively(
element = element.tolist()
elif isinstance(element, pathlib.Path):
element = str(PurePosixPath(element))
- elif isinstance(element, float) and (not np.isfinite(element)):
- element = str(element)
+ elif isinstance(element, floats):
+ if not np.isfinite(element):
+ # nan and inf must be uploaded as string
+ element = str(element)
+ else:
+ element = float(element)
+ elif isinstance(element, ints):
+ element = int(element)
     elif isinstance(element, (pd.Series, pd.DataFrame)):
raise IOError("Cannot make a pandas object json compatible")
else:
diff --git a/setup.py b/setup.py
index df05b7d..003ab18 100755
--- a/setup.py
+++ b/setup.py
@@ -2,17 +2,18 @@
setup(
name="flexiznam",
- version="v0.3.7",
+ version="v0.3.8",
url="https://github.com/znamlab/flexznam",
license="MIT",
author="Antonin Blot",
author_email="antonin.blot@gmail.com",
description="Znamlab tool to interact with flexilims",
- packages=find_packages(),
+ packages=find_packages(exclude=["tests"]),
include_package_data=True,
install_requires=[
"Click",
"pandas",
+ "portalocker",
"webbot",
"pyyaml",
"flexilims @ git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims",
diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml
index d6f2e8e..e3838f1 100644
--- a/tests/test-results/pytest_in_tests.xml
+++ b/tests/test-results/pytest_in_tests.xml
@@ -1 +1 @@
-
\ No newline at end of file
+
\ No newline at end of file
diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py
index 8544755..252a588 100644
--- a/tests/test_components/test_main.py
+++ b/tests/test_components/test_main.py
@@ -1,17 +1,41 @@
import datetime
+import pathlib
+from pathlib import Path
import pandas as pd
+import portalocker
import pytest
-import flexiznam.main as flz
+import flexiznam as flz
+import yaml
from flexiznam.config import PARAMETERS, get_password
from flexiznam.errors import FlexilimsError, NameNotUniqueError
from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION
# Test functions from main.py
-from flexiznam.schema import Dataset
+from flexiznam.schema import Dataset, HarpData, ScanimageData
# this needs to change every time I reset flexlilims
+def test_get_data_root():
+ p = flz.get_data_root(which="raw", project="test", flexilims_session=None)
+ assert p == Path(PARAMETERS["data_root"]["raw"])
+ p = flz.get_data_root(which="processed", project="test", flexilims_session=None)
+ assert p == Path(PARAMETERS["data_root"]["processed"])
+ sess = flz.get_flexilims_session(
+ project_id=PARAMETERS["project_ids"]["test"], reuse_token=False
+ )
+ p = flz.get_data_root(which="processed", project=None, flexilims_session=sess)
+ assert p == Path(PARAMETERS["data_root"]["processed"])
+ p = flz.get_data_root(which="raw", project="example", flexilims_session=None)
+ assert p == Path("/camp/project/example_project/raw")
+ with pytest.raises(AssertionError):
+ flz.get_data_root(which="processed", project=None, flexilims_session=None)
+ with pytest.raises(ValueError):
+ flz.get_data_root(which="crap", project="test", flexilims_session=None)
+ with pytest.raises(AssertionError):
+ p = flz.get_data_root(which="raw", project="random", flexilims_session=None)
+
+
def test_get_flexilims_session():
sess = flz.get_flexilims_session(
project_id=PARAMETERS["project_ids"]["test"], reuse_token=False
@@ -19,13 +43,28 @@ def test_get_flexilims_session():
assert sess.username == PARAMETERS["flexilims_username"]
sess = flz.get_flexilims_session(project_id=None, reuse_token=True)
assert sess.username == PARAMETERS["flexilims_username"]
- token, date = get_password("flexilims", "token").split("_")
+ token_file = flz.config.config_tools._find_file("flexilims_token.yml")
+ tokinfo = yaml.safe_load(token_file.read_text())
+ token = tokinfo.get("token", None)
+ date = tokinfo.get("date", None)
today = datetime.datetime.now().strftime("%Y-%m-%d")
assert date == today
assert sess.session.headers["Authorization"].split(" ")[1] == token
sess = flz.get_flexilims_session(project_id=None, reuse_token=True)
assert sess.session.headers["Authorization"].split(" ")[1] == token
+    # manually lock the token file to test the timeout
+ with portalocker.Lock(token_file, "r+", timeout=10) as file_handle:
+ with pytest.raises(portalocker.exceptions.LockException):
+ sess = flz.get_flexilims_session(
+ project_id=None, reuse_token=True, timeout=0.1
+ )
+ # but fine without the reuse_token flag
+ sess = flz.get_flexilims_session(project_id=None, reuse_token=False)
+ assert sess.session.headers["Authorization"].split(" ")[1] != token
+ sess = flz.get_flexilims_session(project_id=None, reuse_token=True, timeout=0.1)
+ assert sess.session.headers["Authorization"].split(" ")[1] == token
+
def test_format_results():
exmple_res = {
@@ -106,28 +145,147 @@ def test_get_mouse_id(flm_sess):
def test_get_datasets(flm_sess):
- sess = flz.get_entity(
- name="mouse_physio_2p_S20211102", datatype="session", flexilims_session=flm_sess
+ ds = flz.get_datasets(
+ origin_id=MOUSE_ID,
+ flexilims_session=flm_sess,
+ )
+ assert len(ds) == 0
+ ds = flz.get_datasets(
+ origin_name=SESSION,
+ flexilims_session=flm_sess,
+ return_paths=True,
+ )
+ assert len(ds) == 3
+ assert all([isinstance(d, pathlib.PosixPath) for d in ds])
+ ds = flz.get_datasets(
+ origin_name=SESSION,
+ flexilims_session=flm_sess,
+ return_paths=False,
)
+ assert len(ds) == 3
+ assert all([hasattr(d, "path_full") for d in ds])
ds = flz.get_datasets(
- flexilims_session=flm_sess, origin_id=sess.id, return_paths=True
+ origin_name=SESSION,
+ flexilims_session=flm_sess,
+ return_paths=False,
+ filter_datasets=dict(acq_uid="overview_zoom1_00001"),
)
- assert len(ds) == 2
- for v in ds.values():
- assert isinstance(v, list)
- assert all([isinstance(d, str) for d in v])
+ assert len(ds) == 1
+ ds = flz.get_datasets(
+ origin_name=SESSION,
+ flexilims_session=flm_sess,
+ return_paths=False,
+ filter_datasets=dict(acq_uid="overview_zoom1_00001"),
+ allow_multiple=False,
+ )
+ assert isinstance(ds, ScanimageData)
+ ds = flz.get_datasets(
+ origin_name=SESSION,
+ flexilims_session=flm_sess,
+ return_paths=True,
+ filter_datasets=dict(acq_uid="overview_zoom1_00001"),
+ allow_multiple=False,
+ )
+ assert isinstance(ds, pathlib.PosixPath)
+ ds = flz.get_datasets(
+ origin_name=SESSION,
+ flexilims_session=flm_sess,
+ return_dataseries=True,
+ filter_datasets=dict(acq_uid="overview_zoom1_00001"),
+ allow_multiple=True,
+ )
+ assert isinstance(ds, pd.DataFrame)
+ ds = flz.get_datasets(
+ origin_name=SESSION,
+ flexilims_session=flm_sess,
+ return_dataseries=True,
+ filter_datasets=dict(acq_uid="overview_zoom1_00001"),
+ allow_multiple=False,
+ )
+ assert isinstance(ds, pd.Series)
+ rec = flz.get_children(
+ parent_name=SESSION, flexilims_session=flm_sess, children_datatype="recording"
+ ).iloc[0]
+ ds_all = flz.get_datasets(
+ origin_id=rec.id,
+ flexilims_session=flm_sess,
+ return_paths=False,
+ )
+ ds_cam = flz.get_datasets(
+ origin_id=rec.id,
+ dataset_type="camera",
+ flexilims_session=flm_sess,
+ return_paths=False,
+ )
+ assert len(ds_all) > len(ds_cam)
+ ds = flz.get_datasets(
+ origin_id=rec.id,
+ dataset_type="camera",
+ filter_datasets=dict(timestamp_file="face_camera_timestamps.csv"),
+ flexilims_session=flm_sess,
+ return_paths=True,
+ )
+ assert len(ds) == 1
ds2 = flz.get_datasets(
- flexilims_session=flm_sess, origin_id=sess.id, return_paths=False
+ origin_id=rec.id,
+ dataset_type="camera",
+ filter_datasets=dict(timestamp_file="face_camera_timestamps.csv"),
+ project_id=flm_sess.project_id,
+ return_paths=True,
)
- assert len(ds2) == 2
- assert all([k in ds2 for k in ds])
- for k in ds2:
- ds_paths = ds[k]
- ds_ds = ds2[k]
- assert len(ds_ds) == len(ds_paths)
- assert all([isinstance(d, Dataset) for d in ds_ds])
- assert all([str(d.path_full) in ds_paths for d in ds_ds])
+ assert ds == ds2
+ with pytest.raises(AssertionError):
+ flz.get_datasets(
+ origin_id=rec.id,
+ project_id=flm_sess.project_id,
+ allow_multiple=False,
+ )
+
+
+def test_get_datasets_recursively(flm_sess):
+ ds_dict = flz.get_datasets_recursively(
+ flexilims_session=flm_sess, origin_name=SESSION, return_paths=True
+ )
+ assert len(ds_dict) == 3
+ ds_dict = flz.get_datasets_recursively(
+ flexilims_session=flm_sess,
+ origin_name=SESSION,
+ return_paths=False,
+ dataset_type="harp",
+ )
+ assert len(ds_dict) == 2
+ ds = []
+ for d in ds_dict.values():
+ ds.extend(d)
+ assert all([isinstance(d, HarpData) for d in ds])
+ ds_dict = flz.get_datasets_recursively(
+ flexilims_session=flm_sess,
+ origin_name=SESSION,
+ return_paths=False,
+ dataset_type="harp",
+ filter_datasets=dict(
+ binary_file="PZAD9.4d_S20211102_R173917_SpheresPermTube_harpmessage.bin"
+ ),
+ )
+ assert len(ds_dict) == 1
+    ds = next(iter(ds_dict.values()))[0]
+ assert isinstance(ds, HarpData)
+
+ ds_dict = flz.get_datasets_recursively(
+ flexilims_session=flm_sess,
+ origin_name=SESSION,
+ return_paths=False,
+ filter_parents={"timestamp": "165821"},
+ )
+ assert len(ds_dict) == 1
+ ds_dict = flz.get_datasets_recursively(
+ flexilims_session=flm_sess,
+ origin_name=SESSION,
+ return_paths=False,
+ parent_type="recording",
+ )
+ assert len(ds_dict) == 2
def test_add_mouse(flm_sess):
@@ -187,6 +345,13 @@ def test_get_children(flm_sess):
)
assert (res_part.type != "recording").sum() == 0
assert res_all.shape[1] > res_part.shape[1]
+ single_res = flz.get_children(
+ parent_name=SESSION,
+ flexilims_session=flm_sess,
+ children_datatype="dataset",
+ filter=dict(notes="Motion correction reference"),
+ )
+ assert single_res.shape[0] == 1
def test_add_entity(flm_sess):
diff --git a/tests/test_components/tests_schema/test_datasets.py b/tests/test_components/tests_schema/test_datasets.py
index 8c58fc5..38c5ab9 100644
--- a/tests/test_components/tests_schema/test_datasets.py
+++ b/tests/test_components/tests_schema/test_datasets.py
@@ -1,6 +1,8 @@
import pytest
import pathlib
import pandas as pd
+import numpy as np
+import flexiznam
from flexiznam.schema import Dataset, microscopy_data
from flexiznam.config import PARAMETERS
from flexiznam.errors import DatasetError, FlexilimsError, NameNotUniqueError
@@ -99,64 +101,47 @@ def test_dataset_flexilims_integration(flm_sess):
extra_attributes={},
created="",
flexilims_session=flm_sess,
- genealogy=(
- "mouse_physio_2p",
- "S20211102",
- "R165821_SpheresPermTube",
- "wf_camera",
- ),
+ genealogy=("PZAA15.1a", "temporary_dataset"),
)
st = ds.flexilims_status()
+ assert st == "not online"
+ ds.update_flexilims()
+ st = ds.flexilims_status()
+ assert st == "up-to-date"
+ ds.extra_attributes = dict(test="test")
+ ds.path = "new/path"
+ st = ds.flexilims_status()
assert st == "different"
rep = ds.flexilims_report()
expected = pd.DataFrame(
dict(
- offline={
- "is_raw": "no",
- "path": "fake/path",
- "created": "",
- "metadata_file": "NA",
- "timestamp_file": "NA",
- "video_file": "NA",
- },
- flexilims={
- "is_raw": "yes",
- "created": "2021-11-02 17:03:17",
- "path": "demo_project/mouse_physio_2p/"
- "S20211102/R165821_SpheresPermTube",
- "origin_id": "61ebf94120d82a35f724490d",
- "timestamp_file": "wf_camera_timestamps.csv",
- "video_file": "wf_camera_data.bin",
- "metadata_file": "wf_camera_metadata.txt",
- },
+ offline={"path": "fake/path", "test": "test"},
+ flexilims={"path": "new/path", "test": "NA"},
)
)
assert all(rep.sort_index() == expected.sort_index())
- ds_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube_wf_camera"
+ ds_name = "PZAA15.1a_temporary_dataset"
+ assert ds.format().name == ds_name
fmt = {
- "path": "fake/path",
"created": "",
"dataset_type": "camera",
+ "genealogy": ("PZAA15.1a", "temporary_dataset"),
"is_raw": "no",
- "name": ds_name,
+ "name": "PZAA15.1a_temporary_dataset",
+ "path": "new/path",
"project": "610989f9a651ff0b6237e0f6",
+ "test": "test",
"type": "dataset",
- "genealogy": (
- "mouse_physio_2p",
- "S20211102",
- "R165821_SpheresPermTube",
- "wf_camera",
- ),
}
- assert ds.format().name == ds_name
assert all(
ds.format().drop("origin_id").sort_index()
== pd.Series(data=fmt, name=ds_name).sort_index()
)
# same with yaml mode
- fmt["extra_attributes"] = {}
+ fmt["extra_attributes"] = {"test": "test"}
fmt["genealogy"] = list(fmt["genealogy"])
+ fmt.pop("test")
ds_yaml = ds.format(mode="yaml")
try:
del ds_yaml["origin_id"]
@@ -164,6 +149,9 @@ def test_dataset_flexilims_integration(flm_sess):
pass
assert ds_yaml == fmt
+ flm_sess.delete(ds.id)
+ assert ds.flexilims_status() == "not online"
+
ds = Dataset(
path="fake/path",
is_raw="no",
@@ -266,12 +254,12 @@ def test_from_origin(flm_sess):
flexilims_session=flm_sess,
)
ds0_bis = Dataset.from_origin(
- origin_type="recording",
- origin_name=origin_name,
- dataset_type="suite2p_rois",
- conflicts="overwrite",
- flexilims_session=flm_sess,
- )
+ origin_type="recording",
+ origin_name=origin_name,
+ dataset_type="suite2p_rois",
+ conflicts="overwrite",
+ flexilims_session=flm_sess,
+ )
assert ds0.id == ds0_bis.id
ds1 = Dataset.from_origin(
@@ -307,14 +295,14 @@ def test_from_origin(flm_sess):
# clean up
for ds in (ds0, ds1):
flm_sess.delete(ds.id)
-
-
+
def test_update_flexilims(flm_sess):
"""This test requires the database to be up-to-date for the physio mouse"""
project = "demo_project"
ds_name = "mouse_physio_2p_S20211102_R165821_SpheresPermTube_wf_camera"
ds = Dataset.from_flexilims(project, name=ds_name, flexilims_session=flm_sess)
+
original_path = ds.path
ds.path = "new/test/path"
with pytest.raises(FlexilimsError) as err:
@@ -341,6 +329,51 @@ def test_update_flexilims(flm_sess):
ds.origin_id = None
ds.update_flexilims(mode="overwrite")
assert err.value.args[0] == "Cannot set origin_id to null"
+ params = dict(
+ harp_bin="some/random/path",
+ di_names=("frame_triggers", "lick_detection", "di2_encoder_initial_state"),
+ verbose=False,
+ )
+ ds.extra_attributes = params
+ ds.origin_id = MOUSE_ID
+ ds.update_flexilims(mode="overwrite")
+ reloaded_ds = Dataset.from_flexilims(
+ project, name=ds_name, flexilims_session=flm_sess
+ )
+ assert reloaded_ds.extra_attributes["di_names"] == list(params["di_names"])
+ assert reloaded_ds.extra_attributes["verbose"] == params["verbose"]
+    # test weird datatypes
+ ds = Dataset.from_origin(
+ project,
+ origin_id=ds.origin_id,
+ flexilims_session=flm_sess,
+ dataset_type="microscopy",
+ )
+ ds.extra_attributes = dict(
+ nf64=np.float64(1),
+ nf32=np.float32(1),
+ ni64=np.int64(1),
+ ni32=np.int32(1),
+ pathobj=pathlib.Path("some/path"),
+ list=[1, 2, 3],
+ tuple=(1, 2, 3),
+ f=float(1.0),
+ i=int(34),
+ )
+ ds.update_flexilims(mode="overwrite")
+ reloaded_ds = Dataset.from_flexilims(
+ project, name=ds.full_name, flexilims_session=flm_sess
+ )
+ assert reloaded_ds.extra_attributes["nf64"] == 1.0
+ assert reloaded_ds.extra_attributes["nf32"] == 1.0
+ assert reloaded_ds.extra_attributes["ni64"] == 1
+ assert reloaded_ds.extra_attributes["ni32"] == 1
+ assert reloaded_ds.extra_attributes["pathobj"] == str(pathlib.Path("some/path"))
+ assert reloaded_ds.extra_attributes["list"] == [1, 2, 3]
+ assert reloaded_ds.extra_attributes["tuple"] == list((1, 2, 3))
+ assert reloaded_ds.extra_attributes["f"] == 1.0
+ assert reloaded_ds.extra_attributes["i"] == 34
+ flm_sess.delete(ds.id)
def test_dataset_paths(flm_sess):
@@ -353,6 +386,16 @@ def test_dataset_paths(flm_sess):
assert str(ds.path_full) == str(
pathlib.Path(PARAMETERS["data_root"]["raw"] / ds.path)
)
+ ds = Dataset(
+ path="test_project",
+ is_raw="no",
+ dataset_type="camera",
+ extra_attributes={},
+ created="",
+ project="example",
+ )
+ assert ds.path_root != pathlib.Path(PARAMETERS["data_root"]["processed"])
+ assert ds.path_root == flexiznam.get_data_root(which="processed", project="example")
def test_project_project_id(flm_sess):
diff --git a/tests/test_components/tests_schema/test_harp.py b/tests/test_components/tests_schema/test_harp.py
index ed13fa4..e31679f 100644
--- a/tests/test_components/tests_schema/test_harp.py
+++ b/tests/test_components/tests_schema/test_harp.py
@@ -1,14 +1,13 @@
import pytest
from flexiznam.schema.harp_data import HarpData
from tests.tests_resources.data_for_testing import DATA_ROOT
-from pathlib import Path
def test_harp():
folder_genealogy = ["mouse_physio_2p", "S20211102", "R165821_SpheresPermTube"]
data_dir = DATA_ROOT.joinpath(*folder_genealogy)
ds = HarpData.from_folder(data_dir, verbose=False)
- assert len(ds) == 2
+ assert len(ds) == 1
ds_name = "PZAD9.4d_S20211102_R165821_SpheresPermTube_harpmessage"
d = ds[ds_name]
assert d.full_name == folder_genealogy[-1] + "_" + ds_name
diff --git a/tests/test_components/tests_schema/test_sequencing_data.py b/tests/test_components/tests_schema/test_sequencing_data.py
index b4983c9..e066926 100644
--- a/tests/test_components/tests_schema/test_sequencing_data.py
+++ b/tests/test_components/tests_schema/test_sequencing_data.py
@@ -1,6 +1,6 @@
import pytest
from flexiznam.schema.sequencing_data import SequencingData
-from tests.tests_resources.data_for_testing import DATA_ROOT
+from tests.tests_resources.data_for_testing import DATA_ROOT, PROJECT_ID
# Test creation of all dataset types.
#
@@ -13,10 +13,9 @@
def test_from_folder():
raw_folder = DATA_ROOT / "mouse_mapseq_lcm" / "Raw_sequencing"
- ds = SequencingData.from_folder(raw_folder, verbose=False)
+ ds = SequencingData.from_folder(raw_folder, verbose=False, project="demo_project")
assert len(ds) == 2
d = ds["TUR675_S896_l769_R2_001"]
assert d.full_name == "Raw_sequencing_TUR675_S896_l769_R2_001"
assert d.dataset_name == "TUR675_S896_l769_R2_001"
assert d.is_valid()
-
diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py
index 78d5e34..dd6bb0a 100644
--- a/tests/tests_resources/data_for_testing.py
+++ b/tests/tests_resources/data_for_testing.py
@@ -4,7 +4,8 @@
from flexiznam.config import PARAMETERS
-MOUSE_ID = "6437dcb13ded9c65df142a12"
+MOUSE_ID = "6437dcb13ded9c65df142a12" # actual physio2p mouse
+MOUSE_TEMP = "647a1aec7ddb34517470d3e6" # some random mouse where I can change data
TEST_PROJECT = "demo_project"
PROJECT_ID = "610989f9a651ff0b6237e0f6"
SESSION = "mouse_physio_2p_S20211102"