From 8e0043ebe34c4272bae317a069d565d8d520c701 Mon Sep 17 00:00:00 2001
From: Antonin Blot
Date: Thu, 26 Sep 2024 14:37:54 +0100
Subject: [PATCH] [merge] dev

---
 .github/workflows/tests.yml                   |    2 +-
 .vscode/settings.json                         |    2 +-
 CHANGELOG.md                                  |   50 +-
 README.md                                     |    4 +-
 docs/make.bat                                 |   70 +-
 docs/source/conf.py                           |  136 +--
 docs/source/flexiznam.camp.rst                |    2 +-
 flexiznam/.pre-commit-config.yaml             |   28 +
 flexiznam/camp/sync_data.py                   | 1063 +++++++----
 flexiznam/cli.py                              |   60 +-
 flexiznam/config/config_tools.py              |   18 +-
 flexiznam/config/default_config.py            |    2 +-
 flexiznam/gui/__init__.py                     |    1 +
 flexiznam/gui/azure.tcl                       |   87 ++
 flexiznam/gui/flexigui.py                     |  430 +++++++
 flexiznam/main.py                             |  173 ++-
 flexiznam/mcms.py                             |    9 +-
 flexiznam/schema/__init__.py                  |    2 +
 flexiznam/schema/camera_data.py               |   18 +-
 flexiznam/schema/datasets.py                  |  176 ++-
 flexiznam/schema/harp_data.py                 |   21 +-
 flexiznam/schema/microscopy_data.py           |   16 +-
 flexiznam/schema/onix_data.py                 |   80 +-
 flexiznam/schema/scanimage_data.py            |   14 +-
 flexiznam/schema/sequencing_data.py           |   16 +-
 flexiznam/schema/visstim_data.py              |  137 +++
 notebooks/01-Setup.ipynb                      |   11 +-
 notebooks/02-Add Data.ipynb                   |    7 +-
 notebooks/03-Using the database.ipynb         |    7 +-
 requirements.txt                              |   14 +-
 setup.py                                      |    3 +-
 tests/ReadMe.md                               |   24 +-
 tests/test-results/pytest_in_tests.xml        |    2 +-
 tests/test_2p.py                              |    1 -
 tests/test_barseq.py                          |    1 -
 tests/test_components/test_cli.py             |    1 -
 tests/test_components/test_main.py            |    2 +-
 tests/test_components/test_utils.py           |    1 -
 .../tests_schema/test_camera_data.py          |    1 -
 .../test_components/tests_schema/test_harp.py |    1 -
 .../tests_schema/test_microscopy_data.py      |    1 -
 .../tests_schema/test_scanimage_data.py       |    1 -
 .../tests_schema/test_sequencing_data.py      |    3 +-
 .../tests_schema/test_visstim.py              |   23 +
 tests/tests_resources/data_for_testing.py     |    3 +-
 45 files changed, 1781 insertions(+), 943 deletions(-)
 create mode 100644 flexiznam/.pre-commit-config.yaml
 create mode 100644 flexiznam/gui/__init__.py
 create mode 100644 flexiznam/gui/azure.tcl
 create mode 100644 flexiznam/gui/flexigui.py
 create mode 100644 flexiznam/schema/visstim_data.py
 create mode 100644 tests/test_components/tests_schema/test_visstim.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7e74ab0..0fc460c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,7 +13,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
-    environment: 
+    environment:
       name: testing
     strategy:
       matrix:
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 06d557d..1c98995 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -10,4 +10,4 @@
   ],
   "python.testing.unittestEnabled": false,
   "python.testing.pytestEnabled": true,
-}
\ No newline at end of file
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb3c93d..01360d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,46 @@
 # Change log
 
+## v0.4
+
+### Main changes
+
+- New `SequencingData` class to handle sequencing data
+- GUI can now be used to add data to flexilims with `flexiznam gui`
+- Add a `conda_envs` field in the config file to use in conjunction with `znamutils`
+- `get_children` can work with name or id (instead of id only)
+- `check_flexilims_issues` can now add missing paths
+- `Dataset.from_origin` has a new `extra_attributes` argument to match online datasets
+  with specific attributes only.
+- `delete_recursively` can delete all children of an entity
+- Offline mode using a downloaded copy of the database
+
+### Minor
+- `add_mouse` uploads birth and death dates in a human-readable format instead. 
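+
+  For example, a minimal call (an illustrative sketch; the keyword names follow
+  the CLI options in this diff, and the mouse and project names are reused from
+  the examples elsewhere in this patch):
+
+  ```python
+  from flexiznam import main
+
+  # upload a mouse; birth/death dates are now stored in a human-readable format
+  main.add_mouse(mouse_name="BRAC7448.2d", project_id="blota_onix_pilote")
+  ```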
+- Add `conflicts` argument to `add_mouse` to overwrite existing mice +- `get_entities` does not raise warnings anymore if `name` is specified and `datatype` +is not. This is now supported upstream by `flexilims` +- Clearer error message when mouse info cannot be found in MCMS +- `load_param` can print the file used to read config with the `verbose` flag. + +### Bugfixes + +- `update_config` actually adds the new fields (i.e. fields that are in the default +config but not the local config) to the config file + +## v0.3.11 + +### Bugfixes + +- Fix bugs related to raw_data for projects not in main folder +- Add mouse works with alive animals + + +## v0.3.10 + +### Main changes + +- Make `update_entity` safer by crashing if reserved fields are used as attributes. + ## v0.3.11 ### Bugfixes @@ -84,6 +125,7 @@ config but not the local config) to the config file `return_paths=False` - New `OnixData` class to handle Onix data - `get_flexilims_session` can now re-use token from a previous session +- Add a GUI module. ### Minor - More generic `clean_recursively` replaces the `clean_dictionary_recursively`. It @@ -92,8 +134,8 @@ config but not the local config) to the config file - `Dataset.format(mode='yaml')` ensure yaml compatibility. (path to str, tuple to list, etc...) - `add_experimental_session` can be done with `parent_id` (or `parent_name`). -- `add_dataset` can add a dataset to a mouse. -- `get_password` syntax changed to match the `add_password` syntax. +- `add_dataset` can add a dataset to a mouse and does not require genealogy. + ### Bugfixes - Fix [#68](https://github.com/znamlab/flexiznam/issues/68). Dataset.format returns @@ -101,8 +143,10 @@ config but not the local config) to the config file - Fix [#88](https://github.com/znamlab/flexiznam/issues/88). Now make attributes JSON compatible before uploading to flexilims. This will replace special characters in attribute names by `_` in the database. -- Fix [[#102](https://github.com/znamlab/flexiznam/issues/102). `add_mouse` now works +- Fix [#102](https://github.com/znamlab/flexiznam/issues/102). `add_mouse` now works with mice that have special character in their allele. +- `add_recording` and `add_sample` add the value online with the full name (including + genealogy) rather than the short name. ## v0.3.4 diff --git a/README.md b/README.md index 40a70f1..a8f9121 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ To set up the flexilims and mcms integration, the config file must be edited. Fi flexiznam config ``` -This should create a `~/.flexiznam/config.yml` file. Edit it with your favorite text editor to change `flexilims_username`, `mcms_username` and, +This should create a `~/.flexiznam/config.yml` file. Edit it with your favorite text editor to change `flexilims_username`, `mcms_username` and, if neeed `data_root`. You can then add passwords to make it simpler by running (one by one): @@ -78,7 +78,7 @@ If you used `pip -e .` to install, updating can be done with: ``` cd flexiznam -git pull +git pull pip install -e . --upgrade flexiznam config --update ``` diff --git a/docs/make.bat b/docs/make.bat index 6247f7e..9534b01 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,35 +1,35 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. 
Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py index 3295c09..82d2a42 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,68 +1,68 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -import os -import sys - -sys.path.insert(0, os.path.abspath("../..")) - -# -- Project information ----------------------------------------------------- - -project = "flexiznam" -copyright = "2021, Antonin Blot, Petr Znamenskiy" -author = "Antonin Blot, Petr Znamenskiy" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosectionlabel", - "sphinx.ext.intersphinx", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx_click", -] - -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), - "pandas": ("https://pandas.pydata.org/docs/", None), -} - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. 
-# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) + +# -- Project information ----------------------------------------------------- + +project = "flexiznam" +copyright = "2021, Antonin Blot, Petr Znamenskiy" +author = "Antonin Blot, Petr Znamenskiy" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx_click", +] + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "pandas": ("https://pandas.pydata.org/docs/", None), +} + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
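+# For example, a stylesheet saved as docs/source/_static/custom.css would be
+# copied alongside the theme's files (an illustrative name; this diff adds no
+# such file).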
+html_static_path = ["_static"] diff --git a/docs/source/flexiznam.camp.rst b/docs/source/flexiznam.camp.rst index 3acc855..1cf4650 100644 --- a/docs/source/flexiznam.camp.rst +++ b/docs/source/flexiznam.camp.rst @@ -8,7 +8,7 @@ Module contents :members: :undoc-members: :show-inheritance: - + flexiznam.camp.sync\_data module -------------------------------- diff --git a/flexiznam/.pre-commit-config.yaml b/flexiznam/.pre-commit-config.yaml new file mode 100644 index 0000000..b9d55f8 --- /dev/null +++ b/flexiznam/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +exclude: 'conf.py' + +# Configuring https://pre-commit.ci/ +ci: + autoupdate_schedule: monthly + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-merge-conflict + - id: check-toml + - id: end-of-file-fixer + - id: mixed-line-ending + args: [--fix=lf] + - id: requirements-txt-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + args: [--extra-keys=metadata.language_info.version metadata.kernelspec.name metadata.kernelspec.display_name] diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 006c121..8b3aa3a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -1,148 +1,228 @@ """File to handle acquisition yaml file and create datasets on flexilims""" -import os import pathlib from pathlib import Path, PurePosixPath import re -import copy +import warnings +import pandas as pd import yaml -from yaml.parser import ParserError import flexiznam as flz -from flexiznam.errors import SyncYmlError, FlexilimsError from flexiznam.schema import Dataset -from flexiznam.config import PARAMETERS -from flexiznam.utils import clean_recursively -def create_yaml( - root_folder, - outfile=None, - project="NOT SPECIFIED", - mouse="NOT SPECIFIED", - overwrite=False, +def create_yaml(folder_to_parse, project, origin_name, output_file, overwrite=False): + """Create a yaml file from a folder + + Args: + folder_to_parse (str): Folder to parse + project (str): Name of the project + origin_name (str): Name of the origin on flexilims + output_file (str): Full path to output yaml. + overwrite (bool, optional): Overwrite output file if it exists. Defaults to False. + """ + output_file = pathlib.Path(output_file) + if (not overwrite) and output_file.exists(): + s = input("File %s already exists. Overwrite (yes/[no])? " % output_file) + if s == "yes": + overwrite = True + else: + raise ( + FileExistsError( + "File %s already exists and overwrite is not allowed" % output_file + ) + ) + folder_to_parse = pathlib.Path(folder_to_parse) + if not folder_to_parse.is_dir(): + raise FileNotFoundError("source_dir %s is not a directory" % folder_to_parse) + + data = create_yaml_dict(folder_to_parse, project, origin_name) + with open(output_file, "w") as f: + yaml.dump(data, f) + + +def create_yaml_dict( + folder_to_parse, + project, + origin_name, + format_yaml=True, ): - """Automatically create a yaml file skeleton + """Create a yaml dict from a folder - Goes recursively in root folder and create a set of nested structure + Recursively parse a folder and create a yaml dict with the structure of the folder. Args: - root_folder (str or Path): base folder, usually a session but can be a sample - outfile (str or Path): target to write the yaml. 
Do not write file if `None` - project (str): name of the project - mouse (str): name of the mouse - overwrite (bool): overwrite outfile if it exists. Default False. + folder_to_parse (str): Path to the folder to parse + project (str): Name of the project, used as root of the path in the output + origin_name (str): Name of the origin on flexilims. Must be online and have + genealogy set. + format_yaml (bool, optional): Format the output to be yaml compatible if True, + otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults + to True. Returns: - yaml_dict (dict): created structure + dict: Dictionary with the structure of the folder and automatically detected + datasets """ - root_folder = pathlib.Path(root_folder) - assert root_folder.is_dir() - assert isinstance(project, str) - assert isinstance(mouse, str) - yaml_dict = dict(project=project, mouse=mouse) - yaml_dict["session"] = None - # check if we were given a session folder - if re.match(r"S\d*", root_folder.stem): - yaml_dict["session"] = root_folder.stem - - _find_yaml_struct(root_folder, yaml_dict) - - if outfile is not None: - outfile = Path(outfile) - if outfile.is_file() and not overwrite: - raise IOError( - "File %s already exists. Use `overwrite` to replace." % outfile - ) - with open(outfile, "w") as writer: - yaml.dump(yaml_dict, writer) + flm_sess = flz.get_flexilims_session(project_id=project) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert origin is not None, f"Origin {origin_name} not found in project {project}" + assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" + genealogy = origin["genealogy"] + folder_to_parse = Path(folder_to_parse) + assert folder_to_parse.is_dir(), f"Folder {folder_to_parse} does not exist" + + data = _create_yaml_dict( + level_folder=folder_to_parse, + project=project, + genealogy=genealogy, + format_yaml=format_yaml, + parent_dict=dict(), + ) + if format_yaml: + root_folder = str(folder_to_parse.parent) + else: + root_folder = folder_to_parse.parent + out = dict( + root_folder=root_folder, + origin_name=origin_name, + children=data, + project=project, + ) + return out - return yaml_dict +def parse_yaml( + yaml_data, + root_folder=None, + origin_name=None, + project=None, + format_yaml=True, +): + """Parse a yaml file and check validity -def _find_yaml_struct(path, current_dict): - """Parse one level of yaml structure for autogenerating yaml + This will add datasets to each existing levels of the yaml, but won't create + nested levels Args: - path: path to the dir to parse - current_dict: current level - - Returns: - current_dict (do changes in place) + yaml_file (str): path to the yaml file (or data as dict) + root_folder (str): path to the root folder. If not provided, will be read from + the yaml file. This is the folder that contains the main folder, so "mouse" + for a "session". + origin_name (str): name of the origin on flexilims. If not provided, will be + read from the yaml file + project (str): name of the project. If not provided, will be read from the yaml + file + format_yaml (bool, optional): Format the output to be yaml compatible if True, + otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults + to True. 
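
    Example (a minimal sketch; the file name and project are placeholders,
    reusing names that appear elsewhere in this diff):

        data = parse_yaml("S20230421.yml", project="blota_onix_pilote")
        # returns a dict with keys: root_folder, origin_name, children, project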
+ Returns + dict: yaml dict with datasets added """ - path = Path(path) - for el in os.listdir(path): - if not (path / el).is_dir(): - continue - # match known recording format - m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", el) - if m: - el_type = "recordings" - protocol = m[1] if m[1] is not None else "PROTOCOL NOT SPECIFIED" - else: - el_type = "samples" - subdict = current_dict.get(el_type, {}) - subdict[el] = dict() - if el_type == "recordings": - subdict[el]["protocol"] = protocol - current_dict[el_type] = subdict - _find_yaml_struct(path / el, current_dict[el_type][el]) - return current_dict + if isinstance(yaml_data, str) or isinstance(yaml_data, Path): + with open(yaml_data, "r") as f: + yaml_data = yaml.safe_load(f) + + if root_folder is None: + root_folder = Path(yaml_data["root_folder"]) + assert root_folder.is_dir(), f"Folder {root_folder} does not exist" + + if project is None: + project = yaml_data["project"] + flm_sess = flz.get_flexilims_session(project_id=project) + + if origin_name is None: + origin_name = yaml_data["origin_name"] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert origin is not None, f"Origin {origin_name} not found in project {project}" + assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" + genealogy = origin["genealogy"] + + assert len(yaml_data["children"]) == 1, "Parsing only one folder is allowed" + child = list(yaml_data["children"].keys())[0] + data = _create_yaml_dict( + level_folder=root_folder / child, + project=project, + genealogy=genealogy, + format_yaml=format_yaml, + parent_dict=yaml_data["children"], + only_datasets=True, + ) + if format_yaml: + root_folder = str(root_folder) + out = dict( + root_folder=root_folder, + origin_name=origin_name, + children=data, + project=project, + ) + yaml_data, errors = check_yaml_validity( + yaml_data, root_folder, origin_name, project + ) + + return out -def parse_yaml(path_to_yaml, raw_data_folder=None, verbose=True): - """Read an acquisition yaml and create corresponding datasets +def check_yaml_validity(yaml_data, root_folder=None, origin_name=None, project=None): + """Check that a yaml file is valid + + This will check that the genealogy is correct, that the datasets are valid and + that the folder structure is correct Args: - path_to_yaml (str or dict): path to the file to parse or dict of yaml contect - raw_data_folder (str): root folder. Typically project folder or folder - containing the mice subfolders - verbose (bool): print info while looking for datasets + yaml_file (str): path to the yaml file (or data as dict) + root_folder (str): path to the root folder. If not provided, will be read from + the yaml file. This is the folder that contains the main folder, so "mouse" + for a "session". + origin_name (str): name of the origin on flexilims. If not provided, will be + read from the yaml file + project (str): name of the project. If not provided, will be read from the yaml + file Returns: - dict: A yaml dictionary with dataset classes - + dict: same as input yaml_data, but with errors added """ - session_data = _clean_yaml(path_to_yaml) + if isinstance(yaml_data, str) or isinstance(yaml_data, Path): + with open(yaml_data, "r") as f: + yaml_data = yaml.safe_load(f) + if root_folder is not None: + assert yaml_data["root_folder"] == str( + root_folder + ), f"root_folder is {yaml_data['root_folder']}. 
Expected {root_folder}" + else: + root_folder = yaml_data["root_folder"] - if raw_data_folder is None: - raw_data_folder = flz.get_data_root("raw", session_data["project"]) - raw_data_folder /= session_data["project"] + if project is not None: + assert ( + yaml_data["project"] == project + ), f"project is {yaml_data['project']}. Expected {project}" + else: + project = yaml_data["project"] - if session_data["path"] is not None: - home_folder = Path(raw_data_folder) / session_data["path"] - elif session_data["session"] is not None: - home_folder = ( - Path(raw_data_folder) / session_data["mouse"] / session_data["session"] - ) + if origin_name is not None: + assert ( + yaml_data["origin_name"] == origin_name + ), f"origin_name is {yaml_data['origin_name']}. Expected {origin_name}" else: - home_folder = Path(raw_data_folder) / session_data["mouse"] - # first load datasets in the session level - if not home_folder.is_dir(): - raise FileNotFoundError("Session directory %s does not exist" % home_folder) - session_data["path"] = home_folder - session_data["datasets"] = create_dataset( - dataset_infos=session_data["datasets"], - verbose=verbose, - parent=session_data, - raw_data_folder=raw_data_folder, - error_handling="report", + origin_name = yaml_data["origin_name"] + + flm_sess = flz.get_flexilims_session(project_id=project) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert hasattr(origin, "genealogy"), f"Origin {origin_name} has no genealogy" + + errors = _check_recursively( + yaml_data["children"], + origin_genealogy=origin["genealogy"], + root_folder=root_folder, + project=project, + genealogy=[], ) - - for rec_name, recording in session_data["recordings"].items(): - recording["path"] = str(PurePosixPath(home_folder / rec_name)) - recording["datasets"] = create_dataset( - dataset_infos=recording["datasets"], - parent=recording, - raw_data_folder=raw_data_folder, - verbose=verbose, - error_handling="report", - ) - - session_data["samples"] = _create_sample_datasets(session_data, raw_data_folder) - - # remove the full path that are not needed - clean_recursively(session_data) - return session_data + return yaml_data, errors def upload_yaml( @@ -156,7 +236,7 @@ def upload_yaml( """Upload data from one yaml to flexilims Args: - source_yaml (str): path to clean yaml + source_yaml (dict or str): path to clean yaml or yaml dict raw_data_folder (str): path to the folder containing the data. Default to data_root['raw'] verbose (bool): print progress information @@ -171,525 +251,298 @@ def upload_yaml( list of names of entities created/updated """ + if isinstance(source_yaml, str) or isinstance(source_yaml, Path): + source_yaml = Path(source_yaml) + with open(source_yaml, "r") as f: + yaml_data = yaml.safe_load(f) + else: + assert isinstance(source_yaml, dict), "source_yaml must be a dict or a path" + yaml_data = source_yaml - output = [] - - # if there are errors, I cannot safely parse the yaml - errors = find_xxerrorxx(yml_file=source_yaml) - if errors: - raise SyncYmlError("The yaml file still contains error. Fix it") - session_data = parse_yaml(source_yaml, raw_data_folder, verbose) - - # parsing can created errors, check again - errors = find_xxerrorxx(yml_file=source_yaml) - if errors: - raise SyncYmlError("Invalid yaml. 
Use `parse_yaml` and fix errors manually.") + # first find the origin - # first find the mouse if flexilims_session is None: - flexilims_session = flz.get_flexilims_session( - project_id=session_data["project"] - ) - mouse = flz.get_entity( - datatype="mouse", - name=session_data["mouse"], + flexilims_session = flz.get_flexilims_session(project_id=yaml_data["project"]) + + origin_name = yaml_data["origin_name"] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flexilims_session) + assert origin is not None, f"`{origin_name}` not found on flexilims" + if verbose: + print(f"Found origin `{origin_name}` with id `{origin.id}`") + # then upload the data recursively + _upload_yaml_dict( + yaml_data["children"], + origin=origin, + raw_data_folder=raw_data_folder, + log_func=log_func, flexilims_session=flexilims_session, - format_reply=False, + conflicts=conflicts, + verbose=verbose, ) - if mouse is None: - raise SyncYmlError("Mouse not on flexilims. You must add it manually first") - - # deal with the session - if session_data["session"] is not None: - m = re.match(r"S(\d{4})(\d\d)(\d\d)", session_data["session"]) - if m: - date = "-".join(m.groups()) - else: - log_func("Cannot parse date for session %s." % session_data["session"]) - date = "N/A" - - session_data = _trim_paths(session_data, raw_data_folder) - - attributes = session_data.get("attributes", None) - if attributes is None: - attributes = {} - for field in ("path", "notes"): - value = session_data.get(field, None) - if value is not None: - attributes[field] = value - - # if session is not specified, then entries will be added directly as - # children of the mouse - if session_data["session"] is not None: - session = flz.add_experimental_session( - parent_name=mouse["name"], - session_name=session_data["session"], - flexilims_session=flexilims_session, - date=date, - attributes=attributes, - conflicts=conflicts, - ) - root_id = session["id"] - output.append(session["name"]) - else: - root_id = mouse["id"] - # session datasets - # use "overwrite" as mode if conflict is "overwrite", otherwise use "safe" mode - if conflicts == "overwrite": - mode = "overwrite" - else: - mode = "safe" - for ds_name, ds in session_data.get("datasets", {}).items(): - ds.genealogy = [mouse["name"], session_data["session"], ds_name] - ds.project = session_data["project"] - ds.origin_id = root_id - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode=mode) - output.append(ds.full_name) - - # now deal with recordings - for short_rec_name, rec_data in session_data.get("recordings", {}).items(): - rec_name = session["name"] + "_" + short_rec_name - attributes = rec_data.get("attributes", None) - if attributes is None: - attributes = {} - for field in ["notes", "path", "timestamp"]: - value = rec_data.get(field, "") - attributes[field] = value if value is not None else "" - attributes["genealogy"] = session["attributes"]["genealogy"] + [short_rec_name] - rec_type = rec_data.get("recording_type", "unspecified") - if not rec_type: - rec_type = "unspecified" - rec_rep = flz.add_recording( - session_id=root_id, - recording_type=rec_type, - protocol=rec_data.get("protocol", ""), - attributes=attributes, - recording_name=rec_name, - other_relations=None, - flexilims_session=flexilims_session, - conflicts=conflicts, - ) - output.append(rec_rep["name"]) - - # now deal with recordings' datasets - for ds_name, ds in rec_data.get("datasets", {}).items(): - ds.genealogy = [ - 
mouse["name"], - session_data["session"], - short_rec_name, - ds_name, - ] - ds.project = session_data["project"] - ds.origin_id = rec_rep["id"] - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode=mode) - output.append(ds.full_name) - - # now deal with samples - def add_samples(samples, parent, output=None): - # we'll need a utility function to deal with recursion - for short_sample_name, sample_data in samples.items(): - # we always use `skip` to add samples - sample_rep = flz.add_sample( - parent["id"], - attributes=attributes, - sample_name=short_sample_name, - conflicts="skip", - flexilims_session=flexilims_session, - ) - if output is not None: - output.append(sample_rep["name"]) - # deal with datasets attached to this sample - for ds_name, ds in sample_data.get("datasets", {}).items(): - ds.genealogy = sample_rep["attributes"]["genealogy"] + [ds_name] - ds.project = session_data["project"] - ds.origin_id = sample_rep["id"] - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode="safe") - if output is not None: - output.append(ds.full_name) - # now add child samples - add_samples(sample_data["samples"], sample_rep, output) - - # samples are attached to mice, not sessions - add_samples(session_data["samples"], mouse, output=output) - return output - - -def write_session_data_as_yaml(session_data, target_file=None, overwrite=False): - """Write a session_data dictionary into a yaml - Args: - session_data (dict): dictionary with Dataset instances, as returned by parse_yaml - target_file (str): path to the output file (if None, does not write to disk) - overwrite (bool): replace target file if it already exists (default False) - - Returns: - dict: the pure yaml dictionary - - """ - out_dict = copy.deepcopy(session_data) - clean_recursively(out_dict, keys=["name"], format_dataset=True) - if target_file is not None: - target_file = Path(target_file) - if target_file.exists() and not overwrite: - raise IOError("Target file %s already exists" % target_file) - with open(target_file, "w") as writer: - yaml.dump(out_dict, writer) - # temp check: - with open(target_file, "r") as reader: - writen = yaml.safe_load(reader) - return out_dict - - -def create_dataset( - dataset_infos, parent, raw_data_folder, verbose=True, error_handling="crash" +def _create_yaml_dict( + level_folder, + project, + genealogy, + format_yaml, + parent_dict, + only_datasets=False, ): - """Create dictionary of datasets + """Private function to create a yaml dict from a folder - Args: - dataset_infos: extra information for reading dataset outside of raw_data_folder - or adding optional arguments - parent (dict): yaml dictionary of the parent level - raw_data_folder (str): folder where to look for data - verbose (bool): (True) Print info about dataset found - error_handling (str) `crash` or `report`. When something goes wrong, raise an - error if `crash` otherwise replace the dataset instance by the error - message in the output dictionary + Add a private function to hide the arguments that are used only for recursion + (parent_dict) - Returns: - dict: dictionary of dataset instances + See `create_yaml_dict` for documentation + Args: + level_folder (Path): folder to parse + project (str): name of the project + genealogy (tuple): genealogy of the current folder + format_yaml (bool): format results to be yaml compatible or keep Dataset + and pathlib.Path objects + parent_dict (dict): dict of the parent folder. 
Used for recursion + only_datasets (bool): only parse datasets, not folders """ - # autoload datasets - datasets = Dataset.from_folder(parent["path"], verbose=verbose) - error_handling = error_handling.lower() - if error_handling not in ("crash", "report"): - raise IOError("error_handling must be `crash` or `report`") - - # check dataset_infos for extra datasets - for ds_name, ds_data in dataset_infos.items(): - ds_path = Path(raw_data_folder) / ds_data["path"] - # first deal with dataset that are not in parent path - ds_class = Dataset.SUBCLASSES.get(ds_data["dataset_type"], Dataset) - if ds_path.is_dir() and (ds_path != parent["path"]): - ds = ds_class.from_folder(ds_path, verbose=verbose) - elif ds_path.is_file() and (ds_path.parent != parent["path"]): - ds = ds_class.from_folder(ds_path.parent, verbose=verbose) - elif not ds_path.exists(): - err_msg = "Dataset not found. Path %s does not exist" % ds_path - if error_handling == "crash": - raise FileNotFoundError(err_msg) - datasets[ds_name] = "XXERRORXX!! " + err_msg - continue + level_folder = Path(level_folder) + assert level_folder.is_dir(), "root_folder must be a directory" + level_name = level_folder.name + if level_name in parent_dict: + level_dict = parent_dict[level_name] + if level_dict is None: + level_dict = dict() + else: + level_dict = dict() + genealogy = list(genealogy) + + m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", level_name) + if m: + if "type" in level_dict: + assert ( + level_dict["type"] == "recording" + ), "Conflicting types, expected recording" else: - # if it is in the parent['path'] folder, I already loaded it. - ds = {k: v for k, v in datasets.items() if isinstance(v, ds_class)} - if not ds: - err_msg = 'Dataset "%s" not found in %s' % (ds_name, ds_path) - if error_handling == "crash": - raise SyncYmlError(err_msg) - datasets[ds_name] = "XXERRORXX!! " + err_msg - - # match by name - if ds_name in ds: - ds = ds[ds_name] - else: # now we're in trouble. - err_msg = 'Could not find dataset "%s". Found "%s" instead' % ( - ds_name, - ", ".join(ds.keys()), + level_dict["type"] = "recording" + if "protocol" not in level_dict: + level_dict["protocol"] = ( + m[1] if m[1] is not None else "XXERRORXX PROTOCOL NOT SPECIFIED" ) - if error_handling == "crash": - raise SyncYmlError(err_msg) - datasets[ds_name] = "XXERRORXX!! 
" + err_msg - continue - if ds_data["attributes"] is not None: - ds.extra_attributes.update(ds_data["attributes"]) - if ds_data["notes"] is not None: - ds.extra_attributes["notes"] = ds_data["notes"] - datasets[ds_name] = ds - return datasets - - -def _trim_paths(session_data, raw_data_folder): - """Parses paths to make them relative to `raw_data_folder` - - Args: - session_data (dict): dictionary containing children of the session - raw_data_folder (str): part of the path to be omitted from on flexilims - - Returns: - dict: `session_data` after trimming the paths + if "recording_type" not in level_dict: + if "camera" in level_dict["protocol"]: + level_dict["recording_type"] = "camera" + elif "onix" in level_dict["protocol"]: + level_dict["recording_type"] = "ephys" + elif "harp" in level_dict["protocol"]: + level_dict["recording_type"] = "behaviour" + else: + level_dict["recording_type"] = "NOT SPECIFIED" + elif re.fullmatch(r"S\d*", level_name): + if "type" in level_dict: + assert ( + level_dict["type"] == "session" + ), "Conflicting types, expected session" + else: + level_dict["type"] = "session" + else: + if "type" not in level_dict: + level_dict["type"] = "sample" + if "genealogy" in level_dict: + assert level_dict["genealogy"] == genealogy + [ + level_name + ], f"Conflicting genealogy for {level_name}" + else: + level_dict["genealogy"] = genealogy + [level_name] + if "path" not in level_dict: + level_dict["path"] = Path(project, *level_dict["genealogy"]) + if format_yaml: + level_dict["path"] = str(PurePosixPath(level_dict["path"])) + children = dict() if "children" not in level_dict else level_dict["children"] + datasets = Dataset.from_folder(level_folder) + if datasets: + for ds_name, ds in datasets.items(): + if ds_name in children: + warnings.warn(f"Dataset {ds_name} already exists in {level_name}. 
Skip") + continue + ds.genealogy = genealogy + list(ds.genealogy) + if format_yaml: + # find path root + proot = str(level_folder)[: -len(level_dict["path"])] + ds.path = ds.path.relative_to(proot) + children[ds_name] = ds.format(mode="yaml") + # remove fields that are not needed + for field in ["origin_id", "project_id", "name"]: + children[ds_name].pop(field, None) + children[ds_name]["path"] = str( + PurePosixPath(children[ds_name]["path"]) + ) + else: + children[ds_name] = ds + + if only_datasets: + subfolders = [ + level_folder / n + for n, c in children.items() + if (c is None) or (c.get("type", "unknown") != "dataset") + ] + else: + subfolders = level_folder.glob("*") + + for child in subfolders: + if child.is_dir(): + _create_yaml_dict( + child, + project=project, + genealogy=genealogy + [level_name], + format_yaml=format_yaml, + parent_dict=children, + ) + level_dict["children"] = children + parent_dict[level_name] = level_dict + return parent_dict - """ - def trim_sample_paths(samples): - # utility function to recurse into samples - for sample_name, sample_data in samples.items(): - samples[sample_name]["path"] = str( - PurePosixPath( - Path(samples[sample_name]["path"]).relative_to(raw_data_folder) - ) +def _upload_yaml_dict( + yaml_dict, origin, raw_data_folder, log_func, flexilims_session, conflicts, verbose +): + for entity, entity_data in yaml_dict.items(): + entity_data = entity_data.copy() + children = entity_data.pop("children", {}) + datatype = entity_data.pop("type") + if datatype == "session": + if verbose: + print(f"Adding session `{entity}`") + new_entity = flz.add_experimental_session( + date=entity[1:], + flexilims_session=flexilims_session, + parent_id=origin["id"], + attributes=entity_data, + session_name=entity, + conflicts=conflicts, ) - for ds_name, ds in sample_data.get("datasets", {}).items(): - ds.path = PurePosixPath(ds.path.relative_to(raw_data_folder)) - trim_sample_paths(sample_data["samples"]) - - if raw_data_folder is None: - raw_data_folder = flz.get_data_root("raw", session_data["project"]) - if "path" in session_data.keys(): - session_data["path"] = str( - PurePosixPath(Path(session_data["path"]).relative_to(raw_data_folder)) - ) - for ds_name, ds in session_data.get("datasets", {}).items(): - ds.path = ds.path.relative_to(raw_data_folder) - for rec_name, rec_data in session_data["recordings"].items(): - session_data["recordings"][rec_name]["path"] = str( - PurePosixPath( - Path(session_data["recordings"][rec_name]["path"]).relative_to( - raw_data_folder + elif datatype == "recording": + rec_type = entity_data.pop("recording_type", "Not specified") + prot = entity_data.pop("protocol", "Not specified") + if verbose: + print( + f"Adding recording `{entity}`, type `{rec_type}`, protocol `{prot}`" ) + new_entity = flz.add_recording( + session_id=origin["id"], + recording_type=rec_type, + protocol=prot, + attributes=entity_data, + recording_name=entity, + conflicts=conflicts, + flexilims_session=flexilims_session, + ) + elif datatype == "sample": + if verbose: + print(f"Adding sample `{entity}`") + new_entity = flz.add_sample( + parent_id=origin["id"], + attributes=entity_data, + sample_name=entity, + conflicts=conflicts, + flexilims_session=flexilims_session, + ) + elif datatype == "dataset": + created = entity_data.pop("created") + dataset_type = entity_data.pop("dataset_type") + path = entity_data.pop("path") + is_raw = entity_data.pop("is_raw") + + if verbose: + print(f"Adding dataset `{entity}`, type `{dataset_type}`") + new_entity = 
flz.add_dataset( + parent_id=origin["id"], + dataset_type=dataset_type, + created=created, + path=path, + is_raw=is_raw, + flexilims_session=flexilims_session, + dataset_name=entity, + attributes=entity_data["extra_attributes"], + strict_validation=False, + conflicts=conflicts, ) - ) - for ds_name, ds in rec_data.get("datasets", {}).items(): - ds.path = PurePosixPath(ds.path.relative_to(raw_data_folder)) - trim_sample_paths(session_data["samples"]) - return session_data - - -def _create_sample_datasets(parent, raw_data_folder): - """Recursively index samples creating a nested dictionary and generate - corresponding datasets - - Args: - parent (dict): Dictionary corresponding to the parent entity - - Return: - dict: dictionary of child samples - """ - if "samples" not in parent: - return dict() - for sample_name, sample in parent["samples"].items(): - sample["path"] = parent["path"] / sample_name - sample["datasets"] = create_dataset( - dataset_infos=sample["datasets"], - parent=sample, + _upload_yaml_dict( + yaml_dict=children, + origin=new_entity, raw_data_folder=raw_data_folder, - error_handling="report", - ) - - # recurse into child samples - sample["samples"] = _create_sample_datasets(sample, raw_data_folder) - # we update in place but we also return the dictionary of samples to make - # for more readable code - return parent["samples"] - - -def _clean_yaml(path_to_yaml): - """Read a yaml file and check that it is correctly formatted - - This does not do any processing, just make sure that I can read the whole yaml and - generate dictionary will all expected fields - - Args: - path_to_yaml (str): path to the YAML file, or dict of the yaml content - - Returns: - dict: nested dictionary containing entries in the YAML file - - """ - - if isinstance(path_to_yaml, dict): - yml_data = path_to_yaml - else: - with open(path_to_yaml, "r") as yml_file: - try: - yml_data = yaml.safe_load(yml_file) - except ParserError as e: - raise IOError("Invalid yaml. 
Parser returned an error: %s" % e) - - session, nested_levels = _read_level(yml_data) - - session["datasets"] = {} - for dataset_name, dataset_dict in nested_levels["datasets"].items(): - session["datasets"][dataset_name] = _read_dataset( - name=dataset_name, data=dataset_dict - ) - - session["recordings"] = {} - for rec_name, rec_dict in nested_levels["recordings"].items(): - session["recordings"][rec_name] = _read_recording(name=rec_name, data=rec_dict) - - session["samples"] = {} - for sample_name, sample_dict in nested_levels["samples"].items(): - session["samples"][sample_name] = _read_sample( - name=sample_name, data=sample_dict - ) - - return session - - -def _read_sample(name, data): - """Read YAML information corresponding to a sample - - Args: - name (str): the name of the sample - data (dict): data for this sample only - - Returns: - dict: the sample read from the yaml - - """ - if data is None: - data = {} - sample, nested_levels = _read_level( - data, - mandatory_args=(), - optional_args=("notes", "attributes", "path"), - nested_levels=("datasets", "samples"), - ) - sample["name"] = name - - sample["datasets"] = dict() - for ds_name, ds_data in nested_levels["datasets"].items(): - sample["datasets"][ds_name] = _read_dataset(name=ds_name, data=ds_data) - sample["samples"] = dict() - for sample_name, sample_data in nested_levels["samples"].items(): - sample["samples"][sample_name] = _read_sample( - name=sample_name, data=sample_data + log_func=log_func, + flexilims_session=flexilims_session, + conflicts=conflicts, + verbose=verbose, ) - return sample - - -def _read_recording(name, data): - """Read YAML information corresponding to a recording - - Args: - name (str): the name of the recording - data (dict): data for this dataset only - - Returns: - dict: the recording read from the yaml - - """ - recording, datasets = _read_level( - data, - mandatory_args=("protocol",), - optional_args=("notes", "attributes", "path", "recording_type", "timestamp"), - nested_levels=("datasets",), - ) - recording["name"] = name - - # if timestamps is None, the name must start with RHHMMSS - if recording["timestamp"] is None: - m = re.match(r"R(\d\d\d\d\d\d)", recording["name"]) - if not m: - raise SyncYmlError( - "Timestamp must be provided if recording name is not " - "properly formatted" - ) - recording["timestamp"] = m.groups()[0] - recording["datasets"] = dict() - for ds_name, ds_data in datasets["datasets"].items(): - recording["datasets"][ds_name] = _read_dataset(name=ds_name, data=ds_data) - - return recording - -def _read_dataset(name, data): - """Read YAML information corresponding to a dataset - Args: - name (str): the name of the dataset, will be composed with parent names to - generate an identifier - data (dict): data for this dataset only - - Returns: - dict: a formatted dictionary including, 'dataset_type', 'path', 'notes', - 'attributes' and 'name' - - """ - level, _ = _read_level( - data, - mandatory_args=("dataset_type", "path"), - optional_args=( - "notes", - "attributes", - "created", - "is_raw", - "origin_id", - "genealogy", - ), - nested_levels=(), - ) - level["name"] = name - return level - - -def _read_level( - yml_level, - mandatory_args=("project", "mouse", "session"), - optional_args=("path", "notes", "attributes", "genealogy"), - nested_levels=("recordings", "datasets", "samples"), +def _check_recursively( + yaml_data, + origin_genealogy, + root_folder, + project, + genealogy, + fixerrors=False, + errors=None, ): - """Read one layer of the yml file (i.e. 
a dictionary) - - Args: - yml_level (dict): a dictionary containing the yml level to analyse (and all sublevels) - mandatory_args: arguments that must be in this level - optional_args: arguments that are expected but not mandatory, will be `None` if - absent - nested_levels: name of any nested level that should not be parsed - - Returns: - (tuple): a tuple containing two dictionaries: - level (dict): dictionary of top level attributes - nested_levels (dict): dictionary of nested dictionaries - """ - # make a copy to not change original version - yml_level = yml_level.copy() - is_absent = [m not in yml_level for m in mandatory_args] - if any(is_absent): - absents = ", ".join(["%s" % a for a, m in zip(mandatory_args, is_absent) if m]) - raise SyncYmlError("%s must be provided in the YAML file." % absents) - level = {m: yml_level.pop(m) for m in mandatory_args} - - for opt in optional_args: - level[opt] = yml_level.pop(opt, None) - - nested_levels = {n: yml_level.pop(n, {}) for n in nested_levels} - - # the rest is unexpected - if len(yml_level): - raise SyncYmlError( - "Got unexpected attribute(s): %s" % (", ".join(yml_level.keys())) - ) - return level, nested_levels - + if errors is None: + errors = dict() + root_folder = Path(root_folder) + + for child, child_dict in yaml_data.items(): + fname = root_folder / Path(*genealogy) / child + child_genealogy = genealogy + [child] + + if child_dict["type"] != "dataset": + if not fname.is_dir(): + child_dict["PATH_ERROR"] = f"XXERRORXX folder {fname} does not exist" + errors[fname] = child_dict + else: + data_series = pd.Series(child_dict) + for k, v in data_series.pop("extra_attributes").items(): + data_series[k] = v + data_series.id = None + data_series.name = "_".join(origin_genealogy + child_genealogy) + ds = flz.Dataset.from_dataseries(data_series) + ds.project = project + msg = ds.is_valid(return_reason=True) + if msg: + child_dict["VALIDATION_ERROR"] = f"XXERRORXX {msg}" + errors[fname] = child_dict + + if child_dict["genealogy"] != origin_genealogy + child_genealogy: + if fixerrors: + print(f"Fixing genealogy for {child}") + child_dict["genealogy"] = origin_genealogy + child_genealogy + else: + child_dict["GENEALOGY_ERROR"] = "XXERRORXX genealogy is not correct" + errors[fname] = child_dict + if "children" in child_dict: + _check_recursively( + child_dict["children"], + origin_genealogy, + root_folder, + project, + genealogy=genealogy + [child], + fixerrors=fixerrors, + errors=errors, + ) + return errors -def find_xxerrorxx(yml_file=None, yml_data=None, pattern="XXERRORXX", _output=None): - """Utility to find where things went wrong - Look through a `yml_file` or the corresponding `yml_Data` dictionary recursively. - Returns a dictionary with all entries containing the error `pattern` +if __name__ == "__main__": + example_yml = "/Users/blota/Desktop/test_yaml.yml" + out = parse_yaml(example_yml) + with open("/Users/blota/Desktop/test_yaml_redump.yml", "w") as f: + yaml.dump(out, f) - _output is used for recursive calling. 
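
    Example (illustrative; the file name is a placeholder):

        errors = find_xxerrorxx(yml_file="S20230421.yml")
        # e.g. {"protocol": "XXERRORXX!! Dataset not found. ..."}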
- """ - if yml_file is not None: - if yml_data is not None: - raise IOError("Set either yml_file OR yml_data") - with open(yml_file, "r") as reader: - yml_data = yaml.safe_load(reader) - - if _output is None: - _output = dict() - for k, v in yml_data.items(): - if isinstance(v, dict): - _output = find_xxerrorxx(yml_data=v, pattern=pattern, _output=_output) - elif isinstance(v, str) and (pattern in v): - _output[k] = v - return _output + rel = "blota_onix_pilote/BRAC7448.2d/" + root_folder = Path(flz.PARAMETERS["data_root"]["raw"]) / rel + yaml_file = Path(flz.PARAMETERS["data_root"]["processed"]) / rel / "S20230421.yml" + origin_name = "BRAC7448.2d" + check_yaml_validity(yaml_file, root_folder, origin_name) diff --git a/flexiznam/cli.py b/flexiznam/cli.py index 734cc3f..61e97d7 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -6,6 +6,17 @@ def cli(): pass +@cli.command() +@click.argument("root_folder", type=click.Path(exists=True), default=".") +def gui(root_folder): + """Start the GUI""" + from flexiznam.gui import flexigui + + app = flexigui.FlexiGui() + app.root_folder.set(root_folder) + app.mainloop() + + @cli.command() @click.option("-p", "--project_id", prompt="Enter the project ID", help="Project ID.") @click.option( @@ -28,6 +39,7 @@ def cli(): show_default=True, ) def add_genealogy(project_id, name, recursive, verbose): + """Add genealogy to a flexilims entity""" from flexiznam import get_flexilims_session flm_sess = get_flexilims_session(project_id=project_id) @@ -60,9 +72,9 @@ def add_mouse( flexilims_username=None, mcms_username=None, ): + """Add a single mouse to a project.""" from flexiznam import main - """Add a single mouse to a project.""" click.echo("Trying to add %s in %s" % (mouse_name, project_id)) main.add_mouse( mouse_name=mouse_name, @@ -168,7 +180,9 @@ def add_password(app, username, password, password_file): @click.option( "-p", "--project", default="NOT SPECIFIED", help="Project name on flexilims." ) -@click.option("-m", "--mouse", default="NOT SPECIFIED", help="Mouse name on flexilims.") +@click.option( + "-o", "--origin", default="NOT SPECIFIED", help="Origin name on flexilims." +) @click.option( "--overwrite/--no-overwrite", default=False, @@ -179,43 +193,20 @@ def add_password(app, username, password, password_file): default=False, help="After creating the yaml skeleton, should I also parse it?", ) -@click.option( - "-r", - "--raw_data_folder", - default=None, - help="Path to the root folder containing raw data. Only used with " "`--process`", -) -def create_yaml( - source_dir, target_yaml, project, mouse, overwrite, process, raw_data_folder -): +def create_yaml(source_dir, target_yaml, project, origin, overwrite, process): """Create a yaml file by looking recursively in `root_dir`""" from flexiznam import camp - import pathlib - target_yaml = pathlib.Path(target_yaml) - if (not overwrite) and target_yaml.exists(): - s = input("File %s already exists. Overwrite (yes/[no])? 
" % target_yaml) - if s == "yes": - overwrite = True - else: - raise ( - FileExistsError( - "File %s already exists and overwrite is not allowed" % target_yaml - ) - ) - source_dir = pathlib.Path(source_dir) - if not source_dir.is_dir(): - raise FileNotFoundError("source_dir %s is not a directory" % source_dir) - yml_content = camp.sync_data.create_yaml( + camp.sync_data.create_yaml( root_folder=source_dir, - outfile=target_yaml, + output_file=target_yaml, + origin_name=origin, project=project, - mouse=mouse, overwrite=overwrite, ) click.echo("Created yml skeleton in %s" % target_yaml) if process: - raise NotImplementedError + raise NotImplementedError("Process yaml at creation is not implemented yet") @cli.command() @@ -316,7 +307,10 @@ def yaml_to_flexilims(source_yaml, raw_data_folder=None, conflicts=None): @click.option("-t", "--target_file", default=None, help="Path to write csv output.") @click.option("-r", "--root_name", default=None, help="Root entity to start the check.") @click.option("--flexilims_username", default=None, help="Your username on flexilims.") -def check_flexilims_issues(project_id, target_file, root_name, flexilims_username): +@click.option("--add-path/--no-add-path", default=False, help="Add missing paths.") +def check_flexilims_issues( + project_id, target_file, root_name, flexilims_username, add_path +): """Check that database is properly formatted This will check recursively all mice if `root_name` is not provided. Elements that @@ -325,7 +319,6 @@ def check_flexilims_issues(project_id, target_file, root_name, flexilims_usernam """ from flexiznam.main import get_flexilims_session from flexiznam import utils - import pathlib import pandas as pd flexilims_session = get_flexilims_session( @@ -350,3 +343,6 @@ def check_flexilims_issues(project_id, target_file, root_name, flexilims_usernam else: df = pdf df.to_csv(target_file) + if add_path: + print("Adding missing paths") + utils.add_missing_paths(flexilims_session, root_name=root_name) diff --git a/flexiznam/config/config_tools.py b/flexiznam/config/config_tools.py index bf2d373..660a796 100644 --- a/flexiznam/config/config_tools.py +++ b/flexiznam/config/config_tools.py @@ -53,12 +53,24 @@ def _find_file(file_name, config_folder=None, create_if_missing=False): raise ConfigurationError("Cannot find %s" % file_name) -def load_param(param_folder=None, config_file="config.yml"): - """Read parameter file from config folder""" +def load_param(param_folder=None, config_file="config.yml", verbose=False): + """Read parameter file from config folder + + Args: + param_folder (str, optional): folder to look for the file. Defaults to None. + config_file (str, optional): name of the file to find. Defaults to "config.yml". + verbose (bool, optional): if True, print the path of the file being read. + Defaults to False. 
+ + Returns: + dict: parameters read from the file + """ if param_folder is None: param_file = _find_file(config_file) else: param_file = Path(param_folder) / config_file + if verbose: + print(f"Reading parameters from {param_file}") with open(param_file, "r") as yml_file: prm = yaml.safe_load(yml_file) return prm @@ -155,7 +167,7 @@ def update_config( project_ids.update(kwargs["project_ids"]) kwargs["project_ids"] = project_ids all_ids = {} - for (pname, pid) in kwargs["project_ids"].items(): + for pname, pid in kwargs["project_ids"].items(): if pid in all_ids: warnings.warn(f"PIDs {pname} and {all_ids[pid]} have the same ID") all_ids[pid] = pname diff --git a/flexiznam/config/default_config.py b/flexiznam/config/default_config.py index 5e0d4ae..55c7e2b 100644 --- a/flexiznam/config/default_config.py +++ b/flexiznam/config/default_config.py @@ -30,7 +30,7 @@ # list of all datatypes datatypes=["mouse", "session", "recording", "dataset", "sample"], # should we limit the valid dataset types? - enforce_dataset_types=True, + enforce_dataset_types=False, # if we enforce, what is the list of valid dataset type dataset_types=[ "scanimage", diff --git a/flexiznam/gui/__init__.py b/flexiznam/gui/__init__.py new file mode 100644 index 0000000..428404e --- /dev/null +++ b/flexiznam/gui/__init__.py @@ -0,0 +1 @@ +from . import flexigui diff --git a/flexiznam/gui/azure.tcl b/flexiznam/gui/azure.tcl new file mode 100644 index 0000000..fead545 --- /dev/null +++ b/flexiznam/gui/azure.tcl @@ -0,0 +1,87 @@ +# Copyright © 2021 rdbende + +source [file join [file dirname [info script]] theme light.tcl] +source [file join [file dirname [info script]] theme dark.tcl] + +option add *tearOff 0 + +proc set_theme {mode} { + if {$mode == "dark"} { + ttk::style theme use "azure-dark" + + array set colors { + -fg "#ffffff" + -bg "#333333" + -disabledfg "#ffffff" + -disabledbg "#737373" + -selectfg "#ffffff" + -selectbg "#007fff" + } + + ttk::style configure . \ + -background $colors(-bg) \ + -foreground $colors(-fg) \ + -troughcolor $colors(-bg) \ + -focuscolor $colors(-selectbg) \ + -selectbackground $colors(-selectbg) \ + -selectforeground $colors(-selectfg) \ + -insertcolor $colors(-fg) \ + -insertwidth 1 \ + -fieldbackground $colors(-selectbg) \ + -font {"Segoe Ui" 10} \ + -borderwidth 1 \ + -relief flat + + tk_setPalette background [ttk::style lookup . -background] \ + foreground [ttk::style lookup . -foreground] \ + highlightColor [ttk::style lookup . -focuscolor] \ + selectBackground [ttk::style lookup . -selectbackground] \ + selectForeground [ttk::style lookup . -selectforeground] \ + activeBackground [ttk::style lookup . -selectbackground] \ + activeForeground [ttk::style lookup . -selectforeground] + + ttk::style map . -foreground [list disabled $colors(-disabledfg)] + + option add *font [ttk::style lookup . -font] + option add *Menu.selectcolor $colors(-fg) + + } elseif {$mode == "light"} { + ttk::style theme use "azure-light" + + array set colors { + -fg "#000000" + -bg "#ffffff" + -disabledfg "#737373" + -disabledbg "#ffffff" + -selectfg "#ffffff" + -selectbg "#007fff" + } + + ttk::style configure . \ + -background $colors(-bg) \ + -foreground $colors(-fg) \ + -troughcolor $colors(-bg) \ + -focuscolor $colors(-selectbg) \ + -selectbackground $colors(-selectbg) \ + -selectforeground $colors(-selectfg) \ + -insertcolor $colors(-fg) \ + -insertwidth 1 \ + -fieldbackground $colors(-selectbg) \ + -font {"Segoe Ui" 10} \ + -borderwidth 1 \ + -relief flat + + tk_setPalette background [ttk::style lookup . 
-background] \ + foreground [ttk::style lookup . -foreground] \ + highlightColor [ttk::style lookup . -focuscolor] \ + selectBackground [ttk::style lookup . -selectbackground] \ + selectForeground [ttk::style lookup . -selectforeground] \ + activeBackground [ttk::style lookup . -selectbackground] \ + activeForeground [ttk::style lookup . -selectforeground] + + ttk::style map . -foreground [list disabled $colors(-disabledfg)] + + option add *font [ttk::style lookup . -font] + option add *Menu.selectcolor $colors(-fg) + } +} diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py new file mode 100644 index 0000000..eb9b3a8 --- /dev/null +++ b/flexiznam/gui/flexigui.py @@ -0,0 +1,430 @@ +import os +import tkinter as tk +from ttkwidgets import CheckboxTreeview +import yaml +from pathlib import Path +import flexiznam as flz +import flexiznam.camp.sync_data + + +class FlexiGui(tk.Tk): + FLEXILIMS_ONLY_FIELDS = ("children", "project", "origin_id") + RESOURCES = Path(__file__).parent + + def __init__(self): + super().__init__() + + self.title("FlexiZnam GUI") + self.geometry("800x600") + + self.rowconfigure(1, weight=10) + self.columnconfigure(0, weight=1) + self.columnconfigure(1, weight=3) + + self.frames = dict() + self._create_frames() + self._setup_widgets() + self._entity_by_itemid = {} + self.contains_errors = False + self.data = {} + + ############# GUI setup methods ############# + # These methods are used to create the GUI elements + + def _setup_widgets(self): + self._create_frames() + self._create_buttons() + self._create_treeview() + self._create_textview() + self._create_statusbar() + + def _create_frames(self): + self.frames["T"] = tk.Frame(self) + self.frames["T"].grid( + row=0, column=0, padx=10, pady=5, columnspan=2, sticky="nwe" + ) + self.frames["T"].rowconfigure(0, weight=1) + self.frames["T"].rowconfigure(1, weight=1) + for i in range(10): + self.frames["T"].columnconfigure(i, weight=1) + self.frames["T"].columnconfigure(3, weight=10) + self.frames["L"] = tk.Frame(self) + self.frames["L"].grid(row=1, column=0, padx=10, pady=5, sticky="nsew") + self.frames["L"].rowconfigure(0, weight=1) + self.frames["L"].columnconfigure(0, weight=1) + self.frames["R"] = tk.Frame(self) + self.frames["R"].grid(row=1, column=1, padx=10, pady=5, sticky="nsew") + self.frames["R"].rowconfigure(0, weight=1) + self.frames["R"].rowconfigure(1, weight=30) + self.frames["R"].rowconfigure(2, weight=1) + self.frames["R"].columnconfigure(0, weight=1) + self.frames["B"] = tk.Frame(self) + self.frames["B"].grid( + row=2, column=0, columnspan=2, padx=10, pady=5, sticky="sew" + ) + self.frames["B"].rowconfigure(0, weight=1) + self.frames["B"].columnconfigure(0, weight=10) + + def _create_treeview(self): + # Create the Treeview + self.treeview = CheckboxTreeview( + self.frames["L"], + columns=("datatype",), + selectmode="browse", + ) + + self.treeview.grid(row=0, column=0, sticky="nsew") + self.treeview.heading("datatype", text="Datatype") + self.treeview.column("datatype", width=200) + # Bind the Treeview selection event + self.treeview.bind("<>", self.on_treeview_select) + self.treeview.tag_configure("error", background="red") + + def _create_textview(self): + # Create the Text widget + tk.Label(self.frames["R"], text="Selected item:").grid( + row=0, + column=0, + sticky="nw", + ) + self.selected_item = tk.StringVar() + self.selected_item.set("None") + l = tk.Label(self.frames["R"], textvariable=self.selected_item) + l.grid(row=0, column=1, sticky="new") + self.textview = tk.Text(self.frames["R"], 
width=40, height=10, wrap="none") + self.textview.grid(row=1, column=0, sticky="nsew", columnspan=2) + self.textview.bind("<>", self.on_textview_change) + self.update_item_btn = tk.Button( + self.frames["R"], text="Update item", command=self.update_item + ) + self.update_item_btn.grid(row=2, column=1, sticky="nsw") + + def _create_buttons(self): + topf = self.frames["T"] + self.parse_btn = tk.Button(topf, text="Parse", command=self.parse_folder) + self.parse_btn.grid(row=0, column=0, sticky="w") + self.load_btn = tk.Button(topf, text="Load", command=self.load_yaml) + self.load_btn.grid(row=0, column=1, sticky="w") + self.write_btn = tk.Button(topf, text="Write", command=self.write_yaml) + self.write_btn.grid(row=0, column=2) + + # add project dropdown and label + tk.Label(topf, text="Project:").grid(row=0, column=3, sticky="w") + self.project = tk.StringVar(self) + self.project.set("SELECT PROJECT") + self.proj_ddwn = tk.OptionMenu( + topf, + self.project, + "SELECT PROJECT", + *flz.PARAMETERS["project_ids"].keys(), + ).grid(row=0, column=4, columnspan=3, sticky="w") + self.upload_btn = tk.Button(topf, text="Upload", command=self.upload) + self.upload_btn.grid(row=0, column=7) + + # add conflicts dropdown and label + tk.Label(topf, text="Conflicts:").grid(row=0, column=8, sticky="w") + self.conflicts = tk.StringVar(self) + self.conflicts.set("abort") + self.conflicts_ddwn = tk.OptionMenu( + topf, self.conflicts, "abort", "overwrite", "skip" + ) + self.conflicts_ddwn.grid(row=0, column=9, sticky="w") + self.quit_btn = tk.Button(topf, text="Quit", command=self.quit) + self.quit_btn.grid(row=0, column=10, sticky="e") + + # add origin name and root dir + tk.Label(topf, text="Origin name:").grid(row=1, column=0, sticky="w") + self.origin_name = tk.StringVar(self) + self.origin_name.set("ENTER FLEXILIMS ORIGIN NAME") + self.origin_name_entry = tk.Entry(topf, textvariable=self.origin_name) + self.origin_name_entry.grid(row=1, column=1, columnspan=2, sticky="nsew") + tk.Label(topf, text="Root directory:").grid(row=1, column=3, sticky="w") + self.root_folder = tk.StringVar(self) + self.root_folder.set(os.getcwd()) + self.root_folder_entry = tk.Entry(topf, textvariable=self.root_folder) + self.root_folder_entry.grid(row=1, column=4, columnspan=6, sticky="nsew") + self.chg_dir_btn = tk.Button(topf, text="...", command=self.chg_root_folder) + self.chg_dir_btn.grid(row=1, column=10) + + def _create_statusbar(self): + self.sb_msg = tk.StringVar() + self.statusbar = tk.Label( + self.frames["B"], textvariable=self.sb_msg, bd=1, relief=tk.SUNKEN + ) + self.statusbar.grid(row=0, column=0, sticky="sw") + self.sb_msg.set("Ready") + + ############# GUI update methods ############# + # These methods are used to actually do stuff with the GUI elements + def get_checked_data(self, item=None, checked_data=None): + if checked_data is None: + checked_data = dict(children=dict()) + for k in ["project", "origin_name", "root_folder"]: + checked_data[k] = self.data[k] + + for child in self.treeview.get_children(item=item): + if self.treeview.tag_has("checked", child) or self.treeview.tag_has( + "tristate", child + ): + name, data = self._entity_by_itemid[child] + data = data.copy() + if "children" in data: + data["children"] = {} + data = self.get_checked_data(item=child, checked_data=data) + checked_data["children"][name] = data + return checked_data + + def report(self, message): + self.sb_msg.set(message) + print(message) + self.update() + + def _check_options_are_set(self, options=("project", "origin_name")): + 
self.report("Checking options") + init_values = dict(project="SELECT", origin_name="ENTER") + for option in options: + value = getattr(self, option).get() + if value.startswith(init_values[option]): + tk.messagebox.showerror("Error", f"Error: enter {option} first!") + return False + self.report("Options are set") + return True + + def parse_folder(self): + if not self._check_options_are_set(): + return + folder = tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select directory to parse" + ) + self.report(f"Parsing folder {folder}...") + self.root_folder.set(folder) + data = flz.camp.sync_data.create_yaml_dict( + folder_to_parse=folder, + project=self.project.get(), + origin_name=self.origin_name.get(), + format_yaml=True, + ) + self.report("Parsing done. Validating data...") + data, errors = flz.camp.sync_data.check_yaml_validity(data) + self.data = data + self.update_data(remove_unchecked=False) + checked = self.get_checked_data(item=None, checked_data=None) + assert checked == self.data + self.report("Done") + + def chg_root_folder(self): + self.report("Changing root folder") + self.root_folder.set( + tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select root directory" + ) + ) + + def on_treeview_select(self, event): + item = self.treeview.focus() + name, data = self._entity_by_itemid[item] + self.report(f"Selected item: {name}") + self.selected_item.set(name) + display = {k: v for k, v in data.items() if k not in self.FLEXILIMS_ONLY_FIELDS} + self.textview.delete(1.0, tk.END) + self.textview.insert(tk.END, yaml.dump(display)) + + def on_textview_change(self, event): + return + + def load_yaml(self): + """Load a YAML file and display it in the treeview""" + self.report("Select YAML file to load") + filetypes = (("Yaml files", "*.yml *.yaml"), ("All files", "*.*")) + + filename = tk.filedialog.askopenfilename( + title="Select YAML file to load", filetypes=filetypes + ) + if not filename: + return + self.report(f"Loading YAML file {filename}...") + with open(filename, "r") as f: + self.data = yaml.safe_load(f) + self.update_data() + self.report("Done") + + def update_data(self, name_to_select=None, remove_unchecked=True): + """Update GUI data from self.data + + Args: + name_to_select (str, optional): Name of item to select in treeview. 
+ Defaults to None.""" + self.report("Updating GUI") + if remove_unchecked: + self.data = self.get_checked_data() + self.textview.delete("1.0", tk.END) + self.selected_item.set("None") + self.treeview.delete(*self.treeview.get_children()) + self._entity_by_itemid = {} + + if "project" in self.data: + self.project.set(self.data["project"]) + if "origin_name" in self.data: + self.origin_name.set(self.data["origin_name"]) + if "root_folder" in self.data: + self.root_folder.set(self.data["root_folder"]) + + self.contains_errors = False + self._insert_yaml_data(self.data["children"], name_to_select=name_to_select) + + def _insert_yaml_data(self, data, parent="", name_to_select=None): + assert isinstance(data, dict), "data must be a dict" + for child, child_data in data.items(): + assert "type" in child_data, f"datatype missing for {child}" + dtype = child_data["type"] + item = self.treeview.insert( + parent, + "end", + text=child, + values=[dtype], + open=True, + ) + if any( + [ + v.startswith("XXERRORXX") + for v in child_data.values() + if isinstance(v, str) + ] + ): + self.contains_errors = True + self.report(f"ERROR: {child} contains errors") + self.treeview.item(item, tags=("error", "checked")) + self.treeview.change_state(item, "checked") + self._entity_by_itemid[item] = (child, child_data) + if name_to_select and child == name_to_select: + self.treeview.focus(item) + self.treeview.selection_set(item) + + if "children" in child_data: + self._insert_yaml_data( + child_data["children"], parent=item, name_to_select=name_to_select + ) + + def write_yaml(self): + """Write the current data to a YAML file""" + self.report("Select YAML file to write") + target = tk.filedialog.asksaveasfilename( + initialdir=self.root_folder.get(), + title="Select YAML file to write", + filetypes=(("Yaml files", "*.yml *.yaml"), ("All files", "*.*")), + ) + if not target: + self.report("No file selected. Cancel") + return + data = dict(self.data) + data["project"] = self.project.get() + data["root_folder"] = self.root_folder.get() + with open(target, "w") as f: + yaml.dump(data, f) + self.report('Wrote YAML file "{}"'.format(target)) + + def upload(self): + """Upload data to flexilims""" + print("Uploading data to flexilims") + if not self._check_options_are_set(): + return + + if not self.data: + tk.messagebox.showerror("Error", "No data loaded") + return + + self.report("Validating data...") + self.update_data() + data, errors = flz.camp.sync_data.check_yaml_validity(self.get_checked_data()) + + if self.contains_errors: + tk.messagebox.showerror( + "Error", + "There are still errors. 
Please fix them before uploading", + ) + return + + data = dict(self.data) + # remove unchecked items + for item in self.treeview.get_children(): + if not self.treeview.tag_has("checked", item): + name, _ = self._entity_by_itemid[item] + self.report(f"Removing item {name}") + data["children"].pop(name) + + data["project"] = self.project.get() + data["root_folder"] = self.root_folder.get() + + self.report("Validating data...") + flz.camp.sync_data.upload_yaml( + source_yaml=data, + raw_data_folder=data["root_folder"], + verbose=True, + log_func=print, + flexilims_session=None, + conflicts=self.conflicts.get(), + ) + self.report("Done") + + def update_item(self): + """Update the selected item with the textview contents""" + + text = self.textview.get(1.0, tk.END) + if not text.strip(): + return + item = self.treeview.focus() + name, original_data = self._entity_by_itemid[item] + self.report(f"Updating item {name}") + assert name == self.selected_item.get(), "Selected item does not match" + data = yaml.safe_load(text) + for field in self.FLEXILIMS_ONLY_FIELDS: + if field in original_data: + data[field] = original_data[field] + self._entity_by_itemid[item] = (name, data) + parents = [] + parent_id = item + while True: + parent = self.treeview.parent(parent_id) + if not parent: + break + parents.append(self._entity_by_itemid[parent][0]) + parent_id = parent + ref = self.data + for parent in reversed(parents): + ref = ref["children"][parent] + ref["children"][name] = data + self.update_data(name_to_select=name) + self.report("Done") + + +if __name__ == "__main__": + + def diffofdict(d1, d2, diff=None, level=""): + """Find differences between 2 dictionary of dictionaries""" + + if diff is None: + diff = [] + all_keys = set(list(d1.keys()) + list(d2.keys())) + for k in all_keys: + level = level + k + "." 
+ if k not in d2: + diff.append(f"{level} (missing in d2)") + elif k not in d1: + diff.append(f"{level} (missing in d1)") + elif isinstance(d1[k], dict): + diff = diffofdict(d1[k], d2[k], diff, level) + elif d1[k] != d2[k]: + diff.append(f"{level} ({d1[k]} != {d2[k]})") + return diff + + app = FlexiGui() + app.root_folder.set( + "/Volumes/lab-znamenskiyp/data/instruments/raw_data/projects/blota_onix_pilote/BRYA142.5d/" + ) + app.origin_name.set("BRYA142.5d") + app.project.set("blota_onix_pilote") + app.mainloop() + df = diffofdict(app.data["children"], app.get_checked_data()["children"]) + a = app.data["children"]["S20230915"]["children"] + b = app.get_checked_data()["children"]["S20230915"]["children"] + a == b diff --git a/flexiznam/main.py b/flexiznam/main.py index 3f24cda..6347b4e 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -9,7 +9,7 @@ import flexiznam import yaml from flexiznam import mcms -from flexiznam.config import PARAMETERS, get_password, add_password +from flexiznam.config import PARAMETERS, get_password from flexiznam.errors import NameNotUniqueError, FlexilimsError, ConfigurationError @@ -45,8 +45,9 @@ def get_data_root(which, project=None, flexilims_session=None): project = flexilims_session.project_id if project not in PARAMETERS["project_ids"]: - project = lookup_project(project, prm=None) - assert project is not None, f"Invalid project {project}" + proj = lookup_project(project, prm=None) + assert proj is not None, f"Invalid project {project}" + project = proj if project in PARAMETERS["project_paths"]: return Path(PARAMETERS["project_paths"][project][which]) @@ -75,6 +76,7 @@ def get_flexilims_session( password=None, reuse_token=True, timeout=10, + offline_mode=None, ): """Open a new flexilims session by creating a new authentication token. @@ -87,7 +89,12 @@ def get_flexilims_session( read from the secrets file, or failing that triggers an input prompt. reuse_token (bool): (optional) if True, try to reuse an existing token timeout (int): (optional) timeout in seconds for the portalocker lock. Default - to 10. + to 10. + offline_mode (bool): (optional) if True, will use an offline session. In this + case, the `offline_yaml` parameter must be set in the config file. If + not provided, will look for the `offline_mode` parameter in the config + file. Default to None. + Returns: :py:class:`flexilims.Flexilims`: Flexilims session object. 
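
In practice, the new offline mode only requires the two configuration keys described above. A minimal sketch, assuming `offline_mode` and `offline_yaml` have been added to `config.yml` and that a YAML dump of the project database exists (the project name and file name below are illustrative):

    import flexiznam as flz

    # config.yml (excerpt):
    #   offline_mode: true
    #   offline_yaml: demo_project_dump.yml  # looked up under the processed
    #                                        # data root if not an absolute path

    # offline_mode=True forces an offline session regardless of the config;
    # leaving it as None falls back to the `offline_mode` key of the config.
    flm_sess = flz.get_flexilims_session(project_id="demo_project", offline_mode=True)

The object returned on this code path is a `flexilims.OfflineFlexilims` instance reading from the YAML dump, so no username or password is needed.
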
@@ -97,6 +104,22 @@ def get_flexilims_session( project_id = _format_project(project_id, PARAMETERS) else: warnings.warn("Starting flexilims session without setting project_id.") + + if offline_mode is None: + offline_mode = PARAMETERS.get("offline_mode", False) + + if offline_mode: + yaml_file = PARAMETERS.get("offline_yaml", None) + if yaml_file is None: + raise ConfigurationError("offline_mode is set but offline_yaml is not") + yaml_file = Path(yaml_file) + if not yaml_file.exists(): + yaml_file = get_data_root("processed", project=project_id) / yaml_file + if not yaml_file.exists(): + raise ConfigurationError(f"offline_yaml file {yaml_file} not found") + flexilims_session = flm.OfflineFlexilims(yaml_file, project_id=project_id) + return flexilims_session + if username is None: username = PARAMETERS["flexilims_username"] if password is None: @@ -138,6 +161,7 @@ def add_mouse( mcms_password=None, flexilims_username=None, flexilims_password=None, + conflicts="abort", ): """Check if a mouse is already in the database and add it if it isn't @@ -161,6 +185,8 @@ def add_mouse( flexilims session is not provided flexilims_password (str): [optional] password for flexilims, used only if flexilims session is not provided + conflicts (str): `abort`, `skip`, `update` or `overwrite` (see update_entity for + detailed description) Returns (dict): flexilims reply @@ -174,8 +200,14 @@ def add_mouse( mice_df = get_entities(flexilims_session=flexilims_session, datatype="mouse") if mouse_name in mice_df.index: - print("Mouse already online") - return mice_df.loc[mouse_name] + if conflicts.lower() == "skip": + print("Mouse already online") + return mice_df.loc[mouse_name] + elif conflicts.lower() == "abort": + raise FlexilimsError("Mouse already online") + is_online = True + else: + is_online = False if mouse_info is None: mouse_info = {} @@ -222,12 +254,21 @@ def add_mouse( mouse_info["genealogy"] = [mouse_name] project_name = lookup_project(flexilims_session.project_id, PARAMETERS) mouse_info["path"] = str(Path(project_name) / mouse_name) - resp = flexilims_session.post( - datatype="mouse", - name=mouse_name, - attributes=mouse_info, - strict_validation=False, - ) + if is_online: + resp = update_entity( + datatype="mouse", + name=mouse_name, + mode=conflicts, + attributes=mouse_info, + flexilims_session=flexilims_session, + ) + else: + resp = flexilims_session.post( + datatype="mouse", + name=mouse_name, + attributes=mouse_info, + strict_validation=False, + ) return resp @@ -381,17 +422,16 @@ def add_recording( "conflicts must be `skip`, `abort`, `overwrite` or `update`" ) - experimental_session = get_entity( - datatype="session", flexilims_session=flexilims_session, id=session_id - ) + parent_series = get_entity(flexilims_session=flexilims_session, id=session_id) recording_info = {"recording_type": recording_type, "protocol": protocol} + if attributes is None: attributes = {} if "path" not in attributes: attributes["path"] = str( Path( get_path( - experimental_session["path"], + parent_series["path"], datatype="session", flexilims_session=flexilims_session, ) @@ -407,20 +447,25 @@ def add_recording( recording_info.update(attributes) if recording_name is None: - recording_name = experimental_session["name"] + "_" + protocol + "_0" + recording_name = parent_series["name"] + "_" + protocol + "_0" + + if "genealogy" not in attributes: + attributes["genealogy"] = list(parent_series["genealogy"]) + [recording_name] + rec_full_name = "_".join(attributes["genealogy"]) + online_recording = get_entity( - 
datatype="recording", name=recording_name, flexilims_session=flexilims_session + datatype="recording", name=rec_full_name, flexilims_session=flexilims_session ) if online_recording is not None: if conflicts.lower() == "skip": - print("A recording named %s already exists" % (recording_name)) + print("A recording named %s already exists" % (rec_full_name)) return online_recording elif conflicts.lower() == "abort": - raise FlexilimsError("A recording named %s already exists" % recording_name) + raise FlexilimsError("A recording named %s already exists" % rec_full_name) else: resp = update_entity( datatype="recording", - name=recording_name, + name=rec_full_name, id=online_recording["id"], origin_id=session_id, mode=conflicts, @@ -432,7 +477,7 @@ def add_recording( resp = flexilims_session.post( datatype="recording", - name=recording_name, + name=rec_full_name, attributes=recording_info, origin_id=session_id, other_relations=other_relations, @@ -589,7 +634,6 @@ def add_dataset( dataset_type, created, path, - genealogy, is_raw="yes", project_id=None, flexilims_session=None, @@ -605,8 +649,6 @@ def add_dataset( dataset_type (str): dataset_type, must be a type define in the config file created (str): date of creation as text, usually in this format: '2021-05-24 14:56:41' path (str): path to the data relative to the project folder - genealogy (tuple): parents of this dataset from the project (excluded) down to - the dataset name itself (included) is_raw (str): `yes` or `no`, used to find the root directory project_id (str): hexadecimal ID or name of the project flexilims_session (:py:class:`flexilims.Flexilims`): authentication @@ -632,11 +674,10 @@ def add_dataset( if conflicts.lower() not in valid_conflicts: raise AttributeError("`conflicts` must be in [%s]" % ", ".join(valid_conflicts)) + parent = get_entity(flexilims_session=flexilims_session, id=parent_id) + if dataset_name is None: - parent_name = get_entity( - flexilims_session=flexilims_session, - id=parent_id, - )["name"] + parent_name = parent["name"] dataset_name = parent_name + "_" + dataset_type + "_0" dataset_info = { @@ -644,7 +685,7 @@ def add_dataset( "created": created, "path": path, "is_raw": is_raw, - "genealogy": genealogy, + "genealogy": list(parent["genealogy"]), } reserved_attributes = ["dataset_type", "created", "path", "is_raw", "genealogy"] if attributes is not None: @@ -656,32 +697,37 @@ def add_dataset( dataset_name = generate_name( "dataset", dataset_name, flexilims_session=flexilims_session ) + dataset_info["genealogy"].append(dataset_name) + dataset_full_name = "_".join(dataset_info["genealogy"]) else: + dataset_info["genealogy"].append(dataset_name) + dataset_full_name = "_".join(dataset_info["genealogy"]) online_version = get_entity( - "dataset", name=dataset_name, flexilims_session=flexilims_session + "dataset", name=dataset_full_name, flexilims_session=flexilims_session ) if online_version is not None: if conflicts.lower() == "abort": - raise FlexilimsError("A dataset named %s already exists" % dataset_name) + raise FlexilimsError( + "A dataset named %s already exists" % dataset_full_name + ) elif conflicts.lower() == "skip": - print("A dataset named %s already exists" % dataset_name) + print("A dataset named %s already exists" % dataset_full_name) return online_version else: resp = update_entity( datatype="dataset", - name=dataset_name, + name=dataset_full_name, id=online_version["id"], origin_id=parent_id, mode=conflicts, attributes=dataset_info, - other_relations=None, flexilims_session=flexilims_session, ) 
return resp resp = flexilims_session.post( datatype="dataset", - name=dataset_name, + name=dataset_full_name, origin_id=parent_id, attributes=dataset_info, strict_validation=strict_validation, @@ -803,7 +849,7 @@ def get_entities( :py:class:`pandas.DataFrame`: containing all matching entities """ - assert (project_id is not None) or (flexilims_session is not None) + # assert (project_id is not None) or (flexilims_session is not None) if flexilims_session is None: flexilims_session = get_flexilims_session(project_id) results = flexilims_session.get( @@ -947,6 +993,8 @@ def get_id(name, datatype=None, project_id=None, flexilims_session=None): entity = get_entity( datatype=datatype, flexilims_session=flexilims_session, name=name ) + if entity is None: + raise FlexilimsError("Cannot find entity named `%s`" % name) return entity["id"] @@ -1086,10 +1134,27 @@ def get_datasets_recursively( For example, this is useful if you want to retrieve paths to all *scanimage* datasets associated with a given session. + Args: + origin_id (str): hexadecimal ID of the origin session. Not required if + origin_name is provided. + origin_name (str): text name of the origin session. Not required if origin_id + is provided. + origin_series (pandas.Series): series of the origin session. Not required if + origin_id or origin_name is provided. + dataset_type (str): type of the dataseet to filter by. If `None`, + will return all datasets. + filter_datasets (dict): dictionary of key-value pairs to filter datasets by. + parent_type (str): type of the parent entity. If `None`, will return all + filter_parents (dict): dictionary of key-value pairs to filter parents by. + return_paths (bool): if True, return a list of paths + project_id (str): text name of the project. Not required if + `flexilims_session` is provided. + flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object + _output (list): internal argument used for recursion. + Returns: dict: Dictionary with direct parent id as keys and lists of associated datasets, or dataset paths as values - """ if origin_series is None: if origin_id is None: @@ -1173,7 +1238,7 @@ def get_datasets( otherwise ensure that only one dataset exists online and return it. return_paths (bool): if True, return a list of paths return_dataseries (bool): if True, a dataframe or a dataseries - _output (list): internal argument used for recursion. 
+ """ @@ -1274,3 +1339,33 @@ def format_results(results, return_list=False): if return_list: return results return pd.DataFrame(results) + + +def delete_recursively(source_id, flexilims_session, do_it=False): + """Delete an entity and all its children recursively + + Args: + source_id (str): hexadecimal ID of the entity to delete + flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object + do_it (bool): if True, will actually delete the entities + + Returns: + list: hexadecimal IDs of the entities to delete + + """ + to_delete = [source_id] + + def _get_children(parent_id): + children = get_children( + parent_id=parent_id, flexilims_session=flexilims_session + ) + for _, child in children.iterrows(): + to_delete.append(child["id"]) + if child["type"] != "dataset": + _get_children(child["id"]) + + _get_children(source_id) + if do_it: + for child_id in to_delete: + flexilims_session.delete(child_id) + return to_delete diff --git a/flexiznam/mcms.py b/flexiznam/mcms.py index 2976375..8d756b7 100644 --- a/flexiznam/mcms.py +++ b/flexiznam/mcms.py @@ -1,7 +1,8 @@ import re import pandas as pd +from requests.exceptions import InvalidURL +from flexiznam.config import get_password from pymcms.main import McmsSession -from flexiznam.config import PARAMETERS, get_password def get_mouse_info(mouse_name, username, password=None): @@ -18,7 +19,11 @@ def get_mouse_info(mouse_name, username, password=None): if password is None: password = get_password(username=username, app="mcms") mcms_sess = McmsSession(username=username, password=password) - original_data = mcms_sess.get_animal(name=mouse_name) + try: + original_data = mcms_sess.get_animal(name=mouse_name) + except InvalidURL: + raise InvalidURL(f"Mouse {mouse_name} not found under your PPL") + # convert to camel case for flexlilims mouse_data = {} pattern = re.compile(r"(? 0 - if (not already_processed) or (conflicts == "append"): + + def _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, + ): + """Inner function to create a new dataset object""" dataset_root = "%s_%s" % (origin["name"], base_name) dataset_name = flz.generate_name( "dataset", @@ -208,23 +241,82 @@ def from_origin( project=project, origin_id=origin["id"], flexilims_session=flexilims_session, + extra_attributes=extra_attributes, ) - else: - if (conflicts is None) or (conflicts == "abort"): - raise flz.errors.DatasetError( - f"Dataset(s) of type {dataset_type} already exist(s):" - + f" {processed.loc[:, 'name']}" + + # CONFLICTS RESOLUTION + # There are no datasets, create one + if not already_processed: + if verbose: + print("No datasets of type %s found. 
Creating new" % dataset_type) + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, + ) + # There are some datasets of this type already online and we abort + if (conflicts is None) or (conflicts == "abort"): + raise flz.errors.DatasetError( + f"Dataset(s) of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + # Three cases left: skip, append, overwrite + if conflicts == "overwrite": + # If overwrite, ensure there is only one dataset of this type as we + # won't be able to guess which one should be replaced + if len(valid_processed) == 1: + if verbose: + print("Overwriting dataset %s" % valid_processed[0].name) + dataset = Dataset.from_dataseries(dataseries=valid_processed[0]) + dataset.extra_attributes = extra_attributes + return dataset + if len(processed) == 1: + if verbose: + print("Overwriting dataset %s" % processed.iloc[0].name) + dataset = Dataset.from_dataseries(dataseries=processed.iloc[0]) + dataset.extra_attributes = extra_attributes + return dataset + raise flz.errors.NameNotUniqueError( + f"Multiple datasets of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + if conflicts == "skip": + # If skip and we have an exact match, return it + if len(valid_processed) == 1: + if verbose: + print("Skip. Returning dataset %s" % valid_processed[0].name) + return Dataset.from_dataseries(dataseries=valid_processed[0]) + # If there is no match, create a new dataset + if len(valid_processed) == 0: + if verbose: + print("No matching dataset found. Creating new dataset") + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, ) - elif conflicts == "skip" or conflicts == "overwrite": - if len(processed) == 1: - return Dataset.from_dataseries(dataseries=processed.iloc[0]) - else: - raise flz.errors.NameNotUniqueError( - "{} {} datasets with name starting by {} exists for {}, " - "which one to return?".format( - len(processed), dataset_type, base_name, origin["name"] - ) - ) + raise flz.errors.NameNotUniqueError( + f"Multiple datasets of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + if conflicts == "append": + # Create a new dataset + if verbose: + print("Appending dataset") + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, + ) @staticmethod def _format_series_to_kwargs(flm_series): @@ -328,13 +420,16 @@ def __init__( elif project_id is not None: self.project_id = project_id - def is_valid(self): - """ - Dummy method definition. Should be reimplemented in children classes + def is_valid(self, return_reason=False): + """Check if the file path is valid for this dataset + Should be reimplemented in children classes. 
Should return True if the dataset is found a valid, false otherwise """ - raise NotImplementedError("`is_valid` is not defined for generic datasets") + if not self.path_full.exists(): + msg = f"Path {self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True def associated_files(self, folder=None): """Give a list of all files associated with this dataset @@ -431,10 +526,9 @@ def update_flexilims(self, mode="safe"): dataset_type=self.dataset_type, created=self.created, path=str(PurePosixPath(self.path)), - genealogy=self.genealogy, is_raw="yes" if self.is_raw else "no", project_id=self.project_id, - dataset_name=self.full_name, + dataset_name=self.dataset_name, attributes=attributes, flexilims_session=self.flexilims_session, conflicts="abort", @@ -596,7 +690,7 @@ def flexilims_session(self, value): self._flexilims_session = value if value is None: return - if hasattr(value, "project_id"): + if hasattr(value, "project_id") and (value.project_id is not None): if self.project_id is None: self.project_id = value.project_id elif self.project_id != value.project_id: diff --git a/flexiznam/schema/harp_data.py b/flexiznam/schema/harp_data.py index 70a9f4f..9a53b76 100644 --- a/flexiznam/schema/harp_data.py +++ b/flexiznam/schema/harp_data.py @@ -161,11 +161,18 @@ def csv_files(self): def csv_files(self, value): self.extra_attributes["csv_files"] = str(value) - def is_valid(self): - """Check that video, metadata and timestamps files exist""" - if not (pathlib.Path(self.path) / self.binary_file).exists(): - return False + def is_valid(self, return_reason=False): + """Check that video, metadata and timestamps files exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" + if not (self.path_full / self.binary_file).exists(): + msg = f"Missing file {self.binary_file}" + return msg if return_reason else False for _, file_path in self.csv_files.items(): - if not (pathlib.Path(self.path) / file_path).exists(): - return False - return True + if not (self.path_full / file_path).exists(): + msg = f"Missing file {file_path}" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/microscopy_data.py b/flexiznam/schema/microscopy_data.py index dc0c44b..ab7d99c 100644 --- a/flexiznam/schema/microscopy_data.py +++ b/flexiznam/schema/microscopy_data.py @@ -140,8 +140,14 @@ def __init__( flexilims_session=flexilims_session, ) - def is_valid(self): - """Check that the file exist""" - if not (pathlib.Path(self.path)).exists(): - return False - return True + def is_valid(self, return_reason=False): + """Check that file exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" + if not self.path_full.exists(): + msg = f"{self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/onix_data.py b/flexiznam/schema/onix_data.py index 57c59b6..1d34ec1 100644 --- a/flexiznam/schema/onix_data.py +++ b/flexiznam/schema/onix_data.py @@ -79,31 +79,34 @@ def from_folder( data = pd.DataFrame(data) output = dict() - for ts, df in data.groupby("timestamp"): - if ( - enforce_validity - and ("rhd2164" not in df.device_name.values) - or ("breakout" not in df.device_name.values) - ): - if verbose: - print( - "Skipping partial onix dataset %s" - % ts.strftime("%Y-%m-%d_%H_%M_%S") - ) - continue - onix_name = 
"onix_data_%s" % ts.strftime("%Y-%m-%d_%H_%M_%S") - extra_attributes = dict() - for device, dev_df in df.groupby("device_name"): - extra_attributes[device] = {s.subname: s.file for s in dev_df.itertuples()} - output[onix_name] = OnixData( - path=folder, - genealogy=folder_genealogy + (onix_name,), - extra_attributes=extra_attributes, - created=ts.strftime("%Y-%m-%d " "%H:%M:%S"), - flexilims_session=flexilims_session, - project=project, - is_raw=is_raw, - ) + if max(data.timestamp - data.timestamp.min()).total_seconds() > 2: + raise IOError(f"Multiple timestamps found in folder {folder}") + + ts = data.timestamp.min() + if ( + enforce_validity + and ("rhd2164" not in data.device_name.values) + or ("breakout" not in data.device_name.values) + ): + if verbose: + print( + "Skipping partial onix dataset %s" + % ts.strftime("%Y-%m-%d_%H_%M_%S") + ) + return + onix_name = "onix_data_%s" % ts.strftime("%Y-%m-%d_%H_%M_%S") + extra_attributes = dict() + for device, dev_df in data.groupby("device_name"): + extra_attributes[device] = {s.subname: s.file for s in dev_df.itertuples()} + output[onix_name] = OnixData( + path=folder, + genealogy=folder_genealogy + (onix_name,), + extra_attributes=extra_attributes, + created=ts.strftime("%Y-%m-%d " "%H:%M:%S"), + flexilims_session=flexilims_session, + project=project, + is_raw=is_raw, + ) return output def __init__( @@ -150,3 +153,30 @@ def __init__( id=id, flexilims_session=flexilims_session, ) + + def is_valid(self, return_reason=False): + """Check that the onix dataset is valid + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid. If False, return True or False + + Returns: + bool or str: True if valid, False if not. If return_reason is True, return + a string with the reason why the dataset is not valid.""" + + ndevices = 0 + for device_name in OnixData.DEVICE_NAMES: + if device_name not in self.extra_attributes: + continue + ndevices += 1 + dev_dict = self.extra_attributes[device_name] + for v in dev_dict.values(): + p = self.path_full / v + if not p.exists(): + msg = f"File {p} does not exist" + return msg if return_reason else False + if ndevices == 0: + msg = "No devices found" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/scanimage_data.py b/flexiznam/schema/scanimage_data.py index 7c3ab94..9468890 100644 --- a/flexiznam/schema/scanimage_data.py +++ b/flexiznam/schema/scanimage_data.py @@ -233,21 +233,23 @@ def tif_files(self, value): ) self.extra_attributes["tif_files"] = value - def is_valid(self, tif_files=None): + def is_valid(self, return_reason=False, tif_files=None): """Check that associated files exist""" if tif_files is None: tif_files = self.tif_files # checking file one by one is long, compare sets tif_files = set(tif_files) existing_file = { - f for f in os.listdir(self.path) if f.endswith(("tif", ".tiff")) + f for f in os.listdir(self.path_full) if f.endswith(("tif", ".tiff")) } if tif_files - existing_file: - return False + msg = "Some tif files do not exist: %s" % (tif_files - existing_file) + return msg if return_reason else False for _, file_path in self.csv_files.items(): - if not (pathlib.Path(self.path) / file_path).exists(): - return False - return True + if not (self.path_full / file_path).exists(): + msg = "Csv file does not exist: %s" % file_path + return msg if return_reason else False + return "" if return_reason else True def __len__(self): """Number of tif files in the dataset""" diff --git 
a/flexiznam/schema/sequencing_data.py b/flexiznam/schema/sequencing_data.py index 4f4ab0f..ca453b4 100644 --- a/flexiznam/schema/sequencing_data.py +++ b/flexiznam/schema/sequencing_data.py @@ -1,7 +1,5 @@ import datetime -import os import pathlib -import re import warnings from flexiznam.schema.datasets import Dataset @@ -126,8 +124,14 @@ def __init__( project_id=project_id, ) - def is_valid(self): - """Check that the file exist""" + def is_valid(self, return_reason=False): + """Check that file exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" if not self.path_full.exists(): - return False - return True + msg = f"{self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/visstim_data.py b/flexiznam/schema/visstim_data.py new file mode 100644 index 0000000..fe0ff8e --- /dev/null +++ b/flexiznam/schema/visstim_data.py @@ -0,0 +1,137 @@ +import datetime +import pathlib + +from flexiznam.schema.datasets import Dataset + + +class VisStimData(Dataset): + DATASET_TYPE = "visstim" + + @classmethod + def from_folder( + cls, + folder, + folder_genealogy=None, + is_raw=None, + verbose=True, + flexilims_session=None, + project=None, + ): + """Create a visual stimulation dataset by loading info from folder + + A visual stimulation dataset is a folder containing at least a `FrameLog.csv` + file and any number of other associated csvs. + + Args: + folder (str): path to the folder + folder_genealogy (tuple): genealogy of the folder, if None assume that + the genealogy is just (folder,), i.e. no parents + is_raw (bool): does this folder contain raw data? + verbose (bool=True): print info about what is found + flexilims_session (flm.Session): session to interact with flexilims + project (str): project ID or name + + Returns: + dict of dataset (flz.schema.harp_data.HarpData) + """ + + csv_files = list(pathlib.Path(folder).glob("*.csv")) + + fnames = [f.name for f in csv_files] + if "framelog.csv" not in [f.lower() for f in fnames]: + raise IOError("Cannot find FrameLog.csv file") + + log_file = [f for f in csv_files if f.name.lower() == "framelog.csv"][0] + if verbose: + print(f"Found FrameLog.csv file: {log_file}") + + if folder_genealogy is None: + folder_genealogy = (pathlib.Path(folder).stem,) + elif isinstance(folder_genealogy, list): + folder_genealogy = tuple(folder_genealogy) + output = {} + extra_attributes = dict(csv_files={f.stem: f.name for f in csv_files}) + genealogy = folder_genealogy + ("visstim",) + created = datetime.datetime.fromtimestamp(log_file.stat().st_mtime) + output["visstim"] = VisStimData( + genealogy=genealogy, + is_raw=is_raw, + path=folder, + extra_attributes=extra_attributes, + created=created.strftime("%Y-%m-%d %H:%M:%S"), + flexilims_session=flexilims_session, + project=project, + ) + return output + + def __init__( + self, + path, + is_raw=None, + genealogy=None, + extra_attributes=None, + created=None, + project=None, + project_id=None, + origin_id=None, + id=None, + flexilims_session=None, + ): + """Create a VisStim dataset + + Args: + path: folder containing the dataset or path to file (valid only for single + file datasets) + is_raw: bool, used to sort in raw and processed subfolders + genealogy (tuple): parents of this dataset from the project (excluded) down to + the dataset name itself (included) + extra_attributes: dict, optional attributes. 
+ created: Creation date, in "YYYY-MM-DD HH:mm:SS" + project: name of the project. Must be in config, can be guessed from + project_id + project_id: hexadecimal code for the project. Must be in config, can be + guessed from project + origin_id: hexadecimal code for the origin on flexilims. + id: hexadecimal code for the dataset on flexilims. + flexilims_session: authentication session to connect to flexilims + + Expected extra_attributes: + csv_files (optional): Dictionary of csv files associated to the binary file. + Keys are identifier provided for convenience, + values are the full file name + """ + + super().__init__( + genealogy=genealogy, + path=path, + is_raw=is_raw, + dataset_type=VisStimData.DATASET_TYPE, + extra_attributes=extra_attributes, + created=created, + project=project, + project_id=project_id, + origin_id=origin_id, + id=id, + flexilims_session=flexilims_session, + ) + + @property + def csv_files(self): + return self.extra_attributes.get("csv_files", None) + + @csv_files.setter + def csv_files(self, value): + self.extra_attributes["csv_files"] = str(value) + + def is_valid(self, return_reason=False): + """Check that all csv files exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" + for _, file_path in self.csv_files.items(): + if not (self.path_full / file_path).exists(): + msg = f"Missing file {file_path}" + return msg if return_reason else False + return "" if return_reason else True diff --git a/notebooks/01-Setup.ipynb b/notebooks/01-Setup.ipynb index ad0fdc0..f749f7d 100644 --- a/notebooks/01-Setup.ipynb +++ b/notebooks/01-Setup.ipynb @@ -139,9 +139,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "!cat ~/.flexiznam/secret_password.yml" @@ -172,9 +170,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -185,8 +181,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/notebooks/02-Add Data.ipynb b/notebooks/02-Add Data.ipynb index 2b8524f..3b9e85d 100644 --- a/notebooks/02-Add Data.ipynb +++ b/notebooks/02-Add Data.ipynb @@ -241,9 +241,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -254,8 +252,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/notebooks/03-Using the database.ipynb b/notebooks/03-Using the database.ipynb index d839c14..ad48e98 100644 --- a/notebooks/03-Using the database.ipynb +++ b/notebooks/03-Using the database.ipynb @@ -290,9 +290,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -303,8 +301,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/requirements.txt b/requirements.txt index 21b31d0..ad16b0d 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ -pytest -setuptools -pandas -webbot +black click git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims +pandas +pytest pyyaml -tifffile +setuptools sphinx -sphinx-rtd-theme sphinx-click -black +sphinx-rtd-theme +tifffile +webbot diff --git a/setup.py b/setup.py index 6644af0..cfcb3e9 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="flexiznam", - version="v0.3.11", + version="v0.4", url="https://github.com/znamlab/flexznam", license="MIT", author="Antonin Blot", @@ -19,6 +19,7 @@ "flexilims @ git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims", "pymcms @ git+ssh://git@github.com/znamlab/pymcms.git#egg=pymcms", "tifffile", + "ttkwidgets", ], entry_points=""" [console_scripts] diff --git a/tests/ReadMe.md b/tests/ReadMe.md index edaf613..10036bd 100644 --- a/tests/ReadMe.md +++ b/tests/ReadMe.md @@ -4,37 +4,37 @@ Tests are separated in two: -- Main use cases found in the main test folder +- Main use cases found in the main test folder - Test of individual components found in `test_components` - -The `test_components` should cover most of the code but are not user friendly. The + +The `test_components` should cover most of the code but are not user friendly. The main use cases are example scripts that could be use for a real experiment. ## Data -Example datasets are available in the +Example datasets are available in the raw data folder on camp `data/instruments/raw_data/projects/demo_project/`. A corresponding preprocessed folder is also used by tests. ## Notes: ### MCMS -To test the MCMS part, you need a graphical interface and a browser. It is also +To test the MCMS part, you need a graphical interface and a browser. It is also particularly slow. -To avoid having to run it every time, the tests are marked as slow and require the +To avoid having to run it every time, the tests are marked as slow and require the `--runslow` flag to be executed. This is False by default ### Flexilims -For interaction with flexilims, you need to be connected via the crick network -(vpn or from the crick). Neither is easily doable on github workflow. Furthermore -flexilims does not have an API to delete entries. You will have clean it manually +For interaction with flexilims, you need to be connected via the crick network +(vpn or from the crick). Neither is easily doable on github workflow. Furthermore +flexilims does not have an API to delete entries. You will have clean it manually before running the tests -To make things simpler, the tests requiring flexilims or mcms are marked as integration +To make things simpler, the tests requiring flexilims or mcms are marked as integration tests. They can be skipped by running `pytest -m "not integtest"`. -To test the upload to flexilims properly, you need to clear flexilims yourself -(as there is no API to delete stuff). There should be a flag `FLM_IS_WIPED` at +To test the upload to flexilims properly, you need to clear flexilims yourself +(as there is no API to delete stuff). There should be a flag `FLM_IS_WIPED` at the beginning of each test file. If set to `False` (default), then tests involving flexilims will run with `conflicts=skip`. 
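
The `--runslow` flag and the `integtest` marker described in this ReadMe rely on standard pytest hooks. The repository's `conftest.py` is not part of this patch, so the following is only a sketch of how such a flag is conventionally wired up:

    import pytest

    def pytest_addoption(parser):
        # register the custom command line flag mentioned in tests/ReadMe.md
        parser.addoption(
            "--runslow", action="store_true", default=False, help="run slow tests"
        )

    def pytest_collection_modifyitems(config, items):
        if config.getoption("--runslow"):
            return  # flag given: keep all collected tests
        skip_slow = pytest.mark.skip(reason="need --runslow option to run")
        for item in items:
            if "slow" in item.keywords:
                item.add_marker(skip_slow)

With this in place, plain `pytest` skips tests marked `@pytest.mark.slow` and `pytest --runslow` runs everything; `pytest -m "not integtest"` works independently through pytest's built-in marker selection.
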
diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml index e3838f1..d3c28e6 100644 --- a/tests/test-results/pytest_in_tests.xml +++ b/tests/test-results/pytest_in_tests.xml @@ -1 +1 @@ - \ No newline at end of file + diff --git a/tests/test_2p.py b/tests/test_2p.py index bcf476c..01dbd1d 100644 --- a/tests/test_2p.py +++ b/tests/test_2p.py @@ -20,7 +20,6 @@ TEST_PROJECT, ) import flexiznam as fzn -from flexiznam import camp MOUSE = "mouse_physio_2p" SESSION = "S20211102" diff --git a/tests/test_barseq.py b/tests/test_barseq.py index 7863fca..fd34425 100644 --- a/tests/test_barseq.py +++ b/tests/test_barseq.py @@ -17,7 +17,6 @@ TEST_PROJECT, ) import flexiznam as fzn -from flexiznam import camp MOUSE = "mouse_barseq" YAML = "yaml_automatic_skeleton.yml" diff --git a/tests/test_components/test_cli.py b/tests/test_components/test_cli.py index 2dc0ccb..a1e5f36 100644 --- a/tests/test_components/test_cli.py +++ b/tests/test_components/test_cli.py @@ -1,4 +1,3 @@ -import pytest import pathlib import yaml from click.testing import CliRunner diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 468ea68..bbd4927 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -6,7 +6,7 @@ import pytest import flexiznam as flz import yaml -from flexiznam.config import PARAMETERS, get_password +from flexiznam.config import PARAMETERS from flexiznam.errors import FlexilimsError, NameNotUniqueError from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION diff --git a/tests/test_components/test_utils.py b/tests/test_components/test_utils.py index 9384073..1a37254 100644 --- a/tests/test_components/test_utils.py +++ b/tests/test_components/test_utils.py @@ -2,7 +2,6 @@ import pytest import numpy as np from pathlib import Path -import pandas as pd import tempfile from flexiznam.config import config_tools, DEFAULT_CONFIG from flexiznam import utils diff --git a/tests/test_components/tests_schema/test_camera_data.py b/tests/test_components/tests_schema/test_camera_data.py index 2c3a3d8..c691996 100644 --- a/tests/test_components/tests_schema/test_camera_data.py +++ b/tests/test_components/tests_schema/test_camera_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.camera_data import CameraData from flexiznam.schema.datasets import Dataset from tests.tests_resources.data_for_testing import DATA_ROOT, TEST_PROJECT diff --git a/tests/test_components/tests_schema/test_harp.py b/tests/test_components/tests_schema/test_harp.py index e31679f..2a0574a 100644 --- a/tests/test_components/tests_schema/test_harp.py +++ b/tests/test_components/tests_schema/test_harp.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.harp_data import HarpData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_microscopy_data.py b/tests/test_components/tests_schema/test_microscopy_data.py index 2eeb05d..4f3a6b1 100644 --- a/tests/test_components/tests_schema/test_microscopy_data.py +++ b/tests/test_components/tests_schema/test_microscopy_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.microscopy_data import MicroscopyData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_scanimage_data.py b/tests/test_components/tests_schema/test_scanimage_data.py index 675190a..eb07c24 100644 --- a/tests/test_components/tests_schema/test_scanimage_data.py +++ 
b/tests/test_components/tests_schema/test_scanimage_data.py
@@ -1,4 +1,3 @@
-import pytest
 from flexiznam.schema.scanimage_data import ScanimageData
 
 from tests.tests_resources.data_for_testing import DATA_ROOT
diff --git a/tests/test_components/tests_schema/test_sequencing_data.py b/tests/test_components/tests_schema/test_sequencing_data.py
index e066926..d129f59 100644
--- a/tests/test_components/tests_schema/test_sequencing_data.py
+++ b/tests/test_components/tests_schema/test_sequencing_data.py
@@ -1,6 +1,5 @@
-import pytest
 from flexiznam.schema.sequencing_data import SequencingData
-from tests.tests_resources.data_for_testing import DATA_ROOT, PROJECT_ID
+from tests.tests_resources.data_for_testing import DATA_ROOT
 
 # Test creation of all dataset types.
 #
diff --git a/tests/test_components/tests_schema/test_visstim.py b/tests/test_components/tests_schema/test_visstim.py
new file mode 100644
index 0000000..97b3f4a
--- /dev/null
+++ b/tests/test_components/tests_schema/test_visstim.py
@@ -0,0 +1,23 @@
+from flexiznam.schema.visstim_data import VisStimData
+from tests.tests_resources.data_for_testing import DATA_ROOT
+
+
+def test_visstim():
+    folder_genealogy = ["mouse_onix", "S20230915", "R165222_SpheresPermTubeReward"]
+    data_dir = DATA_ROOT.joinpath(*folder_genealogy)
+    ds = VisStimData.from_folder(data_dir, verbose=False)
+    assert len(ds) == 1
+    ds_name = "visstim"
+    d = ds[ds_name]
+    assert d.full_name == folder_genealogy[-1] + "_" + ds_name
+    d.project = "demo_project"
+    assert d.is_valid()
+    assert len(d.csv_files) == 4
+    ds = VisStimData.from_folder(
+        data_dir, verbose=False, folder_genealogy=folder_genealogy
+    )
+    d = ds[ds_name]
+    d.project = "demo_project"
+    assert d.full_name == "_".join(folder_genealogy + [ds_name])
+    assert d.is_valid()
+    assert len(d.csv_files) == 4
diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py
index dd6bb0a..d43a6ce 100644
--- a/tests/tests_resources/data_for_testing.py
+++ b/tests/tests_resources/data_for_testing.py
@@ -1,11 +1,10 @@
 """A list of file coming from one experiment"""
 from pathlib import Path
-import datetime
 
 from flexiznam.config import PARAMETERS
 
 MOUSE_ID = "6437dcb13ded9c65df142a12"  # actual physio2p mouse
-MOUSE_TEMP = "647a1aec7ddb34517470d3e6" # some random mouse where I can change data
+MOUSE_TEMP = "647a1aec7ddb34517470d3e6"  # some random mouse where I can change data
 TEST_PROJECT = "demo_project"
 PROJECT_ID = "610989f9a651ff0b6237e0f6"
 SESSION = "mouse_physio_2p_S20211102"
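
For reference, the `VisStimData` loader exercised by `test_visstim` above can also be used directly. A minimal sketch mirroring the test; `DATA_ROOT` and the demo folder come from the test resources and only resolve in an environment configured for the `demo_project` test data:

    from flexiznam.schema.visstim_data import VisStimData
    from tests.tests_resources.data_for_testing import DATA_ROOT

    genealogy = ["mouse_onix", "S20230915", "R165222_SpheresPermTubeReward"]
    # from_folder returns a dict with a single "visstim" dataset when a
    # FrameLog.csv file is found in the folder
    datasets = VisStimData.from_folder(
        DATA_ROOT.joinpath(*genealogy), folder_genealogy=genealogy, verbose=True
    )
    ds = datasets["visstim"]
    ds.project = "demo_project"
    print(ds.full_name)  # "_".join(genealogy + ["visstim"])
    # is_valid(return_reason=True) returns "" when all csv files exist
    print(ds.is_valid(return_reason=True) or "dataset is valid")
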