diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 7e74ab0..0fc460c 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,7 +13,7 @@ jobs:
   build:
     runs-on: ubuntu-latest
-    environment:
+    environment:
       name: testing
     strategy:
       matrix:
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 06d557d..1c98995 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -10,4 +10,4 @@
   ],
   "python.testing.unittestEnabled": false,
   "python.testing.pytestEnabled": true,
-}
\ No newline at end of file
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb3c93d..01360d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,46 @@
 # Change log
 
+## v0.4
+
+### Main changes
+
+- New `SequencingData` class to handle sequencing data
+- GUI can now be used to add data to flexilims with `flexiznam gui`
+- Add a `conda_envs` field in the config file to use in conjunction with `znamutils`
+- `get_children` can work with name or id (instead of id only)
+- `check_flexilims_issues` can now add missing paths
+- `Dataset.from_origin` has a new `extra_attributes` argument to match online datasets
+  with specific attributes only.
+- `delete_recursively` can delete all children of an entity
+- Offline mode using a downloaded copy of the database
+
+### Minor
+- `add_mouse` uploads birth and death dates in a human-readable format.
+- Add `conflicts` argument to `add_mouse` to overwrite existing mice
+- `get_entities` no longer raises warnings if `name` is specified and `datatype`
+  is not. This is now supported upstream by `flexilims`
+- Clearer error message when mouse info cannot be found in MCMS
+- `load_param` can print the file used to read the config with the `verbose` flag.
+
+### Bugfixes
+
+- `update_config` actually adds the new fields (i.e., fields that are in the default
+  config but not the local config) to the config file
+
+## v0.3.11
+
+### Bugfixes
+
+- Fix bugs related to raw_data for projects not in the main folder
+- `add_mouse` works with alive animals
+
+
+## v0.3.10
+
+### Main changes
+
+- Make `update_entity` safer by crashing if reserved fields are used as attributes.
+
 ## v0.3.11
 
 ### Bugfixes
@@ -84,6 +125,7 @@ config but not the local config) to the config file
   `return_paths=False`
 - New `OnixData` class to handle Onix data
 - `get_flexilims_session` can now re-use token from a previous session
+- Add a GUI module.
 
 ### Minor
 - More generic `clean_recursively` replaces the `clean_dictionary_recursively`. It
@@ -92,8 +134,8 @@ config but not the local config) to the config file
 - `Dataset.format(mode='yaml')` ensures yaml compatibility. (path to str, tuple to
   list, etc...)
 - `add_experimental_session` can be done with `parent_id` (or `parent_name`).
-- `add_dataset` can add a dataset to a mouse.
-- `get_password` syntax changed to match the `add_password` syntax.
+- `add_dataset` can add a dataset to a mouse and does not require genealogy.
+
 
 ### Bugfixes
 - Fix [#68](https://github.com/znamlab/flexiznam/issues/68). Dataset.format returns
@@ -101,8 +143,10 @@ config but not the local config) to the config file
 - Fix [#88](https://github.com/znamlab/flexiznam/issues/88). Now make attributes
   JSON compatible before uploading to flexilims. This will replace special characters
   in attribute names by `_` in the database.
-- Fix [[#102](https://github.com/znamlab/flexiznam/issues/102). `add_mouse` now works
+- Fix [#102](https://github.com/znamlab/flexiznam/issues/102). `add_mouse` now works
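The changelog above names the new v0.4 API surface without showing usage, so here is a minimal sketch of how the main additions fit together. It assumes a configured project named `example_project` and a mouse `mouse_001` that exists online; the `get_entity` call mirrors code later in this diff, while the `parent_name` keyword for `get_children` is inferred from the changelog wording and should be treated as an assumption, not verified API.

```python
# Sketch only: "example_project" and "mouse_001" are placeholder names.
import flexiznam as flz

# One session per project; token re-use was added in an earlier release.
flm_sess = flz.get_flexilims_session(project_id="example_project")

# Fetch an entity by name, as done in camp/sync_data.py further down this diff.
mouse = flz.get_entity(name="mouse_001", flexilims_session=flm_sess)

# v0.4: get_children accepts a name instead of an id
# (keyword name assumed from the changelog entry).
children = flz.get_children(parent_name="mouse_001", flexilims_session=flm_sess)
```

The same release exposes these features on the command line: `flexiznam gui` starts the Tkinter interface added in `flexiznam/gui/flexigui.py`, and the `check_flexilims_issues` command gains an `--add-path` flag to fill in missing paths.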
   with mice that have special characters in their allele.
+- `add_recording` and `add_sample` add the value online with the full name (including
+  genealogy) rather than the short name.
 
 ## v0.3.4
 
diff --git a/README.md b/README.md
index 40a70f1..a8f9121 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ To set up the flexilims and mcms integration, the config file must be edited. Fi
 
 ```
 flexiznam config
 ```
 
-This should create a `~/.flexiznam/config.yml` file. Edit it with your favorite text editor to change `flexilims_username`, `mcms_username` and,
+This should create a `~/.flexiznam/config.yml` file. Edit it with your favorite text editor to change `flexilims_username`, `mcms_username` and,
 if needed, `data_root`. You can then add passwords to make it simpler by running (one by one):
@@ -78,7 +78,7 @@ If you used `pip install -e .` to install, updating can be done with:
 
 ```
 cd flexiznam
-git pull
+git pull
 pip install -e . --upgrade
 flexiznam config --update
 ```
diff --git a/docs/make.bat b/docs/make.bat
index 6247f7e..9534b01 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -1,35 +1,35 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-    set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=source
-set BUILDDIR=build
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-    echo.
-    echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-    echo.installed, then set the SPHINXBUILD environment variable to point
-    echo.to the full path of the 'sphinx-build' executable. Alternatively you
-    echo.may add the Sphinx directory to PATH.
-    echo.
-    echo.If you don't have Sphinx installed, grab it from
-    echo.http://sphinx-doc.org/
-    exit /b 1
-)
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+    set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+    echo.
+    echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+    echo.installed, then set the SPHINXBUILD environment variable to point
+    echo.to the full path of the 'sphinx-build' executable. Alternatively you
+    echo.may add the Sphinx directory to PATH.
+    echo.
+    echo.If you don't have Sphinx installed, grab it from
+    echo.http://sphinx-doc.org/
+    exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3295c09..82d2a42 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,68 +1,68 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -import os -import sys - -sys.path.insert(0, os.path.abspath("../..")) - -# -- Project information ----------------------------------------------------- - -project = "flexiznam" -copyright = "2021, Antonin Blot, Petr Znamenskiy" -author = "Antonin Blot, Petr Znamenskiy" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosectionlabel", - "sphinx.ext.intersphinx", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx_click", -] - -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), - "pandas": ("https://pandas.pydata.org/docs/", None), -} - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) + +# -- Project information ----------------------------------------------------- + +project = "flexiznam" +copyright = "2021, Antonin Blot, Petr Znamenskiy" +author = "Antonin Blot, Petr Znamenskiy" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx_click", +] + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "pandas": ("https://pandas.pydata.org/docs/", None), +} + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. 
+exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] diff --git a/docs/source/flexiznam.camp.rst b/docs/source/flexiznam.camp.rst index 3acc855..1cf4650 100644 --- a/docs/source/flexiznam.camp.rst +++ b/docs/source/flexiznam.camp.rst @@ -8,7 +8,7 @@ Module contents :members: :undoc-members: :show-inheritance: - + flexiznam.camp.sync\_data module -------------------------------- diff --git a/flexiznam/.pre-commit-config.yaml b/flexiznam/.pre-commit-config.yaml new file mode 100644 index 0000000..b9d55f8 --- /dev/null +++ b/flexiznam/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +exclude: 'conf.py' + +# Configuring https://pre-commit.ci/ +ci: + autoupdate_schedule: monthly + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-merge-conflict + - id: check-toml + - id: end-of-file-fixer + - id: mixed-line-ending + args: [--fix=lf] + - id: requirements-txt-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + args: [--extra-keys=metadata.language_info.version metadata.kernelspec.name metadata.kernelspec.display_name] diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 006c121..8b3aa3a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -1,148 +1,228 @@ """File to handle acquisition yaml file and create datasets on flexilims""" -import os import pathlib from pathlib import Path, PurePosixPath import re -import copy +import warnings +import pandas as pd import yaml -from yaml.parser import ParserError import flexiznam as flz -from flexiznam.errors import SyncYmlError, FlexilimsError from flexiznam.schema import Dataset -from flexiznam.config import PARAMETERS -from flexiznam.utils import clean_recursively -def create_yaml( - root_folder, - outfile=None, - project="NOT SPECIFIED", - mouse="NOT SPECIFIED", - overwrite=False, +def create_yaml(folder_to_parse, project, origin_name, output_file, overwrite=False): + """Create a yaml file from a folder + + Args: + folder_to_parse (str): Folder to parse + project (str): Name of the project + origin_name (str): Name of the origin on flexilims + output_file (str): Full path to output yaml. + overwrite (bool, optional): Overwrite output file if it exists. Defaults to False. + """ + output_file = pathlib.Path(output_file) + if (not overwrite) and output_file.exists(): + s = input("File %s already exists. Overwrite (yes/[no])? 
" % output_file) + if s == "yes": + overwrite = True + else: + raise ( + FileExistsError( + "File %s already exists and overwrite is not allowed" % output_file + ) + ) + folder_to_parse = pathlib.Path(folder_to_parse) + if not folder_to_parse.is_dir(): + raise FileNotFoundError("source_dir %s is not a directory" % folder_to_parse) + + data = create_yaml_dict(folder_to_parse, project, origin_name) + with open(output_file, "w") as f: + yaml.dump(data, f) + + +def create_yaml_dict( + folder_to_parse, + project, + origin_name, + format_yaml=True, ): - """Automatically create a yaml file skeleton + """Create a yaml dict from a folder - Goes recursively in root folder and create a set of nested structure + Recursively parse a folder and create a yaml dict with the structure of the folder. Args: - root_folder (str or Path): base folder, usually a session but can be a sample - outfile (str or Path): target to write the yaml. Do not write file if `None` - project (str): name of the project - mouse (str): name of the mouse - overwrite (bool): overwrite outfile if it exists. Default False. + folder_to_parse (str): Path to the folder to parse + project (str): Name of the project, used as root of the path in the output + origin_name (str): Name of the origin on flexilims. Must be online and have + genealogy set. + format_yaml (bool, optional): Format the output to be yaml compatible if True, + otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults + to True. Returns: - yaml_dict (dict): created structure + dict: Dictionary with the structure of the folder and automatically detected + datasets """ - root_folder = pathlib.Path(root_folder) - assert root_folder.is_dir() - assert isinstance(project, str) - assert isinstance(mouse, str) - yaml_dict = dict(project=project, mouse=mouse) - yaml_dict["session"] = None - # check if we were given a session folder - if re.match(r"S\d*", root_folder.stem): - yaml_dict["session"] = root_folder.stem - - _find_yaml_struct(root_folder, yaml_dict) - - if outfile is not None: - outfile = Path(outfile) - if outfile.is_file() and not overwrite: - raise IOError( - "File %s already exists. Use `overwrite` to replace." 
% outfile - ) - with open(outfile, "w") as writer: - yaml.dump(yaml_dict, writer) + flm_sess = flz.get_flexilims_session(project_id=project) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert origin is not None, f"Origin {origin_name} not found in project {project}" + assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" + genealogy = origin["genealogy"] + folder_to_parse = Path(folder_to_parse) + assert folder_to_parse.is_dir(), f"Folder {folder_to_parse} does not exist" + + data = _create_yaml_dict( + level_folder=folder_to_parse, + project=project, + genealogy=genealogy, + format_yaml=format_yaml, + parent_dict=dict(), + ) + if format_yaml: + root_folder = str(folder_to_parse.parent) + else: + root_folder = folder_to_parse.parent + out = dict( + root_folder=root_folder, + origin_name=origin_name, + children=data, + project=project, + ) + return out - return yaml_dict +def parse_yaml( + yaml_data, + root_folder=None, + origin_name=None, + project=None, + format_yaml=True, +): + """Parse a yaml file and check validity -def _find_yaml_struct(path, current_dict): - """Parse one level of yaml structure for autogenerating yaml + This will add datasets to each existing levels of the yaml, but won't create + nested levels Args: - path: path to the dir to parse - current_dict: current level - - Returns: - current_dict (do changes in place) + yaml_file (str): path to the yaml file (or data as dict) + root_folder (str): path to the root folder. If not provided, will be read from + the yaml file. This is the folder that contains the main folder, so "mouse" + for a "session". + origin_name (str): name of the origin on flexilims. If not provided, will be + read from the yaml file + project (str): name of the project. If not provided, will be read from the yaml + file + format_yaml (bool, optional): Format the output to be yaml compatible if True, + otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults + to True. 
+ Returns + dict: yaml dict with datasets added """ - path = Path(path) - for el in os.listdir(path): - if not (path / el).is_dir(): - continue - # match known recording format - m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", el) - if m: - el_type = "recordings" - protocol = m[1] if m[1] is not None else "PROTOCOL NOT SPECIFIED" - else: - el_type = "samples" - subdict = current_dict.get(el_type, {}) - subdict[el] = dict() - if el_type == "recordings": - subdict[el]["protocol"] = protocol - current_dict[el_type] = subdict - _find_yaml_struct(path / el, current_dict[el_type][el]) - return current_dict + if isinstance(yaml_data, str) or isinstance(yaml_data, Path): + with open(yaml_data, "r") as f: + yaml_data = yaml.safe_load(f) + + if root_folder is None: + root_folder = Path(yaml_data["root_folder"]) + assert root_folder.is_dir(), f"Folder {root_folder} does not exist" + + if project is None: + project = yaml_data["project"] + flm_sess = flz.get_flexilims_session(project_id=project) + + if origin_name is None: + origin_name = yaml_data["origin_name"] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert origin is not None, f"Origin {origin_name} not found in project {project}" + assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" + genealogy = origin["genealogy"] + + assert len(yaml_data["children"]) == 1, "Parsing only one folder is allowed" + child = list(yaml_data["children"].keys())[0] + data = _create_yaml_dict( + level_folder=root_folder / child, + project=project, + genealogy=genealogy, + format_yaml=format_yaml, + parent_dict=yaml_data["children"], + only_datasets=True, + ) + if format_yaml: + root_folder = str(root_folder) + out = dict( + root_folder=root_folder, + origin_name=origin_name, + children=data, + project=project, + ) + yaml_data, errors = check_yaml_validity( + yaml_data, root_folder, origin_name, project + ) + + return out -def parse_yaml(path_to_yaml, raw_data_folder=None, verbose=True): - """Read an acquisition yaml and create corresponding datasets +def check_yaml_validity(yaml_data, root_folder=None, origin_name=None, project=None): + """Check that a yaml file is valid + + This will check that the genealogy is correct, that the datasets are valid and + that the folder structure is correct Args: - path_to_yaml (str or dict): path to the file to parse or dict of yaml contect - raw_data_folder (str): root folder. Typically project folder or folder - containing the mice subfolders - verbose (bool): print info while looking for datasets + yaml_file (str): path to the yaml file (or data as dict) + root_folder (str): path to the root folder. If not provided, will be read from + the yaml file. This is the folder that contains the main folder, so "mouse" + for a "session". + origin_name (str): name of the origin on flexilims. If not provided, will be + read from the yaml file + project (str): name of the project. If not provided, will be read from the yaml + file Returns: - dict: A yaml dictionary with dataset classes - + dict: same as input yaml_data, but with errors added """ - session_data = _clean_yaml(path_to_yaml) + if isinstance(yaml_data, str) or isinstance(yaml_data, Path): + with open(yaml_data, "r") as f: + yaml_data = yaml.safe_load(f) + if root_folder is not None: + assert yaml_data["root_folder"] == str( + root_folder + ), f"root_folder is {yaml_data['root_folder']}. 
Expected {root_folder}" + else: + root_folder = yaml_data["root_folder"] - if raw_data_folder is None: - raw_data_folder = flz.get_data_root("raw", session_data["project"]) - raw_data_folder /= session_data["project"] + if project is not None: + assert ( + yaml_data["project"] == project + ), f"project is {yaml_data['project']}. Expected {project}" + else: + project = yaml_data["project"] - if session_data["path"] is not None: - home_folder = Path(raw_data_folder) / session_data["path"] - elif session_data["session"] is not None: - home_folder = ( - Path(raw_data_folder) / session_data["mouse"] / session_data["session"] - ) + if origin_name is not None: + assert ( + yaml_data["origin_name"] == origin_name + ), f"origin_name is {yaml_data['origin_name']}. Expected {origin_name}" else: - home_folder = Path(raw_data_folder) / session_data["mouse"] - # first load datasets in the session level - if not home_folder.is_dir(): - raise FileNotFoundError("Session directory %s does not exist" % home_folder) - session_data["path"] = home_folder - session_data["datasets"] = create_dataset( - dataset_infos=session_data["datasets"], - verbose=verbose, - parent=session_data, - raw_data_folder=raw_data_folder, - error_handling="report", + origin_name = yaml_data["origin_name"] + + flm_sess = flz.get_flexilims_session(project_id=project) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert hasattr(origin, "genealogy"), f"Origin {origin_name} has no genealogy" + + errors = _check_recursively( + yaml_data["children"], + origin_genealogy=origin["genealogy"], + root_folder=root_folder, + project=project, + genealogy=[], ) - - for rec_name, recording in session_data["recordings"].items(): - recording["path"] = str(PurePosixPath(home_folder / rec_name)) - recording["datasets"] = create_dataset( - dataset_infos=recording["datasets"], - parent=recording, - raw_data_folder=raw_data_folder, - verbose=verbose, - error_handling="report", - ) - - session_data["samples"] = _create_sample_datasets(session_data, raw_data_folder) - - # remove the full path that are not needed - clean_recursively(session_data) - return session_data + return yaml_data, errors def upload_yaml( @@ -156,7 +236,7 @@ def upload_yaml( """Upload data from one yaml to flexilims Args: - source_yaml (str): path to clean yaml + source_yaml (dict or str): path to clean yaml or yaml dict raw_data_folder (str): path to the folder containing the data. Default to data_root['raw'] verbose (bool): print progress information @@ -171,525 +251,298 @@ def upload_yaml( list of names of entities created/updated """ + if isinstance(source_yaml, str) or isinstance(source_yaml, Path): + source_yaml = Path(source_yaml) + with open(source_yaml, "r") as f: + yaml_data = yaml.safe_load(f) + else: + assert isinstance(source_yaml, dict), "source_yaml must be a dict or a path" + yaml_data = source_yaml - output = [] - - # if there are errors, I cannot safely parse the yaml - errors = find_xxerrorxx(yml_file=source_yaml) - if errors: - raise SyncYmlError("The yaml file still contains error. Fix it") - session_data = parse_yaml(source_yaml, raw_data_folder, verbose) - - # parsing can created errors, check again - errors = find_xxerrorxx(yml_file=source_yaml) - if errors: - raise SyncYmlError("Invalid yaml. 
Use `parse_yaml` and fix errors manually.") + # first find the origin - # first find the mouse if flexilims_session is None: - flexilims_session = flz.get_flexilims_session( - project_id=session_data["project"] - ) - mouse = flz.get_entity( - datatype="mouse", - name=session_data["mouse"], + flexilims_session = flz.get_flexilims_session(project_id=yaml_data["project"]) + + origin_name = yaml_data["origin_name"] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flexilims_session) + assert origin is not None, f"`{origin_name}` not found on flexilims" + if verbose: + print(f"Found origin `{origin_name}` with id `{origin.id}`") + # then upload the data recursively + _upload_yaml_dict( + yaml_data["children"], + origin=origin, + raw_data_folder=raw_data_folder, + log_func=log_func, flexilims_session=flexilims_session, - format_reply=False, + conflicts=conflicts, + verbose=verbose, ) - if mouse is None: - raise SyncYmlError("Mouse not on flexilims. You must add it manually first") - - # deal with the session - if session_data["session"] is not None: - m = re.match(r"S(\d{4})(\d\d)(\d\d)", session_data["session"]) - if m: - date = "-".join(m.groups()) - else: - log_func("Cannot parse date for session %s." % session_data["session"]) - date = "N/A" - - session_data = _trim_paths(session_data, raw_data_folder) - - attributes = session_data.get("attributes", None) - if attributes is None: - attributes = {} - for field in ("path", "notes"): - value = session_data.get(field, None) - if value is not None: - attributes[field] = value - - # if session is not specified, then entries will be added directly as - # children of the mouse - if session_data["session"] is not None: - session = flz.add_experimental_session( - parent_name=mouse["name"], - session_name=session_data["session"], - flexilims_session=flexilims_session, - date=date, - attributes=attributes, - conflicts=conflicts, - ) - root_id = session["id"] - output.append(session["name"]) - else: - root_id = mouse["id"] - # session datasets - # use "overwrite" as mode if conflict is "overwrite", otherwise use "safe" mode - if conflicts == "overwrite": - mode = "overwrite" - else: - mode = "safe" - for ds_name, ds in session_data.get("datasets", {}).items(): - ds.genealogy = [mouse["name"], session_data["session"], ds_name] - ds.project = session_data["project"] - ds.origin_id = root_id - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode=mode) - output.append(ds.full_name) - - # now deal with recordings - for short_rec_name, rec_data in session_data.get("recordings", {}).items(): - rec_name = session["name"] + "_" + short_rec_name - attributes = rec_data.get("attributes", None) - if attributes is None: - attributes = {} - for field in ["notes", "path", "timestamp"]: - value = rec_data.get(field, "") - attributes[field] = value if value is not None else "" - attributes["genealogy"] = session["attributes"]["genealogy"] + [short_rec_name] - rec_type = rec_data.get("recording_type", "unspecified") - if not rec_type: - rec_type = "unspecified" - rec_rep = flz.add_recording( - session_id=root_id, - recording_type=rec_type, - protocol=rec_data.get("protocol", ""), - attributes=attributes, - recording_name=rec_name, - other_relations=None, - flexilims_session=flexilims_session, - conflicts=conflicts, - ) - output.append(rec_rep["name"]) - - # now deal with recordings' datasets - for ds_name, ds in rec_data.get("datasets", {}).items(): - ds.genealogy = [ - 
mouse["name"], - session_data["session"], - short_rec_name, - ds_name, - ] - ds.project = session_data["project"] - ds.origin_id = rec_rep["id"] - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode=mode) - output.append(ds.full_name) - - # now deal with samples - def add_samples(samples, parent, output=None): - # we'll need a utility function to deal with recursion - for short_sample_name, sample_data in samples.items(): - # we always use `skip` to add samples - sample_rep = flz.add_sample( - parent["id"], - attributes=attributes, - sample_name=short_sample_name, - conflicts="skip", - flexilims_session=flexilims_session, - ) - if output is not None: - output.append(sample_rep["name"]) - # deal with datasets attached to this sample - for ds_name, ds in sample_data.get("datasets", {}).items(): - ds.genealogy = sample_rep["attributes"]["genealogy"] + [ds_name] - ds.project = session_data["project"] - ds.origin_id = sample_rep["id"] - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode="safe") - if output is not None: - output.append(ds.full_name) - # now add child samples - add_samples(sample_data["samples"], sample_rep, output) - - # samples are attached to mice, not sessions - add_samples(session_data["samples"], mouse, output=output) - return output - - -def write_session_data_as_yaml(session_data, target_file=None, overwrite=False): - """Write a session_data dictionary into a yaml - Args: - session_data (dict): dictionary with Dataset instances, as returned by parse_yaml - target_file (str): path to the output file (if None, does not write to disk) - overwrite (bool): replace target file if it already exists (default False) - - Returns: - dict: the pure yaml dictionary - - """ - out_dict = copy.deepcopy(session_data) - clean_recursively(out_dict, keys=["name"], format_dataset=True) - if target_file is not None: - target_file = Path(target_file) - if target_file.exists() and not overwrite: - raise IOError("Target file %s already exists" % target_file) - with open(target_file, "w") as writer: - yaml.dump(out_dict, writer) - # temp check: - with open(target_file, "r") as reader: - writen = yaml.safe_load(reader) - return out_dict - - -def create_dataset( - dataset_infos, parent, raw_data_folder, verbose=True, error_handling="crash" +def _create_yaml_dict( + level_folder, + project, + genealogy, + format_yaml, + parent_dict, + only_datasets=False, ): - """Create dictionary of datasets + """Private function to create a yaml dict from a folder - Args: - dataset_infos: extra information for reading dataset outside of raw_data_folder - or adding optional arguments - parent (dict): yaml dictionary of the parent level - raw_data_folder (str): folder where to look for data - verbose (bool): (True) Print info about dataset found - error_handling (str) `crash` or `report`. When something goes wrong, raise an - error if `crash` otherwise replace the dataset instance by the error - message in the output dictionary + Add a private function to hide the arguments that are used only for recursion + (parent_dict) - Returns: - dict: dictionary of dataset instances + See `create_yaml_dict` for documentation + Args: + level_folder (Path): folder to parse + project (str): name of the project + genealogy (tuple): genealogy of the current folder + format_yaml (bool): format results to be yaml compatible or keep Dataset + and pathlib.Path objects + parent_dict (dict): dict of the parent folder. 
Used for recursion + only_datasets (bool): only parse datasets, not folders """ - # autoload datasets - datasets = Dataset.from_folder(parent["path"], verbose=verbose) - error_handling = error_handling.lower() - if error_handling not in ("crash", "report"): - raise IOError("error_handling must be `crash` or `report`") - - # check dataset_infos for extra datasets - for ds_name, ds_data in dataset_infos.items(): - ds_path = Path(raw_data_folder) / ds_data["path"] - # first deal with dataset that are not in parent path - ds_class = Dataset.SUBCLASSES.get(ds_data["dataset_type"], Dataset) - if ds_path.is_dir() and (ds_path != parent["path"]): - ds = ds_class.from_folder(ds_path, verbose=verbose) - elif ds_path.is_file() and (ds_path.parent != parent["path"]): - ds = ds_class.from_folder(ds_path.parent, verbose=verbose) - elif not ds_path.exists(): - err_msg = "Dataset not found. Path %s does not exist" % ds_path - if error_handling == "crash": - raise FileNotFoundError(err_msg) - datasets[ds_name] = "XXERRORXX!! " + err_msg - continue + level_folder = Path(level_folder) + assert level_folder.is_dir(), "root_folder must be a directory" + level_name = level_folder.name + if level_name in parent_dict: + level_dict = parent_dict[level_name] + if level_dict is None: + level_dict = dict() + else: + level_dict = dict() + genealogy = list(genealogy) + + m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", level_name) + if m: + if "type" in level_dict: + assert ( + level_dict["type"] == "recording" + ), "Conflicting types, expected recording" else: - # if it is in the parent['path'] folder, I already loaded it. - ds = {k: v for k, v in datasets.items() if isinstance(v, ds_class)} - if not ds: - err_msg = 'Dataset "%s" not found in %s' % (ds_name, ds_path) - if error_handling == "crash": - raise SyncYmlError(err_msg) - datasets[ds_name] = "XXERRORXX!! " + err_msg - - # match by name - if ds_name in ds: - ds = ds[ds_name] - else: # now we're in trouble. - err_msg = 'Could not find dataset "%s". Found "%s" instead' % ( - ds_name, - ", ".join(ds.keys()), + level_dict["type"] = "recording" + if "protocol" not in level_dict: + level_dict["protocol"] = ( + m[1] if m[1] is not None else "XXERRORXX PROTOCOL NOT SPECIFIED" ) - if error_handling == "crash": - raise SyncYmlError(err_msg) - datasets[ds_name] = "XXERRORXX!! 
" + err_msg - continue - if ds_data["attributes"] is not None: - ds.extra_attributes.update(ds_data["attributes"]) - if ds_data["notes"] is not None: - ds.extra_attributes["notes"] = ds_data["notes"] - datasets[ds_name] = ds - return datasets - - -def _trim_paths(session_data, raw_data_folder): - """Parses paths to make them relative to `raw_data_folder` - - Args: - session_data (dict): dictionary containing children of the session - raw_data_folder (str): part of the path to be omitted from on flexilims - - Returns: - dict: `session_data` after trimming the paths + if "recording_type" not in level_dict: + if "camera" in level_dict["protocol"]: + level_dict["recording_type"] = "camera" + elif "onix" in level_dict["protocol"]: + level_dict["recording_type"] = "ephys" + elif "harp" in level_dict["protocol"]: + level_dict["recording_type"] = "behaviour" + else: + level_dict["recording_type"] = "NOT SPECIFIED" + elif re.fullmatch(r"S\d*", level_name): + if "type" in level_dict: + assert ( + level_dict["type"] == "session" + ), "Conflicting types, expected session" + else: + level_dict["type"] = "session" + else: + if "type" not in level_dict: + level_dict["type"] = "sample" + if "genealogy" in level_dict: + assert level_dict["genealogy"] == genealogy + [ + level_name + ], f"Conflicting genealogy for {level_name}" + else: + level_dict["genealogy"] = genealogy + [level_name] + if "path" not in level_dict: + level_dict["path"] = Path(project, *level_dict["genealogy"]) + if format_yaml: + level_dict["path"] = str(PurePosixPath(level_dict["path"])) + children = dict() if "children" not in level_dict else level_dict["children"] + datasets = Dataset.from_folder(level_folder) + if datasets: + for ds_name, ds in datasets.items(): + if ds_name in children: + warnings.warn(f"Dataset {ds_name} already exists in {level_name}. 
Skip") + continue + ds.genealogy = genealogy + list(ds.genealogy) + if format_yaml: + # find path root + proot = str(level_folder)[: -len(level_dict["path"])] + ds.path = ds.path.relative_to(proot) + children[ds_name] = ds.format(mode="yaml") + # remove fields that are not needed + for field in ["origin_id", "project_id", "name"]: + children[ds_name].pop(field, None) + children[ds_name]["path"] = str( + PurePosixPath(children[ds_name]["path"]) + ) + else: + children[ds_name] = ds + + if only_datasets: + subfolders = [ + level_folder / n + for n, c in children.items() + if (c is None) or (c.get("type", "unknown") != "dataset") + ] + else: + subfolders = level_folder.glob("*") + + for child in subfolders: + if child.is_dir(): + _create_yaml_dict( + child, + project=project, + genealogy=genealogy + [level_name], + format_yaml=format_yaml, + parent_dict=children, + ) + level_dict["children"] = children + parent_dict[level_name] = level_dict + return parent_dict - """ - def trim_sample_paths(samples): - # utility function to recurse into samples - for sample_name, sample_data in samples.items(): - samples[sample_name]["path"] = str( - PurePosixPath( - Path(samples[sample_name]["path"]).relative_to(raw_data_folder) - ) +def _upload_yaml_dict( + yaml_dict, origin, raw_data_folder, log_func, flexilims_session, conflicts, verbose +): + for entity, entity_data in yaml_dict.items(): + entity_data = entity_data.copy() + children = entity_data.pop("children", {}) + datatype = entity_data.pop("type") + if datatype == "session": + if verbose: + print(f"Adding session `{entity}`") + new_entity = flz.add_experimental_session( + date=entity[1:], + flexilims_session=flexilims_session, + parent_id=origin["id"], + attributes=entity_data, + session_name=entity, + conflicts=conflicts, ) - for ds_name, ds in sample_data.get("datasets", {}).items(): - ds.path = PurePosixPath(ds.path.relative_to(raw_data_folder)) - trim_sample_paths(sample_data["samples"]) - - if raw_data_folder is None: - raw_data_folder = flz.get_data_root("raw", session_data["project"]) - if "path" in session_data.keys(): - session_data["path"] = str( - PurePosixPath(Path(session_data["path"]).relative_to(raw_data_folder)) - ) - for ds_name, ds in session_data.get("datasets", {}).items(): - ds.path = ds.path.relative_to(raw_data_folder) - for rec_name, rec_data in session_data["recordings"].items(): - session_data["recordings"][rec_name]["path"] = str( - PurePosixPath( - Path(session_data["recordings"][rec_name]["path"]).relative_to( - raw_data_folder + elif datatype == "recording": + rec_type = entity_data.pop("recording_type", "Not specified") + prot = entity_data.pop("protocol", "Not specified") + if verbose: + print( + f"Adding recording `{entity}`, type `{rec_type}`, protocol `{prot}`" ) + new_entity = flz.add_recording( + session_id=origin["id"], + recording_type=rec_type, + protocol=prot, + attributes=entity_data, + recording_name=entity, + conflicts=conflicts, + flexilims_session=flexilims_session, + ) + elif datatype == "sample": + if verbose: + print(f"Adding sample `{entity}`") + new_entity = flz.add_sample( + parent_id=origin["id"], + attributes=entity_data, + sample_name=entity, + conflicts=conflicts, + flexilims_session=flexilims_session, + ) + elif datatype == "dataset": + created = entity_data.pop("created") + dataset_type = entity_data.pop("dataset_type") + path = entity_data.pop("path") + is_raw = entity_data.pop("is_raw") + + if verbose: + print(f"Adding dataset `{entity}`, type `{dataset_type}`") + new_entity = 
flz.add_dataset( + parent_id=origin["id"], + dataset_type=dataset_type, + created=created, + path=path, + is_raw=is_raw, + flexilims_session=flexilims_session, + dataset_name=entity, + attributes=entity_data["extra_attributes"], + strict_validation=False, + conflicts=conflicts, ) - ) - for ds_name, ds in rec_data.get("datasets", {}).items(): - ds.path = PurePosixPath(ds.path.relative_to(raw_data_folder)) - trim_sample_paths(session_data["samples"]) - return session_data - - -def _create_sample_datasets(parent, raw_data_folder): - """Recursively index samples creating a nested dictionary and generate - corresponding datasets - - Args: - parent (dict): Dictionary corresponding to the parent entity - - Return: - dict: dictionary of child samples - """ - if "samples" not in parent: - return dict() - for sample_name, sample in parent["samples"].items(): - sample["path"] = parent["path"] / sample_name - sample["datasets"] = create_dataset( - dataset_infos=sample["datasets"], - parent=sample, + _upload_yaml_dict( + yaml_dict=children, + origin=new_entity, raw_data_folder=raw_data_folder, - error_handling="report", - ) - - # recurse into child samples - sample["samples"] = _create_sample_datasets(sample, raw_data_folder) - # we update in place but we also return the dictionary of samples to make - # for more readable code - return parent["samples"] - - -def _clean_yaml(path_to_yaml): - """Read a yaml file and check that it is correctly formatted - - This does not do any processing, just make sure that I can read the whole yaml and - generate dictionary will all expected fields - - Args: - path_to_yaml (str): path to the YAML file, or dict of the yaml content - - Returns: - dict: nested dictionary containing entries in the YAML file - - """ - - if isinstance(path_to_yaml, dict): - yml_data = path_to_yaml - else: - with open(path_to_yaml, "r") as yml_file: - try: - yml_data = yaml.safe_load(yml_file) - except ParserError as e: - raise IOError("Invalid yaml. 
Parser returned an error: %s" % e) - - session, nested_levels = _read_level(yml_data) - - session["datasets"] = {} - for dataset_name, dataset_dict in nested_levels["datasets"].items(): - session["datasets"][dataset_name] = _read_dataset( - name=dataset_name, data=dataset_dict - ) - - session["recordings"] = {} - for rec_name, rec_dict in nested_levels["recordings"].items(): - session["recordings"][rec_name] = _read_recording(name=rec_name, data=rec_dict) - - session["samples"] = {} - for sample_name, sample_dict in nested_levels["samples"].items(): - session["samples"][sample_name] = _read_sample( - name=sample_name, data=sample_dict - ) - - return session - - -def _read_sample(name, data): - """Read YAML information corresponding to a sample - - Args: - name (str): the name of the sample - data (dict): data for this sample only - - Returns: - dict: the sample read from the yaml - - """ - if data is None: - data = {} - sample, nested_levels = _read_level( - data, - mandatory_args=(), - optional_args=("notes", "attributes", "path"), - nested_levels=("datasets", "samples"), - ) - sample["name"] = name - - sample["datasets"] = dict() - for ds_name, ds_data in nested_levels["datasets"].items(): - sample["datasets"][ds_name] = _read_dataset(name=ds_name, data=ds_data) - sample["samples"] = dict() - for sample_name, sample_data in nested_levels["samples"].items(): - sample["samples"][sample_name] = _read_sample( - name=sample_name, data=sample_data + log_func=log_func, + flexilims_session=flexilims_session, + conflicts=conflicts, + verbose=verbose, ) - return sample - - -def _read_recording(name, data): - """Read YAML information corresponding to a recording - - Args: - name (str): the name of the recording - data (dict): data for this dataset only - - Returns: - dict: the recording read from the yaml - - """ - recording, datasets = _read_level( - data, - mandatory_args=("protocol",), - optional_args=("notes", "attributes", "path", "recording_type", "timestamp"), - nested_levels=("datasets",), - ) - recording["name"] = name - - # if timestamps is None, the name must start with RHHMMSS - if recording["timestamp"] is None: - m = re.match(r"R(\d\d\d\d\d\d)", recording["name"]) - if not m: - raise SyncYmlError( - "Timestamp must be provided if recording name is not " - "properly formatted" - ) - recording["timestamp"] = m.groups()[0] - recording["datasets"] = dict() - for ds_name, ds_data in datasets["datasets"].items(): - recording["datasets"][ds_name] = _read_dataset(name=ds_name, data=ds_data) - - return recording - -def _read_dataset(name, data): - """Read YAML information corresponding to a dataset - Args: - name (str): the name of the dataset, will be composed with parent names to - generate an identifier - data (dict): data for this dataset only - - Returns: - dict: a formatted dictionary including, 'dataset_type', 'path', 'notes', - 'attributes' and 'name' - - """ - level, _ = _read_level( - data, - mandatory_args=("dataset_type", "path"), - optional_args=( - "notes", - "attributes", - "created", - "is_raw", - "origin_id", - "genealogy", - ), - nested_levels=(), - ) - level["name"] = name - return level - - -def _read_level( - yml_level, - mandatory_args=("project", "mouse", "session"), - optional_args=("path", "notes", "attributes", "genealogy"), - nested_levels=("recordings", "datasets", "samples"), +def _check_recursively( + yaml_data, + origin_genealogy, + root_folder, + project, + genealogy, + fixerrors=False, + errors=None, ): - """Read one layer of the yml file (i.e. 
a dictionary) - - Args: - yml_level (dict): a dictionary containing the yml level to analyse (and all sublevels) - mandatory_args: arguments that must be in this level - optional_args: arguments that are expected but not mandatory, will be `None` if - absent - nested_levels: name of any nested level that should not be parsed - - Returns: - (tuple): a tuple containing two dictionaries: - level (dict): dictionary of top level attributes - nested_levels (dict): dictionary of nested dictionaries - """ - # make a copy to not change original version - yml_level = yml_level.copy() - is_absent = [m not in yml_level for m in mandatory_args] - if any(is_absent): - absents = ", ".join(["%s" % a for a, m in zip(mandatory_args, is_absent) if m]) - raise SyncYmlError("%s must be provided in the YAML file." % absents) - level = {m: yml_level.pop(m) for m in mandatory_args} - - for opt in optional_args: - level[opt] = yml_level.pop(opt, None) - - nested_levels = {n: yml_level.pop(n, {}) for n in nested_levels} - - # the rest is unexpected - if len(yml_level): - raise SyncYmlError( - "Got unexpected attribute(s): %s" % (", ".join(yml_level.keys())) - ) - return level, nested_levels - + if errors is None: + errors = dict() + root_folder = Path(root_folder) + + for child, child_dict in yaml_data.items(): + fname = root_folder / Path(*genealogy) / child + child_genealogy = genealogy + [child] + + if child_dict["type"] != "dataset": + if not fname.is_dir(): + child_dict["PATH_ERROR"] = f"XXERRORXX folder {fname} does not exist" + errors[fname] = child_dict + else: + data_series = pd.Series(child_dict) + for k, v in data_series.pop("extra_attributes").items(): + data_series[k] = v + data_series.id = None + data_series.name = "_".join(origin_genealogy + child_genealogy) + ds = flz.Dataset.from_dataseries(data_series) + ds.project = project + msg = ds.is_valid(return_reason=True) + if msg: + child_dict["VALIDATION_ERROR"] = f"XXERRORXX {msg}" + errors[fname] = child_dict + + if child_dict["genealogy"] != origin_genealogy + child_genealogy: + if fixerrors: + print(f"Fixing genealogy for {child}") + child_dict["genealogy"] = origin_genealogy + child_genealogy + else: + child_dict["GENEALOGY_ERROR"] = "XXERRORXX genealogy is not correct" + errors[fname] = child_dict + if "children" in child_dict: + _check_recursively( + child_dict["children"], + origin_genealogy, + root_folder, + project, + genealogy=genealogy + [child], + fixerrors=fixerrors, + errors=errors, + ) + return errors -def find_xxerrorxx(yml_file=None, yml_data=None, pattern="XXERRORXX", _output=None): - """Utility to find where things went wrong - Look through a `yml_file` or the corresponding `yml_Data` dictionary recursively. - Returns a dictionary with all entries containing the error `pattern` +if __name__ == "__main__": + example_yml = "/Users/blota/Desktop/test_yaml.yml" + out = parse_yaml(example_yml) + with open("/Users/blota/Desktop/test_yaml_redump.yml", "w") as f: + yaml.dump(out, f) - _output is used for recursive calling. 
- """ - if yml_file is not None: - if yml_data is not None: - raise IOError("Set either yml_file OR yml_data") - with open(yml_file, "r") as reader: - yml_data = yaml.safe_load(reader) - - if _output is None: - _output = dict() - for k, v in yml_data.items(): - if isinstance(v, dict): - _output = find_xxerrorxx(yml_data=v, pattern=pattern, _output=_output) - elif isinstance(v, str) and (pattern in v): - _output[k] = v - return _output + rel = "blota_onix_pilote/BRAC7448.2d/" + root_folder = Path(flz.PARAMETERS["data_root"]["raw"]) / rel + yaml_file = Path(flz.PARAMETERS["data_root"]["processed"]) / rel / "S20230421.yml" + origin_name = "BRAC7448.2d" + check_yaml_validity(yaml_file, root_folder, origin_name) diff --git a/flexiznam/cli.py b/flexiznam/cli.py index 734cc3f..61e97d7 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -6,6 +6,17 @@ def cli(): pass +@cli.command() +@click.argument("root_folder", type=click.Path(exists=True), default=".") +def gui(root_folder): + """Start the GUI""" + from flexiznam.gui import flexigui + + app = flexigui.FlexiGui() + app.root_folder.set(root_folder) + app.mainloop() + + @cli.command() @click.option("-p", "--project_id", prompt="Enter the project ID", help="Project ID.") @click.option( @@ -28,6 +39,7 @@ def cli(): show_default=True, ) def add_genealogy(project_id, name, recursive, verbose): + """Add genealogy to a flexilims entity""" from flexiznam import get_flexilims_session flm_sess = get_flexilims_session(project_id=project_id) @@ -60,9 +72,9 @@ def add_mouse( flexilims_username=None, mcms_username=None, ): + """Add a single mouse to a project.""" from flexiznam import main - """Add a single mouse to a project.""" click.echo("Trying to add %s in %s" % (mouse_name, project_id)) main.add_mouse( mouse_name=mouse_name, @@ -168,7 +180,9 @@ def add_password(app, username, password, password_file): @click.option( "-p", "--project", default="NOT SPECIFIED", help="Project name on flexilims." ) -@click.option("-m", "--mouse", default="NOT SPECIFIED", help="Mouse name on flexilims.") +@click.option( + "-o", "--origin", default="NOT SPECIFIED", help="Origin name on flexilims." +) @click.option( "--overwrite/--no-overwrite", default=False, @@ -179,43 +193,20 @@ def add_password(app, username, password, password_file): default=False, help="After creating the yaml skeleton, should I also parse it?", ) -@click.option( - "-r", - "--raw_data_folder", - default=None, - help="Path to the root folder containing raw data. Only used with " "`--process`", -) -def create_yaml( - source_dir, target_yaml, project, mouse, overwrite, process, raw_data_folder -): +def create_yaml(source_dir, target_yaml, project, origin, overwrite, process): """Create a yaml file by looking recursively in `root_dir`""" from flexiznam import camp - import pathlib - target_yaml = pathlib.Path(target_yaml) - if (not overwrite) and target_yaml.exists(): - s = input("File %s already exists. Overwrite (yes/[no])? 
" % target_yaml) - if s == "yes": - overwrite = True - else: - raise ( - FileExistsError( - "File %s already exists and overwrite is not allowed" % target_yaml - ) - ) - source_dir = pathlib.Path(source_dir) - if not source_dir.is_dir(): - raise FileNotFoundError("source_dir %s is not a directory" % source_dir) - yml_content = camp.sync_data.create_yaml( + camp.sync_data.create_yaml( root_folder=source_dir, - outfile=target_yaml, + output_file=target_yaml, + origin_name=origin, project=project, - mouse=mouse, overwrite=overwrite, ) click.echo("Created yml skeleton in %s" % target_yaml) if process: - raise NotImplementedError + raise NotImplementedError("Process yaml at creation is not implemented yet") @cli.command() @@ -316,7 +307,10 @@ def yaml_to_flexilims(source_yaml, raw_data_folder=None, conflicts=None): @click.option("-t", "--target_file", default=None, help="Path to write csv output.") @click.option("-r", "--root_name", default=None, help="Root entity to start the check.") @click.option("--flexilims_username", default=None, help="Your username on flexilims.") -def check_flexilims_issues(project_id, target_file, root_name, flexilims_username): +@click.option("--add-path/--no-add-path", default=False, help="Add missing paths.") +def check_flexilims_issues( + project_id, target_file, root_name, flexilims_username, add_path +): """Check that database is properly formatted This will check recursively all mice if `root_name` is not provided. Elements that @@ -325,7 +319,6 @@ def check_flexilims_issues(project_id, target_file, root_name, flexilims_usernam """ from flexiznam.main import get_flexilims_session from flexiznam import utils - import pathlib import pandas as pd flexilims_session = get_flexilims_session( @@ -350,3 +343,6 @@ def check_flexilims_issues(project_id, target_file, root_name, flexilims_usernam else: df = pdf df.to_csv(target_file) + if add_path: + print("Adding missing paths") + utils.add_missing_paths(flexilims_session, root_name=root_name) diff --git a/flexiznam/config/config_tools.py b/flexiznam/config/config_tools.py index bf2d373..660a796 100644 --- a/flexiznam/config/config_tools.py +++ b/flexiznam/config/config_tools.py @@ -53,12 +53,24 @@ def _find_file(file_name, config_folder=None, create_if_missing=False): raise ConfigurationError("Cannot find %s" % file_name) -def load_param(param_folder=None, config_file="config.yml"): - """Read parameter file from config folder""" +def load_param(param_folder=None, config_file="config.yml", verbose=False): + """Read parameter file from config folder + + Args: + param_folder (str, optional): folder to look for the file. Defaults to None. + config_file (str, optional): name of the file to find. Defaults to "config.yml". + verbose (bool, optional): if True, print the path of the file being read. + Defaults to False. 
+ + Returns: + dict: parameters read from the file + """ if param_folder is None: param_file = _find_file(config_file) else: param_file = Path(param_folder) / config_file + if verbose: + print(f"Reading parameters from {param_file}") with open(param_file, "r") as yml_file: prm = yaml.safe_load(yml_file) return prm @@ -155,7 +167,7 @@ def update_config( project_ids.update(kwargs["project_ids"]) kwargs["project_ids"] = project_ids all_ids = {} - for (pname, pid) in kwargs["project_ids"].items(): + for pname, pid in kwargs["project_ids"].items(): if pid in all_ids: warnings.warn(f"PIDs {pname} and {all_ids[pid]} have the same ID") all_ids[pid] = pname diff --git a/flexiznam/config/default_config.py b/flexiznam/config/default_config.py index 5e0d4ae..55c7e2b 100644 --- a/flexiznam/config/default_config.py +++ b/flexiznam/config/default_config.py @@ -30,7 +30,7 @@ # list of all datatypes datatypes=["mouse", "session", "recording", "dataset", "sample"], # should we limit the valid dataset types? - enforce_dataset_types=True, + enforce_dataset_types=False, # if we enforce, what is the list of valid dataset type dataset_types=[ "scanimage", diff --git a/flexiznam/gui/__init__.py b/flexiznam/gui/__init__.py new file mode 100644 index 0000000..428404e --- /dev/null +++ b/flexiznam/gui/__init__.py @@ -0,0 +1 @@ +from . import flexigui diff --git a/flexiznam/gui/azure.tcl b/flexiznam/gui/azure.tcl new file mode 100644 index 0000000..fead545 --- /dev/null +++ b/flexiznam/gui/azure.tcl @@ -0,0 +1,87 @@ +# Copyright © 2021 rdbende + +source [file join [file dirname [info script]] theme light.tcl] +source [file join [file dirname [info script]] theme dark.tcl] + +option add *tearOff 0 + +proc set_theme {mode} { + if {$mode == "dark"} { + ttk::style theme use "azure-dark" + + array set colors { + -fg "#ffffff" + -bg "#333333" + -disabledfg "#ffffff" + -disabledbg "#737373" + -selectfg "#ffffff" + -selectbg "#007fff" + } + + ttk::style configure . \ + -background $colors(-bg) \ + -foreground $colors(-fg) \ + -troughcolor $colors(-bg) \ + -focuscolor $colors(-selectbg) \ + -selectbackground $colors(-selectbg) \ + -selectforeground $colors(-selectfg) \ + -insertcolor $colors(-fg) \ + -insertwidth 1 \ + -fieldbackground $colors(-selectbg) \ + -font {"Segoe Ui" 10} \ + -borderwidth 1 \ + -relief flat + + tk_setPalette background [ttk::style lookup . -background] \ + foreground [ttk::style lookup . -foreground] \ + highlightColor [ttk::style lookup . -focuscolor] \ + selectBackground [ttk::style lookup . -selectbackground] \ + selectForeground [ttk::style lookup . -selectforeground] \ + activeBackground [ttk::style lookup . -selectbackground] \ + activeForeground [ttk::style lookup . -selectforeground] + + ttk::style map . -foreground [list disabled $colors(-disabledfg)] + + option add *font [ttk::style lookup . -font] + option add *Menu.selectcolor $colors(-fg) + + } elseif {$mode == "light"} { + ttk::style theme use "azure-light" + + array set colors { + -fg "#000000" + -bg "#ffffff" + -disabledfg "#737373" + -disabledbg "#ffffff" + -selectfg "#ffffff" + -selectbg "#007fff" + } + + ttk::style configure . \ + -background $colors(-bg) \ + -foreground $colors(-fg) \ + -troughcolor $colors(-bg) \ + -focuscolor $colors(-selectbg) \ + -selectbackground $colors(-selectbg) \ + -selectforeground $colors(-selectfg) \ + -insertcolor $colors(-fg) \ + -insertwidth 1 \ + -fieldbackground $colors(-selectbg) \ + -font {"Segoe Ui" 10} \ + -borderwidth 1 \ + -relief flat + + tk_setPalette background [ttk::style lookup . 
-background] \ + foreground [ttk::style lookup . -foreground] \ + highlightColor [ttk::style lookup . -focuscolor] \ + selectBackground [ttk::style lookup . -selectbackground] \ + selectForeground [ttk::style lookup . -selectforeground] \ + activeBackground [ttk::style lookup . -selectbackground] \ + activeForeground [ttk::style lookup . -selectforeground] + + ttk::style map . -foreground [list disabled $colors(-disabledfg)] + + option add *font [ttk::style lookup . -font] + option add *Menu.selectcolor $colors(-fg) + } +} diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py new file mode 100644 index 0000000..eb9b3a8 --- /dev/null +++ b/flexiznam/gui/flexigui.py @@ -0,0 +1,430 @@ +import os +import tkinter as tk +from ttkwidgets import CheckboxTreeview +import yaml +from pathlib import Path +import flexiznam as flz +import flexiznam.camp.sync_data + + +class FlexiGui(tk.Tk): + FLEXILIMS_ONLY_FIELDS = ("children", "project", "origin_id") + RESOURCES = Path(__file__).parent + + def __init__(self): + super().__init__() + + self.title("FlexiZnam GUI") + self.geometry("800x600") + + self.rowconfigure(1, weight=10) + self.columnconfigure(0, weight=1) + self.columnconfigure(1, weight=3) + + self.frames = dict() + self._create_frames() + self._setup_widgets() + self._entity_by_itemid = {} + self.contains_errors = False + self.data = {} + + ############# GUI setup methods ############# + # These methods are used to create the GUI elements + + def _setup_widgets(self): + self._create_frames() + self._create_buttons() + self._create_treeview() + self._create_textview() + self._create_statusbar() + + def _create_frames(self): + self.frames["T"] = tk.Frame(self) + self.frames["T"].grid( + row=0, column=0, padx=10, pady=5, columnspan=2, sticky="nwe" + ) + self.frames["T"].rowconfigure(0, weight=1) + self.frames["T"].rowconfigure(1, weight=1) + for i in range(10): + self.frames["T"].columnconfigure(i, weight=1) + self.frames["T"].columnconfigure(3, weight=10) + self.frames["L"] = tk.Frame(self) + self.frames["L"].grid(row=1, column=0, padx=10, pady=5, sticky="nsew") + self.frames["L"].rowconfigure(0, weight=1) + self.frames["L"].columnconfigure(0, weight=1) + self.frames["R"] = tk.Frame(self) + self.frames["R"].grid(row=1, column=1, padx=10, pady=5, sticky="nsew") + self.frames["R"].rowconfigure(0, weight=1) + self.frames["R"].rowconfigure(1, weight=30) + self.frames["R"].rowconfigure(2, weight=1) + self.frames["R"].columnconfigure(0, weight=1) + self.frames["B"] = tk.Frame(self) + self.frames["B"].grid( + row=2, column=0, columnspan=2, padx=10, pady=5, sticky="sew" + ) + self.frames["B"].rowconfigure(0, weight=1) + self.frames["B"].columnconfigure(0, weight=10) + + def _create_treeview(self): + # Create the Treeview + self.treeview = CheckboxTreeview( + self.frames["L"], + columns=("datatype",), + selectmode="browse", + ) + + self.treeview.grid(row=0, column=0, sticky="nsew") + self.treeview.heading("datatype", text="Datatype") + self.treeview.column("datatype", width=200) + # Bind the Treeview selection event + self.treeview.bind("<<TreeviewSelect>>", self.on_treeview_select) + self.treeview.tag_configure("error", background="red") + + def _create_textview(self): + # Create the Text widget + tk.Label(self.frames["R"], text="Selected item:").grid( + row=0, + column=0, + sticky="nw", + ) + self.selected_item = tk.StringVar() + self.selected_item.set("None") + l = tk.Label(self.frames["R"], textvariable=self.selected_item) + l.grid(row=0, column=1, sticky="new") + self.textview = tk.Text(self.frames["R"],
width=40, height=10, wrap="none") + self.textview.grid(row=1, column=0, sticky="nsew", columnspan=2) + self.textview.bind("<<Modified>>", self.on_textview_change) + self.update_item_btn = tk.Button( + self.frames["R"], text="Update item", command=self.update_item + ) + self.update_item_btn.grid(row=2, column=1, sticky="nsw") + + def _create_buttons(self): + topf = self.frames["T"] + self.parse_btn = tk.Button(topf, text="Parse", command=self.parse_folder) + self.parse_btn.grid(row=0, column=0, sticky="w") + self.load_btn = tk.Button(topf, text="Load", command=self.load_yaml) + self.load_btn.grid(row=0, column=1, sticky="w") + self.write_btn = tk.Button(topf, text="Write", command=self.write_yaml) + self.write_btn.grid(row=0, column=2) + + # add project dropdown and label + tk.Label(topf, text="Project:").grid(row=0, column=3, sticky="w") + self.project = tk.StringVar(self) + self.project.set("SELECT PROJECT") + self.proj_ddwn = tk.OptionMenu( + topf, + self.project, + "SELECT PROJECT", + *flz.PARAMETERS["project_ids"].keys(), + ).grid(row=0, column=4, columnspan=3, sticky="w") + self.upload_btn = tk.Button(topf, text="Upload", command=self.upload) + self.upload_btn.grid(row=0, column=7) + + # add conflicts dropdown and label + tk.Label(topf, text="Conflicts:").grid(row=0, column=8, sticky="w") + self.conflicts = tk.StringVar(self) + self.conflicts.set("abort") + self.conflicts_ddwn = tk.OptionMenu( + topf, self.conflicts, "abort", "overwrite", "skip" + ) + self.conflicts_ddwn.grid(row=0, column=9, sticky="w") + self.quit_btn = tk.Button(topf, text="Quit", command=self.quit) + self.quit_btn.grid(row=0, column=10, sticky="e") + + # add origin name and root dir + tk.Label(topf, text="Origin name:").grid(row=1, column=0, sticky="w") + self.origin_name = tk.StringVar(self) + self.origin_name.set("ENTER FLEXILIMS ORIGIN NAME") + self.origin_name_entry = tk.Entry(topf, textvariable=self.origin_name) + self.origin_name_entry.grid(row=1, column=1, columnspan=2, sticky="nsew") + tk.Label(topf, text="Root directory:").grid(row=1, column=3, sticky="w") + self.root_folder = tk.StringVar(self) + self.root_folder.set(os.getcwd()) + self.root_folder_entry = tk.Entry(topf, textvariable=self.root_folder) + self.root_folder_entry.grid(row=1, column=4, columnspan=6, sticky="nsew") + self.chg_dir_btn = tk.Button(topf, text="...", command=self.chg_root_folder) + self.chg_dir_btn.grid(row=1, column=10) + + def _create_statusbar(self): + self.sb_msg = tk.StringVar() + self.statusbar = tk.Label( + self.frames["B"], textvariable=self.sb_msg, bd=1, relief=tk.SUNKEN + ) + self.statusbar.grid(row=0, column=0, sticky="sw") + self.sb_msg.set("Ready") + + ############# GUI update methods ############# + # These methods are used to actually do stuff with the GUI elements + def get_checked_data(self, item=None, checked_data=None): + if checked_data is None: + checked_data = dict(children=dict()) + for k in ["project", "origin_name", "root_folder"]: + checked_data[k] = self.data[k] + + for child in self.treeview.get_children(item=item): + if self.treeview.tag_has("checked", child) or self.treeview.tag_has( + "tristate", child + ): + name, data = self._entity_by_itemid[child] + data = data.copy() + if "children" in data: + data["children"] = {} + data = self.get_checked_data(item=child, checked_data=data) + checked_data["children"][name] = data + return checked_data + + def report(self, message): + self.sb_msg.set(message) + print(message) + self.update() + + def _check_options_are_set(self, options=("project", "origin_name")):
self.report("Checking options") + init_values = dict(project="SELECT", origin_name="ENTER") + for option in options: + value = getattr(self, option).get() + if value.startswith(init_values[option]): + tk.messagebox.showerror("Error", f"Error: enter {option} first!") + return False + self.report("Options are set") + return True + + def parse_folder(self): + if not self._check_options_are_set(): + return + folder = tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select directory to parse" + ) + self.report(f"Parsing folder {folder}...") + self.root_folder.set(folder) + data = flz.camp.sync_data.create_yaml_dict( + folder_to_parse=folder, + project=self.project.get(), + origin_name=self.origin_name.get(), + format_yaml=True, + ) + self.report("Parsing done. Validating data...") + data, errors = flz.camp.sync_data.check_yaml_validity(data) + self.data = data + self.update_data(remove_unchecked=False) + checked = self.get_checked_data(item=None, checked_data=None) + assert checked == self.data + self.report("Done") + + def chg_root_folder(self): + self.report("Changing root folder") + self.root_folder.set( + tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select root directory" + ) + ) + + def on_treeview_select(self, event): + item = self.treeview.focus() + name, data = self._entity_by_itemid[item] + self.report(f"Selected item: {name}") + self.selected_item.set(name) + display = {k: v for k, v in data.items() if k not in self.FLEXILIMS_ONLY_FIELDS} + self.textview.delete(1.0, tk.END) + self.textview.insert(tk.END, yaml.dump(display)) + + def on_textview_change(self, event): + return + + def load_yaml(self): + """Load a YAML file and display it in the treeview""" + self.report("Select YAML file to load") + filetypes = (("Yaml files", "*.yml *.yaml"), ("All files", "*.*")) + + filename = tk.filedialog.askopenfilename( + title="Select YAML file to load", filetypes=filetypes + ) + if not filename: + return + self.report(f"Loading YAML file {filename}...") + with open(filename, "r") as f: + self.data = yaml.safe_load(f) + self.update_data() + self.report("Done") + + def update_data(self, name_to_select=None, remove_unchecked=True): + """Update GUI data from self.data + + Args: + name_to_select (str, optional): Name of item to select in treeview. 
+ Defaults to None.""" + self.report("Updating GUI") + if remove_unchecked: + self.data = self.get_checked_data() + self.textview.delete("1.0", tk.END) + self.selected_item.set("None") + self.treeview.delete(*self.treeview.get_children()) + self._entity_by_itemid = {} + + if "project" in self.data: + self.project.set(self.data["project"]) + if "origin_name" in self.data: + self.origin_name.set(self.data["origin_name"]) + if "root_folder" in self.data: + self.root_folder.set(self.data["root_folder"]) + + self.contains_errors = False + self._insert_yaml_data(self.data["children"], name_to_select=name_to_select) + + def _insert_yaml_data(self, data, parent="", name_to_select=None): + assert isinstance(data, dict), "data must be a dict" + for child, child_data in data.items(): + assert "type" in child_data, f"datatype missing for {child}" + dtype = child_data["type"] + item = self.treeview.insert( + parent, + "end", + text=child, + values=[dtype], + open=True, + ) + if any( + [ + v.startswith("XXERRORXX") + for v in child_data.values() + if isinstance(v, str) + ] + ): + self.contains_errors = True + self.report(f"ERROR: {child} contains errors") + self.treeview.item(item, tags=("error", "checked")) + self.treeview.change_state(item, "checked") + self._entity_by_itemid[item] = (child, child_data) + if name_to_select and child == name_to_select: + self.treeview.focus(item) + self.treeview.selection_set(item) + + if "children" in child_data: + self._insert_yaml_data( + child_data["children"], parent=item, name_to_select=name_to_select + ) + + def write_yaml(self): + """Write the current data to a YAML file""" + self.report("Select YAML file to write") + target = tk.filedialog.asksaveasfilename( + initialdir=self.root_folder.get(), + title="Select YAML file to write", + filetypes=(("Yaml files", "*.yml *.yaml"), ("All files", "*.*")), + ) + if not target: + self.report("No file selected. Cancel") + return + data = dict(self.data) + data["project"] = self.project.get() + data["root_folder"] = self.root_folder.get() + with open(target, "w") as f: + yaml.dump(data, f) + self.report('Wrote YAML file "{}"'.format(target)) + + def upload(self): + """Upload data to flexilims""" + print("Uploading data to flexilims") + if not self._check_options_are_set(): + return + + if not self.data: + tk.messagebox.showerror("Error", "No data loaded") + return + + self.report("Validating data...") + self.update_data() + data, errors = flz.camp.sync_data.check_yaml_validity(self.get_checked_data()) + + if self.contains_errors: + tk.messagebox.showerror( + "Error", + "There are still errors. 
Please fix them before uploading", + ) + return + + data = dict(self.data) + # remove unchecked items + for item in self.treeview.get_children(): + if not self.treeview.tag_has("checked", item): + name, _ = self._entity_by_itemid[item] + self.report(f"Removing item {name}") + data["children"].pop(name) + + data["project"] = self.project.get() + data["root_folder"] = self.root_folder.get() + + self.report("Validating data...") + flz.camp.sync_data.upload_yaml( + source_yaml=data, + raw_data_folder=data["root_folder"], + verbose=True, + log_func=print, + flexilims_session=None, + conflicts=self.conflicts.get(), + ) + self.report("Done") + + def update_item(self): + """Update the selected item with the textview contents""" + + text = self.textview.get(1.0, tk.END) + if not text.strip(): + return + item = self.treeview.focus() + name, original_data = self._entity_by_itemid[item] + self.report(f"Updating item {name}") + assert name == self.selected_item.get(), "Selected item does not match" + data = yaml.safe_load(text) + for field in self.FLEXILIMS_ONLY_FIELDS: + if field in original_data: + data[field] = original_data[field] + self._entity_by_itemid[item] = (name, data) + parents = [] + parent_id = item + while True: + parent = self.treeview.parent(parent_id) + if not parent: + break + parents.append(self._entity_by_itemid[parent][0]) + parent_id = parent + ref = self.data + for parent in reversed(parents): + ref = ref["children"][parent] + ref["children"][name] = data + self.update_data(name_to_select=name) + self.report("Done") + + +if __name__ == "__main__": + + def diffofdict(d1, d2, diff=None, level=""): + """Find differences between 2 dictionary of dictionaries""" + + if diff is None: + diff = [] + all_keys = set(list(d1.keys()) + list(d2.keys())) + for k in all_keys: + level = level + k + "." 
+ if k not in d2: + diff.append(f"{level} (missing in d2)") + elif k not in d1: + diff.append(f"{level} (missing in d1)") + elif isinstance(d1[k], dict): + diff = diffofdict(d1[k], d2[k], diff, level) + elif d1[k] != d2[k]: + diff.append(f"{level} ({d1[k]} != {d2[k]})") + return diff + + app = FlexiGui() + app.root_folder.set( + "/Volumes/lab-znamenskiyp/data/instruments/raw_data/projects/blota_onix_pilote/BRYA142.5d/" + ) + app.origin_name.set("BRYA142.5d") + app.project.set("blota_onix_pilote") + app.mainloop() + df = diffofdict(app.data["children"], app.get_checked_data()["children"]) + a = app.data["children"]["S20230915"]["children"] + b = app.get_checked_data()["children"]["S20230915"]["children"] + a == b diff --git a/flexiznam/main.py b/flexiznam/main.py index 3f24cda..6347b4e 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -9,7 +9,7 @@ import flexiznam import yaml from flexiznam import mcms -from flexiznam.config import PARAMETERS, get_password, add_password +from flexiznam.config import PARAMETERS, get_password from flexiznam.errors import NameNotUniqueError, FlexilimsError, ConfigurationError @@ -45,8 +45,9 @@ def get_data_root(which, project=None, flexilims_session=None): project = flexilims_session.project_id if project not in PARAMETERS["project_ids"]: - project = lookup_project(project, prm=None) - assert project is not None, f"Invalid project {project}" + proj = lookup_project(project, prm=None) + assert proj is not None, f"Invalid project {project}" + project = proj if project in PARAMETERS["project_paths"]: return Path(PARAMETERS["project_paths"][project][which]) @@ -75,6 +76,7 @@ def get_flexilims_session( password=None, reuse_token=True, timeout=10, + offline_mode=None, ): """Open a new flexilims session by creating a new authentication token. @@ -87,7 +89,12 @@ def get_flexilims_session( read from the secrets file, or failing that triggers an input prompt. reuse_token (bool): (optional) if True, try to reuse an existing token timeout (int): (optional) timeout in seconds for the portalocker lock. Default - to 10. + to 10. + offline_mode (bool): (optional) if True, will use an offline session. In this + case, the `offline_yaml` parameter must be set in the config file. If + not provided, will look for the `offline_mode` parameter in the config + file. Default to None. + Returns: :py:class:`flexilims.Flexilims`: Flexilims session object. 
@@ -97,6 +104,22 @@ def get_flexilims_session( project_id = _format_project(project_id, PARAMETERS) else: warnings.warn("Starting flexilims session without setting project_id.") + + if offline_mode is None: + offline_mode = PARAMETERS.get("offline_mode", False) + + if offline_mode: + yaml_file = PARAMETERS.get("offline_yaml", None) + if yaml_file is None: + raise ConfigurationError("offline_mode is set but offline_yaml is not") + yaml_file = Path(yaml_file) + if not yaml_file.exists(): + yaml_file = get_data_root("processed", project=project_id) / yaml_file + if not yaml_file.exists(): + raise ConfigurationError(f"offline_yaml file {yaml_file} not found") + flexilims_session = flm.OfflineFlexilims(yaml_file, project_id=project_id) + return flexilims_session + if username is None: username = PARAMETERS["flexilims_username"] if password is None: @@ -138,6 +161,7 @@ def add_mouse( mcms_password=None, flexilims_username=None, flexilims_password=None, + conflicts="abort", ): """Check if a mouse is already in the database and add it if it isn't @@ -161,6 +185,8 @@ def add_mouse( flexilims session is not provided flexilims_password (str): [optional] password for flexilims, used only if flexilims session is not provided + conflicts (str): `abort`, `skip`, `update` or `overwrite` (see update_entity for + detailed description) Returns (dict): flexilims reply @@ -174,8 +200,14 @@ def add_mouse( mice_df = get_entities(flexilims_session=flexilims_session, datatype="mouse") if mouse_name in mice_df.index: - print("Mouse already online") - return mice_df.loc[mouse_name] + if conflicts.lower() == "skip": + print("Mouse already online") + return mice_df.loc[mouse_name] + elif conflicts.lower() == "abort": + raise FlexilimsError("Mouse already online") + is_online = True + else: + is_online = False if mouse_info is None: mouse_info = {} @@ -222,12 +254,21 @@ def add_mouse( mouse_info["genealogy"] = [mouse_name] project_name = lookup_project(flexilims_session.project_id, PARAMETERS) mouse_info["path"] = str(Path(project_name) / mouse_name) - resp = flexilims_session.post( - datatype="mouse", - name=mouse_name, - attributes=mouse_info, - strict_validation=False, - ) + if is_online: + resp = update_entity( + datatype="mouse", + name=mouse_name, + mode=conflicts, + attributes=mouse_info, + flexilims_session=flexilims_session, + ) + else: + resp = flexilims_session.post( + datatype="mouse", + name=mouse_name, + attributes=mouse_info, + strict_validation=False, + ) return resp @@ -381,17 +422,16 @@ def add_recording( "conflicts must be `skip`, `abort`, `overwrite` or `update`" ) - experimental_session = get_entity( - datatype="session", flexilims_session=flexilims_session, id=session_id - ) + parent_series = get_entity(flexilims_session=flexilims_session, id=session_id) recording_info = {"recording_type": recording_type, "protocol": protocol} + if attributes is None: attributes = {} if "path" not in attributes: attributes["path"] = str( Path( get_path( - experimental_session["path"], + parent_series["path"], datatype="session", flexilims_session=flexilims_session, ) @@ -407,20 +447,25 @@ def add_recording( recording_info.update(attributes) if recording_name is None: - recording_name = experimental_session["name"] + "_" + protocol + "_0" + recording_name = parent_series["name"] + "_" + protocol + "_0" + + if "genealogy" not in attributes: + attributes["genealogy"] = list(parent_series["genealogy"]) + [recording_name] + rec_full_name = "_".join(attributes["genealogy"]) + online_recording = get_entity( - 
datatype="recording", name=recording_name, flexilims_session=flexilims_session + datatype="recording", name=rec_full_name, flexilims_session=flexilims_session ) if online_recording is not None: if conflicts.lower() == "skip": - print("A recording named %s already exists" % (recording_name)) + print("A recording named %s already exists" % (rec_full_name)) return online_recording elif conflicts.lower() == "abort": - raise FlexilimsError("A recording named %s already exists" % recording_name) + raise FlexilimsError("A recording named %s already exists" % rec_full_name) else: resp = update_entity( datatype="recording", - name=recording_name, + name=rec_full_name, id=online_recording["id"], origin_id=session_id, mode=conflicts, @@ -432,7 +477,7 @@ def add_recording( resp = flexilims_session.post( datatype="recording", - name=recording_name, + name=rec_full_name, attributes=recording_info, origin_id=session_id, other_relations=other_relations, @@ -589,7 +634,6 @@ def add_dataset( dataset_type, created, path, - genealogy, is_raw="yes", project_id=None, flexilims_session=None, @@ -605,8 +649,6 @@ def add_dataset( dataset_type (str): dataset_type, must be a type define in the config file created (str): date of creation as text, usually in this format: '2021-05-24 14:56:41' path (str): path to the data relative to the project folder - genealogy (tuple): parents of this dataset from the project (excluded) down to - the dataset name itself (included) is_raw (str): `yes` or `no`, used to find the root directory project_id (str): hexadecimal ID or name of the project flexilims_session (:py:class:`flexilims.Flexilims`): authentication @@ -632,11 +674,10 @@ def add_dataset( if conflicts.lower() not in valid_conflicts: raise AttributeError("`conflicts` must be in [%s]" % ", ".join(valid_conflicts)) + parent = get_entity(flexilims_session=flexilims_session, id=parent_id) + if dataset_name is None: - parent_name = get_entity( - flexilims_session=flexilims_session, - id=parent_id, - )["name"] + parent_name = parent["name"] dataset_name = parent_name + "_" + dataset_type + "_0" dataset_info = { @@ -644,7 +685,7 @@ def add_dataset( "created": created, "path": path, "is_raw": is_raw, - "genealogy": genealogy, + "genealogy": list(parent["genealogy"]), } reserved_attributes = ["dataset_type", "created", "path", "is_raw", "genealogy"] if attributes is not None: @@ -656,32 +697,37 @@ def add_dataset( dataset_name = generate_name( "dataset", dataset_name, flexilims_session=flexilims_session ) + dataset_info["genealogy"].append(dataset_name) + dataset_full_name = "_".join(dataset_info["genealogy"]) else: + dataset_info["genealogy"].append(dataset_name) + dataset_full_name = "_".join(dataset_info["genealogy"]) online_version = get_entity( - "dataset", name=dataset_name, flexilims_session=flexilims_session + "dataset", name=dataset_full_name, flexilims_session=flexilims_session ) if online_version is not None: if conflicts.lower() == "abort": - raise FlexilimsError("A dataset named %s already exists" % dataset_name) + raise FlexilimsError( + "A dataset named %s already exists" % dataset_full_name + ) elif conflicts.lower() == "skip": - print("A dataset named %s already exists" % dataset_name) + print("A dataset named %s already exists" % dataset_full_name) return online_version else: resp = update_entity( datatype="dataset", - name=dataset_name, + name=dataset_full_name, id=online_version["id"], origin_id=parent_id, mode=conflicts, attributes=dataset_info, - other_relations=None, flexilims_session=flexilims_session, ) 
return resp resp = flexilims_session.post( datatype="dataset", - name=dataset_name, + name=dataset_full_name, origin_id=parent_id, attributes=dataset_info, strict_validation=strict_validation, @@ -803,7 +849,7 @@ def get_entities( :py:class:`pandas.DataFrame`: containing all matching entities """ - assert (project_id is not None) or (flexilims_session is not None) + # assert (project_id is not None) or (flexilims_session is not None) if flexilims_session is None: flexilims_session = get_flexilims_session(project_id) results = flexilims_session.get( @@ -947,6 +993,8 @@ def get_id(name, datatype=None, project_id=None, flexilims_session=None): entity = get_entity( datatype=datatype, flexilims_session=flexilims_session, name=name ) + if entity is None: + raise FlexilimsError("Cannot find entity named `%s`" % name) return entity["id"] @@ -1086,10 +1134,27 @@ def get_datasets_recursively( For example, this is useful if you want to retrieve paths to all *scanimage* datasets associated with a given session. + Args: + origin_id (str): hexadecimal ID of the origin session. Not required if + origin_name is provided. + origin_name (str): text name of the origin session. Not required if origin_id + is provided. + origin_series (pandas.Series): series of the origin session. Not required if + origin_id or origin_name is provided. + dataset_type (str): type of the dataset to filter by. If `None`, + will return all datasets. + filter_datasets (dict): dictionary of key-value pairs to filter datasets by. + parent_type (str): type of the parent entity. If `None`, will return all + parents. + filter_parents (dict): dictionary of key-value pairs to filter parents by. + return_paths (bool): if True, return a list of paths + project_id (str): text name of the project. Not required if + `flexilims_session` is provided. + flexilims_session (:py:class:`flexilims.Flexilims`): Flexilims session object + _output (list): internal argument used for recursion. + Returns: dict: Dictionary with direct parent id as keys and lists of associated datasets, or dataset paths as values - """ if origin_series is None: if origin_id is None: @@ -1173,7 +1238,7 @@ def get_datasets( otherwise ensure that only one dataset exists online and return it. return_paths (bool): if True, return a list of paths return_dataseries (bool): if True, a dataframe or a dataseries - _output (list): internal argument used for recursion.
+ """ @@ -1274,3 +1339,33 @@ def format_results(results, return_list=False): if return_list: return results return pd.DataFrame(results) + + +def delete_recursively(source_id, flexilims_session, do_it=False): + """Delete an entity and all its children recursively + + Args: + source_id (str): hexadecimal ID of the entity to delete + flexilims_session (:py:class:`flexilims.Flexilims`): Flexilims session object + do_it (bool): if True, will actually delete the entities + + Returns: + list: hexadecimal IDs of the entities to delete + + """ + to_delete = [source_id] + + def _get_children(parent_id): + children = get_children( + parent_id=parent_id, flexilims_session=flexilims_session + ) + for _, child in children.iterrows(): + to_delete.append(child["id"]) + if child["type"] != "dataset": + _get_children(child["id"]) + + _get_children(source_id) + if do_it: + for child_id in to_delete: + flexilims_session.delete(child_id) + return to_delete diff --git a/flexiznam/mcms.py b/flexiznam/mcms.py index 2976375..8d756b7 100644 --- a/flexiznam/mcms.py +++ b/flexiznam/mcms.py @@ -1,7 +1,8 @@ import re import pandas as pd +from requests.exceptions import InvalidURL +from flexiznam.config import get_password from pymcms.main import McmsSession -from flexiznam.config import PARAMETERS, get_password def get_mouse_info(mouse_name, username, password=None): @@ -18,7 +19,11 @@ if password is None: password = get_password(username=username, app="mcms") mcms_sess = McmsSession(username=username, password=password) - original_data = mcms_sess.get_animal(name=mouse_name) + try: + original_data = mcms_sess.get_animal(name=mouse_name) + except InvalidURL: + raise InvalidURL(f"Mouse {mouse_name} not found under your PPL") + # convert to camel case for flexilims mouse_data = {} pattern = re.compile(r"(? 0 - if (not already_processed) or (conflicts == "append"): + + def _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, + ): + """Inner function to create a new dataset object""" dataset_root = "%s_%s" % (origin["name"], base_name) dataset_name = flz.generate_name( "dataset", @@ -208,23 +241,82 @@ def from_origin( project=project, origin_id=origin["id"], flexilims_session=flexilims_session, + extra_attributes=extra_attributes, ) - else: - if (conflicts is None) or (conflicts == "abort"): - raise flz.errors.DatasetError( - f"Dataset(s) of type {dataset_type} already exist(s):" - + f" {processed.loc[:, 'name']}" + + # CONFLICTS RESOLUTION + # There are no datasets, create one + if not already_processed: + if verbose: + print("No datasets of type %s found.
Creating new" % dataset_type) + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, + ) + # There are some datasets of this type already online and we abort + if (conflicts is None) or (conflicts == "abort"): + raise flz.errors.DatasetError( + f"Dataset(s) of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + # Three cases left: skip, append, overwrite + if conflicts == "overwrite": + # If overwrite, ensure there is only one dataset of this type as we + # won't be able to guess which one should be replaced + if len(valid_processed) == 1: + if verbose: + print("Overwriting dataset %s" % valid_processed[0].name) + dataset = Dataset.from_dataseries(dataseries=valid_processed[0]) + dataset.extra_attributes = extra_attributes + return dataset + if len(processed) == 1: + if verbose: + print("Overwriting dataset %s" % processed.iloc[0].name) + dataset = Dataset.from_dataseries(dataseries=processed.iloc[0]) + dataset.extra_attributes = extra_attributes + return dataset + raise flz.errors.NameNotUniqueError( + f"Multiple datasets of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + if conflicts == "skip": + # If skip and we have an exact match, return it + if len(valid_processed) == 1: + if verbose: + print("Skip. Returning dataset %s" % valid_processed[0].name) + return Dataset.from_dataseries(dataseries=valid_processed[0]) + # If there is no match, create a new dataset + if len(valid_processed) == 0: + if verbose: + print("No matching dataset found. Creating new dataset") + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, ) - elif conflicts == "skip" or conflicts == "overwrite": - if len(processed) == 1: - return Dataset.from_dataseries(dataseries=processed.iloc[0]) - else: - raise flz.errors.NameNotUniqueError( - "{} {} datasets with name starting by {} exists for {}, " - "which one to return?".format( - len(processed), dataset_type, base_name, origin["name"] - ) - ) + raise flz.errors.NameNotUniqueError( + f"Multiple datasets of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + if conflicts == "append": + # Create a new dataset + if verbose: + print("Appending dataset") + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, + ) @staticmethod def _format_series_to_kwargs(flm_series): @@ -328,13 +420,16 @@ def __init__( elif project_id is not None: self.project_id = project_id - def is_valid(self): - """ - Dummy method definition. Should be reimplemented in children classes + def is_valid(self, return_reason=False): + """Check if the file path is valid for this dataset + Should be reimplemented in children classes. 
Should return True if the dataset is valid, False otherwise """ - raise NotImplementedError("`is_valid` is not defined for generic datasets") + if not self.path_full.exists(): + msg = f"Path {self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True def associated_files(self, folder=None): """Give a list of all files associated with this dataset @@ -431,10 +526,9 @@ def update_flexilims(self, mode="safe"): dataset_type=self.dataset_type, created=self.created, path=str(PurePosixPath(self.path)), - genealogy=self.genealogy, is_raw="yes" if self.is_raw else "no", project_id=self.project_id, - dataset_name=self.full_name, + dataset_name=self.dataset_name, attributes=attributes, flexilims_session=self.flexilims_session, conflicts="abort", @@ -596,7 +690,7 @@ def flexilims_session(self, value): self._flexilims_session = value if value is None: return - if hasattr(value, "project_id"): + if hasattr(value, "project_id") and (value.project_id is not None): if self.project_id is None: self.project_id = value.project_id elif self.project_id != value.project_id: diff --git a/flexiznam/schema/harp_data.py b/flexiznam/schema/harp_data.py index 70a9f4f..9a53b76 100644 --- a/flexiznam/schema/harp_data.py +++ b/flexiznam/schema/harp_data.py @@ -161,11 +161,18 @@ def csv_files(self): def csv_files(self, value): self.extra_attributes["csv_files"] = str(value) - def is_valid(self): - """Check that video, metadata and timestamps files exist""" - if not (pathlib.Path(self.path) / self.binary_file).exists(): - return False + def is_valid(self, return_reason=False): + """Check that video, metadata and timestamps files exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + + Returns: + bool or str: True if valid, False if not. If return_reason is True, + return "" if valid, else the reason.""" + if not (self.path_full / self.binary_file).exists(): + msg = f"Missing file {self.binary_file}" + return msg if return_reason else False for _, file_path in self.csv_files.items(): - if not (pathlib.Path(self.path) / file_path).exists(): - return False - return True + if not (self.path_full / file_path).exists(): + msg = f"Missing file {file_path}" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/microscopy_data.py b/flexiznam/schema/microscopy_data.py index dc0c44b..ab7d99c 100644 --- a/flexiznam/schema/microscopy_data.py +++ b/flexiznam/schema/microscopy_data.py @@ -140,8 +140,14 @@ def __init__( flexilims_session=flexilims_session, ) - def is_valid(self): - """Check that the file exist""" - if not (pathlib.Path(self.path)).exists(): - return False - return True + def is_valid(self, return_reason=False): + """Check that the file exists + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + + Returns: + bool or str: True if valid, False if not. If return_reason is True, + return "" if valid, else the reason.""" + if not self.path_full.exists(): + msg = f"{self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/onix_data.py b/flexiznam/schema/onix_data.py index 57c59b6..1d34ec1 100644 --- a/flexiznam/schema/onix_data.py +++ b/flexiznam/schema/onix_data.py @@ -79,31 +79,34 @@ def from_folder( data = pd.DataFrame(data) output = dict() - for ts, df in data.groupby("timestamp"): - if ( - enforce_validity - and ("rhd2164" not in df.device_name.values) - or ("breakout" not in df.device_name.values) - ): - if verbose: - print( - "Skipping partial onix dataset %s" - % ts.strftime("%Y-%m-%d_%H_%M_%S") - ) - continue - onix_name =
"onix_data_%s" % ts.strftime("%Y-%m-%d_%H_%M_%S") - extra_attributes = dict() - for device, dev_df in df.groupby("device_name"): - extra_attributes[device] = {s.subname: s.file for s in dev_df.itertuples()} - output[onix_name] = OnixData( - path=folder, - genealogy=folder_genealogy + (onix_name,), - extra_attributes=extra_attributes, - created=ts.strftime("%Y-%m-%d " "%H:%M:%S"), - flexilims_session=flexilims_session, - project=project, - is_raw=is_raw, - ) + if max(data.timestamp - data.timestamp.min()).total_seconds() > 2: + raise IOError(f"Multiple timestamps found in folder {folder}") + + ts = data.timestamp.min() + if ( + enforce_validity + and ("rhd2164" not in data.device_name.values) + or ("breakout" not in data.device_name.values) + ): + if verbose: + print( + "Skipping partial onix dataset %s" + % ts.strftime("%Y-%m-%d_%H_%M_%S") + ) + return + onix_name = "onix_data_%s" % ts.strftime("%Y-%m-%d_%H_%M_%S") + extra_attributes = dict() + for device, dev_df in data.groupby("device_name"): + extra_attributes[device] = {s.subname: s.file for s in dev_df.itertuples()} + output[onix_name] = OnixData( + path=folder, + genealogy=folder_genealogy + (onix_name,), + extra_attributes=extra_attributes, + created=ts.strftime("%Y-%m-%d " "%H:%M:%S"), + flexilims_session=flexilims_session, + project=project, + is_raw=is_raw, + ) return output def __init__( @@ -150,3 +153,30 @@ def __init__( id=id, flexilims_session=flexilims_session, ) + + def is_valid(self, return_reason=False): + """Check that the onix dataset is valid + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid. If False, return True or False + + Returns: + bool or str: True if valid, False if not. If return_reason is True, return + a string with the reason why the dataset is not valid.""" + + ndevices = 0 + for device_name in OnixData.DEVICE_NAMES: + if device_name not in self.extra_attributes: + continue + ndevices += 1 + dev_dict = self.extra_attributes[device_name] + for v in dev_dict.values(): + p = self.path_full / v + if not p.exists(): + msg = f"File {p} does not exist" + return msg if return_reason else False + if ndevices == 0: + msg = "No devices found" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/scanimage_data.py b/flexiznam/schema/scanimage_data.py index 7c3ab94..9468890 100644 --- a/flexiznam/schema/scanimage_data.py +++ b/flexiznam/schema/scanimage_data.py @@ -233,21 +233,23 @@ def tif_files(self, value): ) self.extra_attributes["tif_files"] = value - def is_valid(self, tif_files=None): + def is_valid(self, return_reason=False, tif_files=None): """Check that associated files exist""" if tif_files is None: tif_files = self.tif_files # checking file one by one is long, compare sets tif_files = set(tif_files) existing_file = { - f for f in os.listdir(self.path) if f.endswith(("tif", ".tiff")) + f for f in os.listdir(self.path_full) if f.endswith(("tif", ".tiff")) } if tif_files - existing_file: - return False + msg = "Some tif files do not exist: %s" % (tif_files - existing_file) + return msg if return_reason else False for _, file_path in self.csv_files.items(): - if not (pathlib.Path(self.path) / file_path).exists(): - return False - return True + if not (self.path_full / file_path).exists(): + msg = "Csv file does not exist: %s" % file_path + return msg if return_reason else False + return "" if return_reason else True def __len__(self): """Number of tif files in the dataset""" diff --git 
a/flexiznam/schema/sequencing_data.py b/flexiznam/schema/sequencing_data.py index 4f4ab0f..ca453b4 100644 --- a/flexiznam/schema/sequencing_data.py +++ b/flexiznam/schema/sequencing_data.py @@ -1,7 +1,5 @@ import datetime -import os import pathlib -import re import warnings from flexiznam.schema.datasets import Dataset @@ -126,8 +124,14 @@ def __init__( project_id=project_id, ) - def is_valid(self): - """Check that the file exist""" + def is_valid(self, return_reason=False): + """Check that the file exists + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + + Returns: + bool or str: True if valid, False if not. If return_reason is True, + return "" if valid, else the reason.""" if not self.path_full.exists(): - return False - return True + msg = f"{self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/visstim_data.py b/flexiznam/schema/visstim_data.py new file mode 100644 index 0000000..fe0ff8e --- /dev/null +++ b/flexiznam/schema/visstim_data.py @@ -0,0 +1,137 @@ +import datetime +import pathlib + +from flexiznam.schema.datasets import Dataset + + +class VisStimData(Dataset): + DATASET_TYPE = "visstim" + + @classmethod + def from_folder( + cls, + folder, + folder_genealogy=None, + is_raw=None, + verbose=True, + flexilims_session=None, + project=None, + ): + """Create a visual stimulation dataset by loading info from folder + + A visual stimulation dataset is a folder containing at least a `FrameLog.csv` + file and any number of other associated csvs. + + Args: + folder (str): path to the folder + folder_genealogy (tuple): genealogy of the folder, if None assume that + the genealogy is just (folder,), i.e. no parents + is_raw (bool): does this folder contain raw data? + verbose (bool=True): print info about what is found + flexilims_session (flm.Session): session to interact with flexilims + project (str): project ID or name + + Returns: + dict of datasets (flexiznam.schema.visstim_data.VisStimData) + """ + + csv_files = list(pathlib.Path(folder).glob("*.csv")) + + fnames = [f.name for f in csv_files] + if "framelog.csv" not in [f.lower() for f in fnames]: + raise IOError("Cannot find FrameLog.csv file") + + log_file = [f for f in csv_files if f.name.lower() == "framelog.csv"][0] + if verbose: + print(f"Found FrameLog.csv file: {log_file}") + + if folder_genealogy is None: + folder_genealogy = (pathlib.Path(folder).stem,) + elif isinstance(folder_genealogy, list): + folder_genealogy = tuple(folder_genealogy) + output = {} + extra_attributes = dict(csv_files={f.stem: f.name for f in csv_files}) + genealogy = folder_genealogy + ("visstim",) + created = datetime.datetime.fromtimestamp(log_file.stat().st_mtime) + output["visstim"] = VisStimData( + genealogy=genealogy, + is_raw=is_raw, + path=folder, + extra_attributes=extra_attributes, + created=created.strftime("%Y-%m-%d %H:%M:%S"), + flexilims_session=flexilims_session, + project=project, + ) + return output + + def __init__( + self, + path, + is_raw=None, + genealogy=None, + extra_attributes=None, + created=None, + project=None, + project_id=None, + origin_id=None, + id=None, + flexilims_session=None, + ): + """Create a VisStim dataset + + Args: + path: folder containing the dataset or path to file (valid only for single + file datasets) + is_raw: bool, used to sort in raw and processed subfolders + genealogy (tuple): parents of this dataset from the project (excluded) down to + the dataset name itself (included) + extra_attributes: dict, optional attributes.
+ created: Creation date, in "YYYY-MM-DD HH:mm:SS" + project: name of the project. Must be in config, can be guessed from + project_id + project_id: hexadecimal code for the project. Must be in config, can be + guessed from project + origin_id: hexadecimal code for the origin on flexilims. + id: hexadecimal code for the dataset on flexilims. + flexilims_session: authentication session to connect to flexilims + + Expected extra_attributes: + csv_files (optional): Dictionary of csv files associated with the dataset. + Keys are identifiers provided for convenience, + values are the full file names + """ + + super().__init__( + genealogy=genealogy, + path=path, + is_raw=is_raw, + dataset_type=VisStimData.DATASET_TYPE, + extra_attributes=extra_attributes, + created=created, + project=project, + project_id=project_id, + origin_id=origin_id, + id=id, + flexilims_session=flexilims_session, + ) + + @property + def csv_files(self): + return self.extra_attributes.get("csv_files", None) + + @csv_files.setter + def csv_files(self, value): + self.extra_attributes["csv_files"] = str(value) + + def is_valid(self, return_reason=False): + """Check that all csv files exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + + Returns: + bool or str: True if valid, False if not. If return_reason is True, + return "" if valid, else the reason.""" + for _, file_path in self.csv_files.items(): + if not (self.path_full / file_path).exists(): + msg = f"Missing file {file_path}" + return msg if return_reason else False + return "" if return_reason else True diff --git a/notebooks/01-Setup.ipynb b/notebooks/01-Setup.ipynb index ad0fdc0..f749f7d 100644 --- a/notebooks/01-Setup.ipynb +++ b/notebooks/01-Setup.ipynb @@ -139,9 +139,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "!cat ~/.flexiznam/secret_password.yml" ] }, @@ -172,9 +170,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -185,8 +181,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/notebooks/02-Add Data.ipynb b/notebooks/02-Add Data.ipynb index 2b8524f..3b9e85d 100644 --- a/notebooks/02-Add Data.ipynb +++ b/notebooks/02-Add Data.ipynb @@ -241,9 +241,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -254,8 +252,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/notebooks/03-Using the database.ipynb b/notebooks/03-Using the database.ipynb index d839c14..ad48e98 100644 --- a/notebooks/03-Using the database.ipynb +++ b/notebooks/03-Using the database.ipynb @@ -290,9 +290,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -303,8 +301,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/requirements.txt index 21b31d0..ad16b0d 100644 ---
a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ -pytest -setuptools -pandas -webbot +black click git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims +pandas +pytest pyyaml -tifffile +setuptools sphinx -sphinx-rtd-theme sphinx-click -black +sphinx-rtd-theme +tifffile +webbot diff --git a/setup.py b/setup.py index 6644af0..cfcb3e9 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="flexiznam", - version="v0.3.11", + version="v0.4", url="https://github.com/znamlab/flexznam", license="MIT", author="Antonin Blot", @@ -19,6 +19,7 @@ "flexilims @ git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims", "pymcms @ git+ssh://git@github.com/znamlab/pymcms.git#egg=pymcms", "tifffile", + "ttkwidgets", ], entry_points=""" [console_scripts] diff --git a/tests/ReadMe.md b/tests/ReadMe.md index edaf613..10036bd 100644 --- a/tests/ReadMe.md +++ b/tests/ReadMe.md @@ -4,37 +4,37 @@ Tests are separated in two: -- Main use cases found in the main test folder +- Main use cases found in the main test folder - Test of individual components found in `test_components` - -The `test_components` should cover most of the code but are not user friendly. The + +The `test_components` should cover most of the code but are not user friendly. The main use cases are example scripts that could be use for a real experiment. ## Data -Example datasets are available in the +Example datasets are available in the raw data folder on camp `data/instruments/raw_data/projects/demo_project/`. A corresponding preprocessed folder is also used by tests. ## Notes: ### MCMS -To test the MCMS part, you need a graphical interface and a browser. It is also +To test the MCMS part, you need a graphical interface and a browser. It is also particularly slow. -To avoid having to run it every time, the tests are marked as slow and require the +To avoid having to run it every time, the tests are marked as slow and require the `--runslow` flag to be executed. This is False by default ### Flexilims -For interaction with flexilims, you need to be connected via the crick network -(vpn or from the crick). Neither is easily doable on github workflow. Furthermore -flexilims does not have an API to delete entries. You will have clean it manually +For interaction with flexilims, you need to be connected via the crick network +(vpn or from the crick). Neither is easily doable on github workflow. Furthermore +flexilims does not have an API to delete entries. You will have clean it manually before running the tests -To make things simpler, the tests requiring flexilims or mcms are marked as integration +To make things simpler, the tests requiring flexilims or mcms are marked as integration tests. They can be skipped by running `pytest -m "not integtest"`. -To test the upload to flexilims properly, you need to clear flexilims yourself -(as there is no API to delete stuff). There should be a flag `FLM_IS_WIPED` at +To test the upload to flexilims properly, you need to clear flexilims yourself +(as there is no API to delete stuff). There should be a flag `FLM_IS_WIPED` at the beginning of each test file. If set to `False` (default), then tests involving flexilims will run with `conflicts=skip`. 
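The `--runslow` flag and the `slow`/`integtest` marks described in the tests ReadMe above are standard pytest custom markers. The repository's actual `conftest.py` is not part of this diff; the sketch below only illustrates how such markers are typically wired, and everything in it beyond the `--runslow` flag and the two mark names is an assumption.

```python
# conftest.py -- illustrative sketch; the real conftest.py is not in this diff
import pytest


def pytest_addoption(parser):
    # register the --runslow flag used by the slow MCMS tests
    parser.addoption(
        "--runslow", action="store_true", default=False, help="run slow tests"
    )


def pytest_configure(config):
    # declare the marks so `pytest -m "not integtest"` runs without warnings
    config.addinivalue_line("markers", "slow: tests needing a GUI and a browser")
    config.addinivalue_line("markers", "integtest: tests needing flexilims or MCMS")


def pytest_collection_modifyitems(config, items):
    # skip tests marked slow unless --runslow was passed on the command line
    if config.getoption("--runslow"):
        return
    skip_slow = pytest.mark.skip(reason="need --runslow option to run")
    for item in items:
        if "slow" in item.keywords:
            item.add_marker(skip_slow)
```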
diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml index e3838f1..d3c28e6 100644 --- a/tests/test-results/pytest_in_tests.xml +++ b/tests/test-results/pytest_in_tests.xml @@ -1 +1 @@ - \ No newline at end of file + diff --git a/tests/test_2p.py b/tests/test_2p.py index bcf476c..01dbd1d 100644 --- a/tests/test_2p.py +++ b/tests/test_2p.py @@ -20,7 +20,6 @@ TEST_PROJECT, ) import flexiznam as fzn -from flexiznam import camp MOUSE = "mouse_physio_2p" SESSION = "S20211102" diff --git a/tests/test_barseq.py b/tests/test_barseq.py index 7863fca..fd34425 100644 --- a/tests/test_barseq.py +++ b/tests/test_barseq.py @@ -17,7 +17,6 @@ TEST_PROJECT, ) import flexiznam as fzn -from flexiznam import camp MOUSE = "mouse_barseq" YAML = "yaml_automatic_skeleton.yml" diff --git a/tests/test_components/test_cli.py b/tests/test_components/test_cli.py index 2dc0ccb..a1e5f36 100644 --- a/tests/test_components/test_cli.py +++ b/tests/test_components/test_cli.py @@ -1,4 +1,3 @@ -import pytest import pathlib import yaml from click.testing import CliRunner diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 468ea68..bbd4927 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -6,7 +6,7 @@ import pytest import flexiznam as flz import yaml -from flexiznam.config import PARAMETERS, get_password +from flexiznam.config import PARAMETERS from flexiznam.errors import FlexilimsError, NameNotUniqueError from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION diff --git a/tests/test_components/test_utils.py b/tests/test_components/test_utils.py index 9384073..1a37254 100644 --- a/tests/test_components/test_utils.py +++ b/tests/test_components/test_utils.py @@ -2,7 +2,6 @@ import pytest import numpy as np from pathlib import Path -import pandas as pd import tempfile from flexiznam.config import config_tools, DEFAULT_CONFIG from flexiznam import utils diff --git a/tests/test_components/tests_schema/test_camera_data.py b/tests/test_components/tests_schema/test_camera_data.py index 2c3a3d8..c691996 100644 --- a/tests/test_components/tests_schema/test_camera_data.py +++ b/tests/test_components/tests_schema/test_camera_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.camera_data import CameraData from flexiznam.schema.datasets import Dataset from tests.tests_resources.data_for_testing import DATA_ROOT, TEST_PROJECT diff --git a/tests/test_components/tests_schema/test_harp.py b/tests/test_components/tests_schema/test_harp.py index e31679f..2a0574a 100644 --- a/tests/test_components/tests_schema/test_harp.py +++ b/tests/test_components/tests_schema/test_harp.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.harp_data import HarpData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_microscopy_data.py b/tests/test_components/tests_schema/test_microscopy_data.py index 2eeb05d..4f3a6b1 100644 --- a/tests/test_components/tests_schema/test_microscopy_data.py +++ b/tests/test_components/tests_schema/test_microscopy_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.microscopy_data import MicroscopyData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_scanimage_data.py b/tests/test_components/tests_schema/test_scanimage_data.py index 675190a..eb07c24 100644 --- a/tests/test_components/tests_schema/test_scanimage_data.py +++ 
b/tests/test_components/tests_schema/test_scanimage_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.scanimage_data import ScanimageData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_sequencing_data.py b/tests/test_components/tests_schema/test_sequencing_data.py index e066926..d129f59 100644 --- a/tests/test_components/tests_schema/test_sequencing_data.py +++ b/tests/test_components/tests_schema/test_sequencing_data.py @@ -1,6 +1,5 @@ -import pytest from flexiznam.schema.sequencing_data import SequencingData -from tests.tests_resources.data_for_testing import DATA_ROOT, PROJECT_ID +from tests.tests_resources.data_for_testing import DATA_ROOT # Test creation of all dataset types. # diff --git a/tests/test_components/tests_schema/test_visstim.py b/tests/test_components/tests_schema/test_visstim.py new file mode 100644 index 0000000..97b3f4a --- /dev/null +++ b/tests/test_components/tests_schema/test_visstim.py @@ -0,0 +1,23 @@ +from flexiznam.schema.visstim_data import VisStimData +from tests.tests_resources.data_for_testing import DATA_ROOT + + +def test_vistim(): + folder_genealogy = ["mouse_onix", "S20230915", "R165222_SpheresPermTubeReward"] + data_dir = DATA_ROOT.joinpath(*folder_genealogy) + ds = VisStimData.from_folder(data_dir, verbose=False) + assert len(ds) == 1 + ds_name = "visstim" + d = ds[ds_name] + assert d.full_name == folder_genealogy[-1] + "_" + ds_name + d.project = "demo_project" + assert d.is_valid() + assert len(d.csv_files) == 4 + ds = VisStimData.from_folder( + data_dir, verbose=False, folder_genealogy=folder_genealogy + ) + d = ds[ds_name] + d.project = "demo_project" + assert d.full_name == "_".join(folder_genealogy + [ds_name]) + assert d.is_valid() + assert len(d.csv_files) == 4 diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py index dd6bb0a..d43a6ce 100644 --- a/tests/tests_resources/data_for_testing.py +++ b/tests/tests_resources/data_for_testing.py @@ -1,11 +1,10 @@ """A list of file coming from one experiment""" from pathlib import Path -import datetime from flexiznam.config import PARAMETERS MOUSE_ID = "6437dcb13ded9c65df142a12" # actual physio2p mouse -MOUSE_TEMP = "647a1aec7ddb34517470d3e6" # some random mouse where I can change data +MOUSE_TEMP = "647a1aec7ddb34517470d3e6" # some random mouse where I can change data TEST_PROJECT = "demo_project" PROJECT_ID = "610989f9a651ff0b6237e0f6" SESSION = "mouse_physio_2p_S20211102"
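Taken together, the main v0.4 entry points added in this diff can be exercised as below. This is a hedged usage sketch, not repository code: the project name and mouse name are placeholders, and only argument names that appear in the diff itself (`conflicts`, `do_it`, `offline_mode`) are relied upon.

```python
from flexiznam.main import add_mouse, delete_recursively, get_flexilims_session

# Regular online session (token-based, as before).
flm_sess = get_flexilims_session(project_id="demo_project")

# `conflicts` now controls what happens when the mouse is already online:
# "abort" (default) raises FlexilimsError, "skip" returns the online entry,
# and "update"/"overwrite" push the fresh MCMS info through update_entity.
mouse = add_mouse(
    mouse_name="mouse_physio_2p", flexilims_session=flm_sess, conflicts="skip"
)

# delete_recursively is a dry run by default: it returns the ids of the entity
# and of all its children (datasets are leaves, so recursion stops there).
# Pass do_it=True to actually delete them.
ids = delete_recursively(mouse["id"], flexilims_session=flm_sess, do_it=False)

# Offline mode reads a downloaded YAML copy of the database instead of
# opening a token; it requires `offline_yaml` to be set in the config file.
offline_sess = get_flexilims_session(project_id="demo_project", offline_mode=True)
```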