From 91211da86379251978da300f463552c1687feac4 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sat, 27 May 2023 15:35:26 +0100 Subject: [PATCH 01/73] [camp] huge refactor of yaml creation --- flexiznam/camp/sync_data.py | 798 +++++++++--------------------------- 1 file changed, 190 insertions(+), 608 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 99a33a3..0cd5faf 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -4,6 +4,7 @@ from pathlib import Path, PurePosixPath import re import copy +import warnings import yaml from yaml.parser import ParserError @@ -14,134 +15,116 @@ from flexiznam.utils import clean_recursively -def create_yaml( +def create_yaml_dict( root_folder, - outfile=None, - project="NOT SPECIFIED", - mouse="NOT SPECIFIED", - overwrite=False, + project, + genealogy, + format_yaml=True, ): - """Automatically create a yaml file skeleton + """Create a yaml dict from a folder - Goes recursively in root folder and create a set of nested structure + Recursively parse a folder and create a yaml dict with the structure of the folder. Args: - root_folder (str or Path): base folder, usually a session but can be a sample - outfile (str or Path): target to write the yaml. Do not write file if `None` - project (str): name of the project - mouse (str): name of the mouse - overwrite (bool): overwrite outfile if it exists. Default False. + root_folder (str): Path to the folder to parse + project (str): Name of the project, used as root of the path in the output + genealogy (list): List of strings with the genealogy of root_folder. If + root_folder is a recording for instance, genealogy should be (mouse, + session). + format_yaml (bool, optional): Format the output to be yaml compatible if True, + otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults + to True. 
Returns: - yaml_dict (dict): created structure + dict: Dictionary with the structure of the folder and automatically detected + datasets """ - root_folder = pathlib.Path(root_folder) - assert root_folder.is_dir() - assert isinstance(project, str) - assert isinstance(mouse, str) - yaml_dict = dict(project=project, mouse=mouse) - yaml_dict["session"] = None - # check if we were given a session folder - if re.match(r"S\d*", root_folder.stem): - yaml_dict["session"] = root_folder.stem - - _find_yaml_struct(root_folder, yaml_dict) - - if outfile is not None: - if outfile.is_file() and not overwrite: - raise IOError( - "File %s already exists. Use `overwrite` to replace." % outfile - ) - with open(outfile, "w") as writer: - yaml.dump(yaml_dict, writer) - - return yaml_dict - - -def _find_yaml_struct(path, current_dict): - """Parse one level of yaml structure for autogenerating yaml + if isinstance(genealogy, str): + genealogy = [genealogy] + data = _create_yaml_dict( + level_folder=root_folder, + project=project, + genealogy=genealogy, + format_yaml=format_yaml, + parent_dict=dict(), + ) + out = dict(root_folder=root_folder, root_genealogy=genealogy, children=data) + return out - Args: - path: path to the dir to parse - current_dict: current level - Returns: - current_dict (do changes in place) - """ - path = Path(path) - for el in os.listdir(path): - if not (path / el).is_dir(): - continue - # match known recording format - m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", el) - if m: - el_type = "recordings" - protocol = m[1] if m[1] is not None else "PROTOCOL NOT SPECIFIED" - else: - el_type = "samples" - subdict = current_dict.get(el_type, {}) - subdict[el] = dict() - if el_type == "recordings": - subdict[el]["protocol"] = protocol - current_dict[el_type] = subdict - _find_yaml_struct(path / el, current_dict[el_type][el]) - return current_dict +def _create_yaml_dict( + level_folder, + project, + genealogy, + format_yaml, + parent_dict, +): + """Private function to create a 
yaml dict from a folder + Add a private function to hide the arguments that are used only for recursion + (parent_dict) -def parse_yaml(path_to_yaml, raw_data_folder=None, verbose=True): - """Read an acquisition yaml and create corresponding datasets + See `create_yaml_dict` for documentation Args: - path_to_yaml (str or dict): path to the file to parse or dict of yaml contect - raw_data_folder (str): root folder. Typically project folder or folder - containing the mice subfolders - verbose (bool): print info while looking for datasets - - Returns: - dict: A yaml dictionary with dataset classes - + level_folder (Path): folder to parse + project (str): name of the project + genealogy (tuple): genealogy of the current folder + format_yaml (bool): format results to be yaml compatible or keep Dataset + and pathlib.Path objects + parent_dict (dict): dict of the parent folder. Used for recursion """ - session_data = _clean_yaml(path_to_yaml) - if raw_data_folder is None: - raw_data_folder = Path(PARAMETERS["data_root"]["raw"]) - raw_data_folder /= session_data["project"] - - if session_data["path"] is not None: - home_folder = Path(raw_data_folder) / session_data["path"] - elif session_data["session"] is not None: - home_folder = ( - Path(raw_data_folder) / session_data["mouse"] / session_data["session"] + level_folder = Path(level_folder) + assert level_folder.is_dir(), "root_folder must be a directory" + level_dict = dict() + genealogy = list(genealogy) + + level_name = level_folder.stem + m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", level_name) + if m: + level_dict["datatype"] = "recording" + level_dict["protocol"] = ( + m[1] if m[1] is not None else "XXERRORXX PROTOCOL NOT SPECIFIED" ) - else: - home_folder = Path(raw_data_folder) / session_data["mouse"] - # first load datasets in the session level - if not home_folder.is_dir(): - raise FileNotFoundError("Session directory %s does not exist" % home_folder) - session_data["path"] = home_folder - session_data["datasets"] 
= create_dataset( - dataset_infos=session_data["datasets"], - verbose=verbose, - parent=session_data, - raw_data_folder=raw_data_folder, - error_handling="report", - ) + level_dict["recording_type"] = "XXERRORXX error RECORDING TYPE NOT SPECIFIED" - for rec_name, recording in session_data["recordings"].items(): - recording["path"] = str(PurePosixPath(home_folder / rec_name)) - recording["datasets"] = create_dataset( - dataset_infos=recording["datasets"], - parent=recording, - raw_data_folder=raw_data_folder, - verbose=verbose, - error_handling="report", - ) - - session_data["samples"] = _create_sample_datasets(session_data, raw_data_folder) - - # remove the full path that are not needed - clean_recursively(session_data) - return session_data + elif re.fullmatch(r"S\d*", level_name): + level_dict["datatype"] = "session" + else: + level_dict["datatype"] = "sample" + level_dict["genealogy"] = genealogy + [level_name] + level_dict["path"] = Path(project, *level_dict["genealogy"]) + if format_yaml: + level_dict["path"] = str(PurePosixPath(level_dict["path"])) + children = dict() + datasets = Dataset.from_folder(level_folder) + if datasets: + for ds_name, ds in datasets.items(): + ds.genealogy = genealogy + list(ds.genealogy) + if format_yaml: + # find path root + proot = str(level_folder)[: -len(level_dict["path"])] + ds.path = ds.path.relative_to(proot) + children[ds_name] = ds.format(mode="yaml") + children[ds_name]["path"] = str( + PurePosixPath(children[ds_name]["path"]) + ) + else: + children[ds_name] = ds + + for child in level_folder.glob("*"): + if child.is_dir(): + _create_yaml_dict( + child, + project=project, + genealogy=genealogy + [level_name], + format_yaml=format_yaml, + parent_dict=children, + ) + level_dict["children"] = children + parent_dict[level_folder.stem] = level_dict + return parent_dict def upload_yaml( @@ -170,518 +153,117 @@ def upload_yaml( list of names of entities created/updated """ - output = [] - # if there are errors, I cannot safely 
parse the yaml - errors = find_xxerrorxx(yml_file=source_yaml) - if errors: - raise SyncYmlError("The yaml file still contains error. Fix it") - session_data = parse_yaml(source_yaml, raw_data_folder, verbose) - # parsing can created errors, check again - errors = find_xxerrorxx(yml_file=source_yaml) - if errors: - raise SyncYmlError("Invalid yaml. Use `parse_yaml` and fix errors manually.") + with open(source_yaml, "r") as f: + yaml_data = yaml.safe_load(f) + + # first find the origin - # first find the mouse if flexilims_session is None: - flexilims_session = flz.get_flexilims_session( - project_id=session_data["project"] - ) - mouse = flz.get_entity( - datatype="mouse", - name=session_data["mouse"], + flexilims_session = flz.get_flexilims_session(project_id=yaml_data["project"]) + + origin_name = "_".join(yaml_data["root_genealogy"]) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flexilims_session) + assert origin is not None, f"`{origin_name}` not found on flexilims" + if verbose: + print(f"Found origin `{origin_name}` with id `{origin.id}`") + # then upload the data recursively + _upload_yaml_dict( + yaml_data["children"], + origin=origin, + raw_data_folder=raw_data_folder, + log_func=log_func, flexilims_session=flexilims_session, - format_reply=False, + conflicts=conflicts, + verbose=verbose, ) - if mouse is None: - raise SyncYmlError("Mouse not on flexilims. You must add it manually first") - - # deal with the session - if session_data["session"] is not None: - m = re.match(r"S(\d{4})(\d\d)(\d\d)", session_data["session"]) - if m: - date = "-".join(m.groups()) - else: - log_func("Cannot parse date for session %s." 
% session_data["session"]) - date = "N/A" - - session_data = _trim_paths(session_data, raw_data_folder) - - attributes = session_data.get("attributes", None) - if attributes is None: - attributes = {} - for field in ("path", "notes"): - value = session_data.get(field, None) - if value is not None: - attributes[field] = value - - # if session is not specified, then entries will be added directly as - # children of the mouse - if session_data["session"] is not None: - session = flz.add_experimental_session( - parent_name=mouse["name"], - session_name=session_data["session"], - flexilims_session=flexilims_session, - date=date, - attributes=attributes, - conflicts=conflicts, - ) - root_id = session["id"] - output.append(session["name"]) - else: - root_id = mouse["id"] - # session datasets - for ds_name, ds in session_data.get("datasets", {}).items(): - ds.genealogy = [mouse["name"], session_data["session"], ds_name] - ds.project = session_data["project"] - ds.origin_id = root_id - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode="safe") - output.append(ds.full_name) - # now deal with recordings - for short_rec_name, rec_data in session_data.get("recordings", {}).items(): - rec_name = session["name"] + "_" + short_rec_name - attributes = rec_data.get("attributes", None) - if attributes is None: - attributes = {} - for field in ["notes", "path", "timestamp"]: - value = rec_data.get(field, "") - attributes[field] = value if value is not None else "" - attributes["genealogy"] = session["attributes"]["genealogy"] + [short_rec_name] - rec_type = rec_data.get("recording_type", "unspecified") - if not rec_type: - rec_type = "unspecified" - rec_rep = flz.add_recording( - session_id=root_id, - recording_type=rec_type, - protocol=rec_data.get("protocol", ""), - attributes=attributes, - recording_name=rec_name, - other_relations=None, - flexilims_session=flexilims_session, - conflicts=conflicts, - ) - output.append(rec_rep["name"]) - - # now deal with 
recordings' datasets - for ds_name, ds in rec_data.get("datasets", {}).items(): - ds.genealogy = [ - mouse["name"], - session_data["session"], - short_rec_name, - ds_name, - ] - ds.project = session_data["project"] - ds.origin_id = rec_rep["id"] - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode="safe") - output.append(ds.full_name) - - # now deal with samples - def add_samples(samples, parent, output=None): - # we'll need a utility function to deal with recursion - for short_sample_name, sample_data in samples.items(): - - # we always use `skip` to add samples - sample_rep = flz.add_sample( - parent["id"], - attributes=attributes, - sample_name=short_sample_name, - conflicts="skip", - flexilims_session=flexilims_session, - ) - if output is not None: - output.append(sample_rep["name"]) - # deal with datasets attached to this sample - for ds_name, ds in sample_data.get("datasets", {}).items(): - ds.genealogy = sample_rep["attributes"]["genealogy"] + [ds_name] - ds.project = session_data["project"] - ds.origin_id = sample_rep["id"] - ds.flexilims_session = flexilims_session - ds.update_flexilims(mode="safe") - if output is not None: - output.append(ds.full_name) - # now add child samples - add_samples(sample_data["samples"], sample_rep, output) - - # samples are attached to mice, not sessions - add_samples(session_data["samples"], mouse, output=output) - return output - - -def write_session_data_as_yaml(session_data, target_file=None, overwrite=False): - """Write a session_data dictionary into a yaml - - Args: - session_data (dict): dictionary with Dataset instances, as returned by parse_yaml - target_file (str): path to the output file (if None, does not write to disk) - overwrite (bool): replace target file if it already exists (default False) - - Returns: - dict: the pure yaml dictionary - - """ - out_dict = copy.deepcopy(session_data) - clean_recursively(out_dict, keys=["name"], format_dataset=True) - if target_file is not None: - target_file = 
Path(target_file) - if target_file.exists() and not overwrite: - raise IOError("Target file %s already exists" % target_file) - with open(target_file, "w") as writer: - yaml.dump(out_dict, writer) - # temp check: - with open(target_file, "r") as reader: - writen = yaml.safe_load(reader) - return out_dict - - -def create_dataset( - dataset_infos, parent, raw_data_folder, verbose=True, error_handling="crash" +def _upload_yaml_dict( + yaml_dict, origin, raw_data_folder, log_func, flexilims_session, conflicts, verbose ): - """Create dictionary of datasets - - Args: - dataset_infos: extra information for reading dataset outside of raw_data_folder - or adding optional arguments - parent (dict): yaml dictionary of the parent level - raw_data_folder (str): folder where to look for data - verbose (bool): (True) Print info about dataset found - error_handling (str) `crash` or `report`. When something goes wrong, raise an - error if `crash` otherwise replace the dataset instance by the error - message in the output dictionary - - Returns: - dict: dictionary of dataset instances - - """ - - # autoload datasets - datasets = Dataset.from_folder(parent["path"], verbose=verbose) - error_handling = error_handling.lower() - if error_handling not in ("crash", "report"): - raise IOError("error_handling must be `crash` or `report`") - - # check dataset_infos for extra datasets - for ds_name, ds_data in dataset_infos.items(): - ds_path = Path(raw_data_folder) / ds_data["path"] - # first deal with dataset that are not in parent path - ds_class = Dataset.SUBCLASSES.get(ds_data["dataset_type"], Dataset) - if ds_path.is_dir() and (ds_path != parent["path"]): - ds = ds_class.from_folder(ds_path, verbose=verbose) - elif ds_path.is_file() and (ds_path.parent != parent["path"]): - ds = ds_class.from_folder(ds_path.parent, verbose=verbose) - elif not ds_path.exists(): - err_msg = "Dataset not found. 
Path %s does not exist" % ds_path - if error_handling == "crash": - raise FileNotFoundError(err_msg) - datasets[ds_name] = "XXERRORXX!! " + err_msg - continue - else: - # if it is in the parent['path'] folder, I already loaded it. - ds = {k: v for k, v in datasets.items() if isinstance(v, ds_class)} - if not ds: - err_msg = 'Dataset "%s" not found in %s' % (ds_name, ds_path) - if error_handling == "crash": - raise SyncYmlError(err_msg) - datasets[ds_name] = "XXERRORXX!! " + err_msg - - # match by name - if ds_name in ds: - ds = ds[ds_name] - else: # now we're in trouble. - err_msg = 'Could not find dataset "%s". Found "%s" instead' % ( - ds_name, - ", ".join(ds.keys()), + for entity, entity_data in yaml_dict.items(): + children = entity_data.pop("children", {}) + datatype = entity_data.pop("datatype") + if datatype == "session": + if verbose: + print(f"Adding session `{entity}`") + new_entity = flz.add_experimental_session( + date=entity[1:], + flexilims_session=flexilims_session, + parent_id=origin["id"], + attributes=entity_data, + session_name=entity, + conflicts=conflicts, ) - if error_handling == "crash": - raise SyncYmlError(err_msg) - datasets[ds_name] = "XXERRORXX!! 
" + err_msg - continue - if ds_data["attributes"] is not None: - ds.extra_attributes.update(ds_data["attributes"]) - if ds_data["notes"] is not None: - ds.extra_attributes["notes"] = ds_data["notes"] - datasets[ds_name] = ds - return datasets - - -def _trim_paths(session_data, raw_data_folder): - """Parses paths to make them relative to `raw_data_folder` - - Args: - session_data (dict): dictionary containing children of the session - raw_data_folder (str): part of the path to be omitted from on flexilims - - Returns: - dict: `session_data` after trimming the paths - - """ - - def trim_sample_paths(samples): - # utility function to recurse into samples - for sample_name, sample_data in samples.items(): - samples[sample_name]["path"] = str( - PurePosixPath( - Path(samples[sample_name]["path"]).relative_to(raw_data_folder) + elif datatype == "recording": + rec_type = entity_data.pop("recording_type", "Not specified") + prot = entity_data.pop("protocol", "Not specified") + if verbose: + print( + f"Adding recording `{entity}`, type `{rec_type}`, protocol `{prot}`" ) + new_entity = flz.add_recording( + session_id=origin["id"], + recording_type=rec_type, + protocol=prot, + attributes=entity_data, + recording_name=entity, + conflicts=conflicts, + flexilims_session=flexilims_session, ) - for ds_name, ds in sample_data.get("datasets", {}).items(): - ds.path = PurePosixPath(ds.path.relative_to(raw_data_folder)) - trim_sample_paths(sample_data["samples"]) - - if raw_data_folder is None: - raw_data_folder = Path(PARAMETERS["data_root"]["raw"]) - if "path" in session_data.keys(): - session_data["path"] = str( - PurePosixPath(Path(session_data["path"]).relative_to(raw_data_folder)) - ) - for ds_name, ds in session_data.get("datasets", {}).items(): - ds.path = ds.path.relative_to(raw_data_folder) - for rec_name, rec_data in session_data["recordings"].items(): - session_data["recordings"][rec_name]["path"] = str( - PurePosixPath( - 
Path(session_data["recordings"][rec_name]["path"]).relative_to( - raw_data_folder - ) + elif datatype == "sample": + if verbose: + print(f"Adding sample `{entity}`") + new_entity = flz.add_sample( + parent_id=origin["id"], + attributes=entity_data, + sample_name=entity, + conflicts=conflicts, + flexilims_session=flexilims_session, + ) + elif datatype == "dataset": + created = entity_data.pop("created") + dataset_type = entity_data.pop("dataset_type") + path = entity_data.pop("path") + genealogy = entity_data.pop("genealogy") + if verbose: + print(f"Adding dataset `{entity}`, type `{dataset_type}`") + new_entity = flz.add_dataset( + parent_id=origin["id"], + dataset_type=dataset_type, + created=created, + path=path, + genealogy=genealogy, + is_raw="yes", + project_id=None, + flexilims_session=None, + dataset_name=None, + attributes=None, + strict_validation=False, + conflicts="append", ) - ) - for ds_name, ds in rec_data.get("datasets", {}).items(): - ds.path = PurePosixPath(ds.path.relative_to(raw_data_folder)) - trim_sample_paths(session_data["samples"]) - return session_data - - -def _create_sample_datasets(parent, raw_data_folder): - """Recursively index samples creating a nested dictionary and generate - corresponding datasets - - Args: - parent (dict): Dictionary corresponding to the parent entity - - Return: - dict: dictionary of child samples - """ - if "samples" not in parent: - return dict() - for sample_name, sample in parent["samples"].items(): - sample["path"] = parent["path"] / sample_name - sample["datasets"] = create_dataset( - dataset_infos=sample["datasets"], - parent=sample, + _upload_yaml_dict( + yaml_dict=children, + origin=new_entity, raw_data_folder=raw_data_folder, - error_handling="report", - ) - - # recurse into child samples - sample["samples"] = _create_sample_datasets(sample, raw_data_folder) - # we update in place but we also return the dictionary of samples to make - # for more readable code - return parent["samples"] - - -def 
_clean_yaml(path_to_yaml): - """Read a yaml file and check that it is correctly formatted - - This does not do any processing, just make sure that I can read the whole yaml and - generate dictionary will all expected fields - - Args: - path_to_yaml (str): path to the YAML file, or dict of the yaml content - - Returns: - dict: nested dictionary containing entries in the YAML file - - """ - - if isinstance(path_to_yaml, dict): - yml_data = path_to_yaml - else: - with open(path_to_yaml, "r") as yml_file: - try: - yml_data = yaml.safe_load(yml_file) - except ParserError as e: - raise IOError("Invalid yaml. Parser returned an error: %s" % e) - - session, nested_levels = _read_level(yml_data) - - session["datasets"] = {} - for dataset_name, dataset_dict in nested_levels["datasets"].items(): - session["datasets"][dataset_name] = _read_dataset( - name=dataset_name, data=dataset_dict - ) - - session["recordings"] = {} - for rec_name, rec_dict in nested_levels["recordings"].items(): - session["recordings"][rec_name] = _read_recording(name=rec_name, data=rec_dict) - - session["samples"] = {} - for sample_name, sample_dict in nested_levels["samples"].items(): - session["samples"][sample_name] = _read_sample( - name=sample_name, data=sample_dict - ) - - return session - - -def _read_sample(name, data): - """Read YAML information corresponding to a sample - - Args: - name (str): the name of the sample - data (dict): data for this sample only - - Returns: - dict: the sample read from the yaml - - """ - if data is None: - data = {} - sample, nested_levels = _read_level( - data, - mandatory_args=(), - optional_args=("notes", "attributes", "path"), - nested_levels=("datasets", "samples"), - ) - sample["name"] = name - - sample["datasets"] = dict() - for ds_name, ds_data in nested_levels["datasets"].items(): - sample["datasets"][ds_name] = _read_dataset(name=ds_name, data=ds_data) - sample["samples"] = dict() - for sample_name, sample_data in nested_levels["samples"].items(): - 
sample["samples"][sample_name] = _read_sample( - name=sample_name, data=sample_data + log_func=log_func, + flexilims_session=flexilims_session, + conflicts=conflicts, + verbose=verbose, ) - return sample - - -def _read_recording(name, data): - """Read YAML information corresponding to a recording - - Args: - name (str): the name of the recording - data (dict): data for this dataset only - - Returns: - dict: the recording read from the yaml - - """ - recording, datasets = _read_level( - data, - mandatory_args=("protocol",), - optional_args=("notes", "attributes", "path", "recording_type", "timestamp"), - nested_levels=("datasets",), - ) - recording["name"] = name - - # if timestamps is None, the name must start with RHHMMSS - if recording["timestamp"] is None: - m = re.match(r"R(\d\d\d\d\d\d)", recording["name"]) - if not m: - raise SyncYmlError( - "Timestamp must be provided if recording name is not " - "properly formatted" - ) - recording["timestamp"] = m.groups()[0] - recording["datasets"] = dict() - for ds_name, ds_data in datasets["datasets"].items(): - recording["datasets"][ds_name] = _read_dataset(name=ds_name, data=ds_data) - - return recording - - -def _read_dataset(name, data): - """Read YAML information corresponding to a dataset - - Args: - name (str): the name of the dataset, will be composed with parent names to - generate an identifier - data (dict): data for this dataset only - Returns: - dict: a formatted dictionary including, 'dataset_type', 'path', 'notes', - 'attributes' and 'name' - """ - level, _ = _read_level( - data, - mandatory_args=("dataset_type", "path"), - optional_args=( - "notes", - "attributes", - "created", - "is_raw", - "origin_id", - "genealogy", - ), - nested_levels=(), +if __name__ == "__main__": + data = create_yaml_dict( + "/Volumes/lab-znamenskiyp/data/instruments/raw_data/projects/blota_onix_pilote/BRAC7448.2d/S20230412", + project="blota_onix_pilote", + genealogy="BRAC7448.2d", ) - level["name"] = name - return level - - 
-def _read_level( - yml_level, - mandatory_args=("project", "mouse", "session"), - optional_args=("path", "notes", "attributes", "genealogy"), - nested_levels=("recordings", "datasets", "samples"), -): - """Read one layer of the yml file (i.e. a dictionary) - - Args: - yml_level (dict): a dictionary containing the yml level to analyse (and all sublevels) - mandatory_args: arguments that must be in this level - optional_args: arguments that are expected but not mandatory, will be `None` if - absent - nested_levels: name of any nested level that should not be parsed - - Returns: - (tuple): a tuple containing two dictionaries: - level (dict): dictionary of top level attributes - nested_levels (dict): dictionary of nested dictionaries - """ - # make a copy to not change original version - yml_level = yml_level.copy() - is_absent = [m not in yml_level for m in mandatory_args] - if any(is_absent): - absents = ", ".join(["%s" % a for a, m in zip(mandatory_args, is_absent) if m]) - raise SyncYmlError("%s must be provided in the YAML file." % absents) - level = {m: yml_level.pop(m) for m in mandatory_args} - - for opt in optional_args: - level[opt] = yml_level.pop(opt, None) - - nested_levels = {n: yml_level.pop(n, {}) for n in nested_levels} - - # the rest is unexpected - if len(yml_level): - raise SyncYmlError( - "Got unexpected attribute(s): %s" % (", ".join(yml_level.keys())) - ) - return level, nested_levels - - -def find_xxerrorxx(yml_file=None, yml_data=None, pattern="XXERRORXX", _output=None): - """Utility to find where things went wrong - - Look through a `yml_file` or the corresponding `yml_Data` dictionary recursively. - Returns a dictionary with all entries containing the error `pattern` - - _output is used for recursive calling. 
- """ - if yml_file is not None: - if yml_data is not None: - raise IOError("Set either yml_file OR yml_data") - with open(yml_file, "r") as reader: - yml_data = yaml.safe_load(reader) - - if _output is None: - _output = dict() - for k, v in yml_data.items(): - if isinstance(v, dict): - _output = find_xxerrorxx(yml_data=v, pattern=pattern, _output=_output) - elif isinstance(v, str) and (pattern in v): - _output[k] = v - return _output + with open("test.yml", "w") as writer: + yaml.safe_dump(data, writer) + print("done") + flm_sess = flz.get_flexilims_session(project_id="blota_onix_pilote") + upload_yaml("test.yml", conflicts="overwrite", flexilims_session=flm_sess) From 6d9e94349d11c93aab4b565beac62e81a3cde8f9 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 09:14:04 +0100 Subject: [PATCH 02/73] Add GUI module with tkinter --- CHANGELOG.md | 1 + flexiznam/gui/azure.tcl | 87 ++++++++++++ flexiznam/gui/flexigui.py | 277 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 365 insertions(+) create mode 100644 flexiznam/gui/azure.tcl create mode 100644 flexiznam/gui/flexigui.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 74a5ce8..3c7bd61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - `flz.get_datasets` can return `Dataset` objects instead of path strings if `return_paths=False` - New `OnixData` class to handle Onix data +- Add a GUI module. ### Minor - `CameraDataset` metadata can also be `.yml`, not only `.txt`. 
diff --git a/flexiznam/gui/azure.tcl b/flexiznam/gui/azure.tcl new file mode 100644 index 0000000..3e75502 --- /dev/null +++ b/flexiznam/gui/azure.tcl @@ -0,0 +1,87 @@ +# Copyright © 2021 rdbende + +source [file join [file dirname [info script]] theme light.tcl] +source [file join [file dirname [info script]] theme dark.tcl] + +option add *tearOff 0 + +proc set_theme {mode} { + if {$mode == "dark"} { + ttk::style theme use "azure-dark" + + array set colors { + -fg "#ffffff" + -bg "#333333" + -disabledfg "#ffffff" + -disabledbg "#737373" + -selectfg "#ffffff" + -selectbg "#007fff" + } + + ttk::style configure . \ + -background $colors(-bg) \ + -foreground $colors(-fg) \ + -troughcolor $colors(-bg) \ + -focuscolor $colors(-selectbg) \ + -selectbackground $colors(-selectbg) \ + -selectforeground $colors(-selectfg) \ + -insertcolor $colors(-fg) \ + -insertwidth 1 \ + -fieldbackground $colors(-selectbg) \ + -font {"Segoe Ui" 10} \ + -borderwidth 1 \ + -relief flat + + tk_setPalette background [ttk::style lookup . -background] \ + foreground [ttk::style lookup . -foreground] \ + highlightColor [ttk::style lookup . -focuscolor] \ + selectBackground [ttk::style lookup . -selectbackground] \ + selectForeground [ttk::style lookup . -selectforeground] \ + activeBackground [ttk::style lookup . -selectbackground] \ + activeForeground [ttk::style lookup . -selectforeground] + + ttk::style map . -foreground [list disabled $colors(-disabledfg)] + + option add *font [ttk::style lookup . -font] + option add *Menu.selectcolor $colors(-fg) + + } elseif {$mode == "light"} { + ttk::style theme use "azure-light" + + array set colors { + -fg "#000000" + -bg "#ffffff" + -disabledfg "#737373" + -disabledbg "#ffffff" + -selectfg "#ffffff" + -selectbg "#007fff" + } + + ttk::style configure . 
\ + -background $colors(-bg) \ + -foreground $colors(-fg) \ + -troughcolor $colors(-bg) \ + -focuscolor $colors(-selectbg) \ + -selectbackground $colors(-selectbg) \ + -selectforeground $colors(-selectfg) \ + -insertcolor $colors(-fg) \ + -insertwidth 1 \ + -fieldbackground $colors(-selectbg) \ + -font {"Segoe Ui" 10} \ + -borderwidth 1 \ + -relief flat + + tk_setPalette background [ttk::style lookup . -background] \ + foreground [ttk::style lookup . -foreground] \ + highlightColor [ttk::style lookup . -focuscolor] \ + selectBackground [ttk::style lookup . -selectbackground] \ + selectForeground [ttk::style lookup . -selectforeground] \ + activeBackground [ttk::style lookup . -selectbackground] \ + activeForeground [ttk::style lookup . -selectforeground] + + ttk::style map . -foreground [list disabled $colors(-disabledfg)] + + option add *font [ttk::style lookup . -font] + option add *Menu.selectcolor $colors(-fg) + } +} diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py new file mode 100644 index 0000000..eea1fd2 --- /dev/null +++ b/flexiznam/gui/flexigui.py @@ -0,0 +1,277 @@ +import os +import tkinter as tk +from tkinter import ttk +from ttkwidgets import CheckboxTreeview +import yaml +from pathlib import Path +import flexiznam as flz +import flexiznam.camp.sync_data + + +class FlexiGui(tk.Tk): + + FLEXILIMS_ONLY_FIELDS = ("children", "project", "origin_id") + + def __init__(self): + super().__init__() + + self.title("FlexiZnam GUI") + self.geometry("800x600") + + self.rowconfigure(1, weight=10) + self.columnconfigure(0, weight=1) + self.columnconfigure(1, weight=3) + + self.frames = dict() + self._create_frames() + self._setup_widgets() + self._entity_by_itemid = {} + + def _setup_widgets(self): + self._create_frames() + self._create_buttons() + self._create_treeview() + self._create_textview() + + def _create_frames(self): + self.frames["t"] = tk.Frame(self) + self.frames["t"].grid( + row=0, column=0, padx=10, pady=5, columnspan=2, sticky="nwe" 
+ ) + self.frames["t"].rowconfigure(0, weight=1) + self.frames["t"].rowconfigure(1, weight=1) + for i in range(10): + self.frames["t"].columnconfigure(i, weight=1) + self.frames["t"].columnconfigure(3, weight=10) + self.frames["bl"] = tk.Frame(self) + self.frames["bl"].grid(row=1, column=0, padx=10, pady=5, sticky="nsew") + self.frames["bl"].rowconfigure(0, weight=1) + self.frames["bl"].columnconfigure(0, weight=1) + self.frames["br"] = tk.Frame(self) + self.frames["br"].grid(row=1, column=1, padx=10, pady=5, sticky="nsew") + self.frames["br"].rowconfigure(0, weight=1) + self.frames["br"].rowconfigure(1, weight=30) + self.frames["br"].rowconfigure(2, weight=1) + self.frames["br"].columnconfigure(0, weight=1) + + def _create_treeview(self): + # Create the Treeview + self.treeview = CheckboxTreeview( + self.frames["bl"], + columns=("datatype",), + selectmode="browse", + ) + + self.treeview.grid(row=0, column=0, sticky="nsew") + self.treeview.heading("datatype", text="Datatype") + self.treeview.column("datatype", width=200) + # Bind the Treeview selection event + self.treeview.bind("<>", self.on_treeview_select) + self.treeview.tag_configure("error", background="red") + + def _create_textview(self): + + # Create the Text widget + tk.Label(self.frames["br"], text="Selected item:").grid( + row=0, + column=0, + sticky="nw", + ) + self.selected_item = tk.StringVar() + self.selected_item.set("None") + l = tk.Label(self.frames["br"], textvariable=self.selected_item) + l.grid(row=0, column=1, sticky="new") + self.textview = tk.Text(self.frames["br"], width=40, height=10, wrap="none") + self.textview.grid(row=1, column=0, sticky="nsew", columnspan=2) + self.textview.bind("<>", self.on_textview_change) + self.update_item_btn = tk.Button( + self.frames["br"], text="Update item", command=self.update_item + ) + self.update_item_btn.grid(row=2, column=1, sticky="nsw") + + def parse_folder(self): + genealogy = self.genealogy.get() + if genealogy.startswith("ENTER COMMA"): + 
tk.messagebox.showerror("Error", "Error: enter genealogy first!") + return + project = self.project.get() + if project == "SELECT PROJECT": + tk.messagebox.showerror("Error", "Error: select project first!") + return + genealogy = [g.strip() for g in genealogy.split(",")] + self.root_folder.set( + tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select directory to parse" + ) + ) + data = flz.camp.sync_data.create_yaml_dict( + root_folder=self.root_folder.get(), + project=project, + genealogy=genealogy, + format_yaml=True, + ) + self.data = data + self.update_data() + + def _create_buttons(self): + topf = self.frames["t"] + self.parse_btn = tk.Button(topf, text="Parse folder", command=self.parse_folder) + self.parse_btn.grid(row=0, column=0, sticky="w") + self.load_btn = tk.Button(topf, text="Load yaml", command=self.load_yaml) + self.load_btn.grid(row=0, column=1, sticky="w") + self.write_btn = tk.Button(topf, text="Write yaml", command=self.write_yaml) + self.write_btn.grid(row=0, column=2) + + # add project dropdown and label + tk.Label(topf, text="Project:").grid(row=0, column=3, sticky="w") + self.project = tk.StringVar(self) + self.project.set("SELECT PROJECT") + self.proj_ddwn = tk.OptionMenu( + topf, + self.project, + "SELECT PROJECT", + *flz.PARAMETERS["project_ids"].keys(), + ).grid(row=0, column=4, columnspan=3, sticky="w") + self.upload_btn = tk.Button(topf, text="Upload to flexilims") + self.upload_btn.grid(row=0, column=7) + + self.quit_btn = tk.Button(topf, text="Quit", command=self.quit) + self.quit_btn.grid(row=0, column=10, sticky="e") + + # add genealogy and root dir + tk.Label(topf, text="Genealogy:").grid(row=1, column=0, sticky="w") + self.genealogy = tk.StringVar(self) + self.genealogy.set("ENTER COMMA SEPARATED GENEALOGY") + self.genealogy_entry = tk.Entry(topf, textvariable=self.genealogy) + self.genealogy_entry.grid(row=1, column=1, columnspan=3, sticky="nsew") + tk.Label(topf, text="Root directory:").grid(row=1, 
column=4, sticky="w") + self.root_folder = tk.StringVar(self) + self.root_folder.set(os.getcwd()) + self.root_folder_entry = tk.Entry(topf, textvariable=self.root_folder) + self.root_folder_entry.grid(row=1, column=5, columnspan=5, sticky="nsew") + self.chg_dir_btn = tk.Button(topf, text="...", command=self.chg_root_folder) + self.chg_dir_btn.grid(row=1, column=10) + + def chg_root_folder(self): + self.root_folder.set( + tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select root directory" + ) + ) + + def on_treeview_select(self, event): + item = self.treeview.focus() + name, data = self._entity_by_itemid[item] + self.selected_item.set(name) + display = {k: v for k, v in data.items() if k not in self.FLEXILIMS_ONLY_FIELDS} + self.textview.delete(1.0, tk.END) + self.textview.insert(tk.END, yaml.dump(display)) + + def on_textview_change(self, event): + print('Textview changed: "{}"'.format(event)) + + def load_yaml(self): + """Load a YAML file and display it in the treeview""" + print("Select YAML file to load") + filetypes = (("Yaml files", "*.yml *.yaml"), ("All files", "*.*")) + + self.filename = tk.filedialog.askopenfilename( + title="Select YAML file to load", filetypes=filetypes + ) + with open(self.filename, "r") as f: + self.data = yaml.safe_load(f) + print('Loaded YAML file "{}"'.format(self.filename)) + self.update_data() + + def update_data(self, name_to_select=None): + """Update GUI data from self.data + + Args: + name_to_select (str, optional): Name of item to select in treeview. 
+ Defaults to None.""" + self.textview.delete("1.0", tk.END) + self.selected_item.set("None") + self.treeview.delete(*self.treeview.get_children()) + self._entity_by_itemid = {} + self._insert_yaml_data(self.data["children"], name_to_select=name_to_select) + + def _insert_yaml_data(self, data, parent="", name_to_select=None): + assert isinstance(data, dict), "data must be a dict" + for child, child_data in data.items(): + assert "type" in child_data, f"datatype missing for {child}" + dtype = child_data["type"] + item = self.treeview.insert( + parent, + "end", + text=child, + values=[dtype], + open=True, + ) + if any( + [ + v.startswith("XXERRORXX") + for v in child_data.values() + if isinstance(v, str) + ] + ): + self.treeview.item(item, tags=("error",)) + + self._entity_by_itemid[item] = (child, child_data) + if name_to_select and child == name_to_select: + self.treeview.focus(item) + self.treeview.selection_set(item) + + if "children" in child_data: + self._insert_yaml_data( + child_data["children"], parent=item, name_to_select=name_to_select + ) + + def write_yaml(self): + """Write the current data to a YAML file""" + target = tk.filedialog.asksaveasfilename( + initialdir=self.root_folder.get(), + title="Select YAML file to write", + filetypes=(("Yaml files", "*.yml *.yaml"), ("All files", "*.*")), + ) + data = dict(self.data) + data["project"] = self.project.get() + data["root_folder"] = self.root_folder.get() + with open(target, "w") as f: + yaml.dump(data, f) + print('Wrote YAML file "{}"'.format(target)) + + def update_item(self): + """Update the selected item with the textview contents""" + text = self.textview.get(1.0, tk.END) + if not text.strip(): + return + item = self.treeview.focus() + name, original_data = self._entity_by_itemid[item] + assert name == self.selected_item.get(), "Selected item does not match" + data = yaml.safe_load(text) + for field in self.FLEXILIMS_ONLY_FIELDS: + if field in original_data: + data[field] = original_data[field] + 
self._entity_by_itemid[item] = (name, data) + parents = [] + parent_id = item + while True: + parent = self.treeview.parent(parent_id) + if not parent: + break + parents.append(self._entity_by_itemid[parent][0]) + parent_id = parent + ref = self.data + for parent in reversed(parents): + ref = ref["children"][parent] + ref["children"][name] = data + self.update_data(name_to_select=name) + + +if __name__ == "__main__": + app = FlexiGui() + with open("test.yml", "r") as f: + data = yaml.safe_load(f) + app.data = data + app.update_data() + app.mainloop() From 6cbf2dca6e7f2a57abd99d3379a3292698ff4d38 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 09:16:14 +0100 Subject: [PATCH 03/73] Use "type" instead of "datatype" As flexililims does --- flexiznam/camp/sync_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 0cd5faf..abcf05a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -83,16 +83,16 @@ def _create_yaml_dict( level_name = level_folder.stem m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", level_name) if m: - level_dict["datatype"] = "recording" + level_dict["type"] = "recording" level_dict["protocol"] = ( m[1] if m[1] is not None else "XXERRORXX PROTOCOL NOT SPECIFIED" ) level_dict["recording_type"] = "XXERRORXX error RECORDING TYPE NOT SPECIFIED" elif re.fullmatch(r"S\d*", level_name): - level_dict["datatype"] = "session" + level_dict["type"] = "session" else: - level_dict["datatype"] = "sample" + level_dict["type"] = "sample" level_dict["genealogy"] = genealogy + [level_name] level_dict["path"] = Path(project, *level_dict["genealogy"]) if format_yaml: @@ -185,7 +185,7 @@ def _upload_yaml_dict( ): for entity, entity_data in yaml_dict.items(): children = entity_data.pop("children", {}) - datatype = entity_data.pop("datatype") + datatype = entity_data.pop("type") if datatype == "session": if verbose: print(f"Adding session 
`{entity}`") From f9ecaaf551b3991487ae3fcc6f19e22ad694c63d Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 09:16:58 +0100 Subject: [PATCH 04/73] create_yaml uses origin_name instead of genealogy --- flexiznam/camp/sync_data.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index abcf05a..699f39a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -18,7 +18,7 @@ def create_yaml_dict( root_folder, project, - genealogy, + origin_name, format_yaml=True, ): """Create a yaml dict from a folder @@ -28,9 +28,8 @@ def create_yaml_dict( Args: root_folder (str): Path to the folder to parse project (str): Name of the project, used as root of the path in the output - genealogy (list): List of strings with the genealogy of root_folder. If - root_folder is a recording for instance, genealogy should be (mouse, - session). + origin_name (str): Name of the origin on flexilims. Must be online and have + genealogy set. format_yaml (bool, optional): Format the output to be yaml compatible if True, otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults to True. 
@@ -39,8 +38,12 @@ def create_yaml_dict( dict: Dictionary with the structure of the folder and automatically detected datasets """ - if isinstance(genealogy, str): - genealogy = [genealogy] + flm_sess = flz.Session(project=project) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flm_sess.get_origin(origin_name) + genealogy = origin.genealogy + data = _create_yaml_dict( level_folder=root_folder, project=project, From 13dd5867738d27fc30393a61b6fe2a463ab96d83 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 09:17:52 +0100 Subject: [PATCH 05/73] Remove from yaml flexilims only fields These would be erased by info from other source when uploading --- flexiznam/camp/sync_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 699f39a..1c2448b 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -110,6 +110,9 @@ def _create_yaml_dict( proot = str(level_folder)[: -len(level_dict["path"])] ds.path = ds.path.relative_to(proot) children[ds_name] = ds.format(mode="yaml") + # remove fields that are not needed + for field in ["origin_id", "project_id", "name"]: + children[ds_name].pop(field, None) children[ds_name]["path"] = str( PurePosixPath(children[ds_name]["path"]) ) From 0ef9f6c991d41aada4541accd320ad2de05bd43a Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 09:18:48 +0100 Subject: [PATCH 06/73] Add skeleton of check_yaml_validity --- flexiznam/camp/sync_data.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 1c2448b..908b6a4 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -28,7 +28,7 @@ def create_yaml_dict( Args: root_folder (str): Path to the folder to parse project (str): Name of the project, used as root of the path in the output - origin_name (str): Name of the origin on 
flexilims. Must be online and have + origin_name (str): Name of the origin on flexilims. Must be online and have genealogy set. format_yaml (bool, optional): Format the output to be yaml compatible if True, otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults @@ -43,7 +43,7 @@ def create_yaml_dict( warnings.simplefilter("ignore") origin = flm_sess.get_origin(origin_name) genealogy = origin.genealogy - + data = _create_yaml_dict( level_folder=root_folder, project=project, @@ -262,6 +262,18 @@ def _upload_yaml_dict( ) +def check_yaml_validity(yaml, root_folder, origin_name): + if isinstance(yaml, str): + with open(yaml, "r") as f: + yaml = yaml.safe_load(f) + assert yaml["root_folder"] == root_folder, f"root_folder should be {root_folder}" + _check_recursively(yaml["children"], root_folder, origin_name) + + +def _check_recursively(yaml, root_folder): + raise NotImplementedError + + if __name__ == "__main__": data = create_yaml_dict( "/Volumes/lab-znamenskiyp/data/instruments/raw_data/projects/blota_onix_pilote/BRAC7448.2d/S20230412", From 078c5c0ef38fbc1d4be573ee92acac1edcd508bb Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 17:55:05 +0100 Subject: [PATCH 07/73] GUI can upload to flexilims --- flexiznam/gui/flexigui.py | 112 ++++++++++++++++++++++++++------------ 1 file changed, 78 insertions(+), 34 deletions(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index eea1fd2..f4ddf5f 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -11,6 +11,7 @@ class FlexiGui(tk.Tk): FLEXILIMS_ONLY_FIELDS = ("children", "project", "origin_id") + RESOURCES = Path(__file__).parent def __init__(self): super().__init__() @@ -27,6 +28,8 @@ def __init__(self): self._setup_widgets() self._entity_by_itemid = {} + self.data = {} + def _setup_widgets(self): self._create_frames() self._create_buttons() @@ -89,25 +92,26 @@ def _create_textview(self): ) self.update_item_btn.grid(row=2, column=1, sticky="nsw") + 
def _check_options_are_set(self, options=("project", "origin_name")): + init_values = dict(project="SELECT", origin_name="ENTER") + for option in options: + value = getattr(self, option).get() + if value.startswith(init_values[option]): + tk.messagebox.showerror("Error", f"Error: enter {option} first!") + return False + return True + def parse_folder(self): - genealogy = self.genealogy.get() - if genealogy.startswith("ENTER COMMA"): - tk.messagebox.showerror("Error", "Error: enter genealogy first!") - return - project = self.project.get() - if project == "SELECT PROJECT": - tk.messagebox.showerror("Error", "Error: select project first!") + if not self._check_options_are_set(): return - genealogy = [g.strip() for g in genealogy.split(",")] - self.root_folder.set( - tk.filedialog.askdirectory( - initialdir=self.root_folder.get(), title="Select directory to parse" - ) + folder = tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select directory to parse" ) + self.root_folder.set(folder) data = flz.camp.sync_data.create_yaml_dict( - root_folder=self.root_folder.get(), - project=project, - genealogy=genealogy, + root_folder=folder, + project=self.project.get(), + origin_name=self.origin_name.get(), format_yaml=True, ) self.data = data @@ -115,11 +119,11 @@ def parse_folder(self): def _create_buttons(self): topf = self.frames["t"] - self.parse_btn = tk.Button(topf, text="Parse folder", command=self.parse_folder) + self.parse_btn = tk.Button(topf, text="Parse", command=self.parse_folder) self.parse_btn.grid(row=0, column=0, sticky="w") - self.load_btn = tk.Button(topf, text="Load yaml", command=self.load_yaml) + self.load_btn = tk.Button(topf, text="Load", command=self.load_yaml) self.load_btn.grid(row=0, column=1, sticky="w") - self.write_btn = tk.Button(topf, text="Write yaml", command=self.write_yaml) + self.write_btn = tk.Button(topf, text="Write", command=self.write_yaml) self.write_btn.grid(row=0, column=2) # add project dropdown and label @@ 
-132,23 +136,33 @@ def _create_buttons(self): "SELECT PROJECT", *flz.PARAMETERS["project_ids"].keys(), ).grid(row=0, column=4, columnspan=3, sticky="w") - self.upload_btn = tk.Button(topf, text="Upload to flexilims") + fllogo = tk.PhotoImage(file=str(self.RESOURCES / "flexilims_logo.png")) + fllogo = fllogo.subsample(10, 10) + self.upload_btn = tk.Button(topf, text="Upload", command=self.upload) self.upload_btn.grid(row=0, column=7) + # add conflicts dropdown and label + tk.Label(topf, text="Conflicts:").grid(row=0, column=8, sticky="w") + self.conflicts = tk.StringVar(self) + self.conflicts.set("abort") + self.conflicts_ddwn = tk.OptionMenu( + topf, self.conflicts, "abort", "overwrite", "skip" + ) + self.conflicts_ddwn.grid(row=0, column=9, sticky="w") self.quit_btn = tk.Button(topf, text="Quit", command=self.quit) self.quit_btn.grid(row=0, column=10, sticky="e") - # add genealogy and root dir - tk.Label(topf, text="Genealogy:").grid(row=1, column=0, sticky="w") - self.genealogy = tk.StringVar(self) - self.genealogy.set("ENTER COMMA SEPARATED GENEALOGY") - self.genealogy_entry = tk.Entry(topf, textvariable=self.genealogy) - self.genealogy_entry.grid(row=1, column=1, columnspan=3, sticky="nsew") - tk.Label(topf, text="Root directory:").grid(row=1, column=4, sticky="w") + # add origin name and root dir + tk.Label(topf, text="Origin name:").grid(row=1, column=0, sticky="w") + self.origin_name = tk.StringVar(self) + self.origin_name.set("ENTER FLEXILIMS ORIGIN NAME") + self.origin_name_entry = tk.Entry(topf, textvariable=self.origin_name) + self.origin_name_entry.grid(row=1, column=1, columnspan=2, sticky="nsew") + tk.Label(topf, text="Root directory:").grid(row=1, column=3, sticky="w") self.root_folder = tk.StringVar(self) self.root_folder.set(os.getcwd()) self.root_folder_entry = tk.Entry(topf, textvariable=self.root_folder) - self.root_folder_entry.grid(row=1, column=5, columnspan=5, sticky="nsew") + self.root_folder_entry.grid(row=1, column=4, columnspan=6, 
sticky="nsew") self.chg_dir_btn = tk.Button(topf, text="...", command=self.chg_root_folder) self.chg_dir_btn.grid(row=1, column=10) @@ -175,12 +189,14 @@ def load_yaml(self): print("Select YAML file to load") filetypes = (("Yaml files", "*.yml *.yaml"), ("All files", "*.*")) - self.filename = tk.filedialog.askopenfilename( + filename = tk.filedialog.askopenfilename( title="Select YAML file to load", filetypes=filetypes ) - with open(self.filename, "r") as f: + if not filename: + return + with open(filename, "r") as f: self.data = yaml.safe_load(f) - print('Loaded YAML file "{}"'.format(self.filename)) + print('Loaded YAML file "{}"'.format(filename)) self.update_data() def update_data(self, name_to_select=None): @@ -193,6 +209,12 @@ def update_data(self, name_to_select=None): self.selected_item.set("None") self.treeview.delete(*self.treeview.get_children()) self._entity_by_itemid = {} + if "project" in self.data: + self.project.set(self.data["project"]) + if "origin_name" in self.data: + self.origin_name.set(self.data["origin_name"]) + if "root_folder" in self.data: + self.root_folder.set(self.data["root_folder"]) self._insert_yaml_data(self.data["children"], name_to_select=name_to_select) def _insert_yaml_data(self, data, parent="", name_to_select=None): @@ -207,6 +229,7 @@ def _insert_yaml_data(self, data, parent="", name_to_select=None): values=[dtype], open=True, ) + self.treeview.change_state(item, "checked") if any( [ v.startswith("XXERRORXX") @@ -240,6 +263,31 @@ def write_yaml(self): yaml.dump(data, f) print('Wrote YAML file "{}"'.format(target)) + def upload(self): + """Upload data to flexilims""" + print("Uploading data to flexilims") + if not self._check_options_are_set(): + return + + data = dict(self.data) + if not data: + tk.messagebox.showerror("Error", "No data loaded") + return + data["project"] = self.project.get() + data["root_folder"] = self.root_folder.get() + if data["project"].startswith("XXERRORXX"): + print("Project name not set") + return 
+ flz.camp.sync_data.upload_yaml( + source_yaml=data, + raw_data_folder=data["root_folder"], + verbose=True, + log_func=print, + flexilims_session=None, + conflicts=self.conflicts.get(), + ) + print("Done") + def update_item(self): """Update the selected item with the textview contents""" text = self.textview.get(1.0, tk.END) @@ -270,8 +318,4 @@ def update_item(self): if __name__ == "__main__": app = FlexiGui() - with open("test.yml", "r") as f: - data = yaml.safe_load(f) - app.data = data - app.update_data() app.mainloop() From b0312b0fe874b084cd3dd6f8b470f0040d977eaf Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 17:56:39 +0100 Subject: [PATCH 08/73] bugfix & switch to origin_name specifiying genealogy is a bit cumbersome and the entity must exist online to upload anyway. Just ask for the name and get genealogy fromn there --- flexiznam/camp/sync_data.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 908b6a4..bf2710a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -38,11 +38,13 @@ def create_yaml_dict( dict: Dictionary with the structure of the folder and automatically detected datasets """ - flm_sess = flz.Session(project=project) + flm_sess = flz.get_flexilims_session(project_id=project) with warnings.catch_warnings(): warnings.simplefilter("ignore") - origin = flm_sess.get_origin(origin_name) - genealogy = origin.genealogy + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert origin is not None, f"Origin {origin_name} not found in project {project}" + assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" + genealogy = origin["genealogy"] data = _create_yaml_dict( level_folder=root_folder, @@ -51,7 +53,7 @@ def create_yaml_dict( format_yaml=format_yaml, parent_dict=dict(), ) - out = dict(root_folder=root_folder, root_genealogy=genealogy, children=data) 
+ out = dict(root_folder=root_folder, origin_name=origin_name, children=data) return out @@ -144,7 +146,7 @@ def upload_yaml( """Upload data from one yaml to flexilims Args: - source_yaml (str): path to clean yaml + source_yaml (dict or str): path to clean yaml or yaml dict raw_data_folder (str): path to the folder containing the data. Default to data_root['raw'] verbose (bool): print progress information @@ -159,15 +161,20 @@ def upload_yaml( list of names of entities created/updated """ - with open(source_yaml, "r") as f: - yaml_data = yaml.safe_load(f) + if isinstance(source_yaml, str): + source_yaml = Path(source_yaml) + with open(source_yaml, "r") as f: + yaml_data = yaml.safe_load(f) + else: + assert isinstance(source_yaml, dict), "source_yaml must be a dict or a path" + yaml_data = source_yaml # first find the origin if flexilims_session is None: flexilims_session = flz.get_flexilims_session(project_id=yaml_data["project"]) - origin_name = "_".join(yaml_data["root_genealogy"]) + origin_name = yaml_data["origin_name"] with warnings.catch_warnings(): warnings.simplefilter("ignore") origin = flz.get_entity(name=origin_name, flexilims_session=flexilims_session) @@ -243,12 +250,11 @@ def _upload_yaml_dict( path=path, genealogy=genealogy, is_raw="yes", - project_id=None, - flexilims_session=None, - dataset_name=None, - attributes=None, + flexilims_session=flexilims_session, + dataset_name=entity, + attributes=entity_data['extra_attributes'], strict_validation=False, - conflicts="append", + conflicts=conflicts, ) _upload_yaml_dict( From 94c9be9bc41b2d9d4d8f35ce73a73adc19543351 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 21:14:22 +0100 Subject: [PATCH 09/73] [minor] black and remove __main__ part That was for debuging --- flexiznam/camp/sync_data.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index bf2710a..c48ef6a 100644 --- 
a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -252,7 +252,7 @@ def _upload_yaml_dict( is_raw="yes", flexilims_session=flexilims_session, dataset_name=entity, - attributes=entity_data['extra_attributes'], + attributes=entity_data["extra_attributes"], strict_validation=False, conflicts=conflicts, ) @@ -278,16 +278,3 @@ def check_yaml_validity(yaml, root_folder, origin_name): def _check_recursively(yaml, root_folder): raise NotImplementedError - - -if __name__ == "__main__": - data = create_yaml_dict( - "/Volumes/lab-znamenskiyp/data/instruments/raw_data/projects/blota_onix_pilote/BRAC7448.2d/S20230412", - project="blota_onix_pilote", - genealogy="BRAC7448.2d", - ) - with open("test.yml", "w") as writer: - yaml.safe_dump(data, writer) - print("done") - flm_sess = flz.get_flexilims_session(project_id="blota_onix_pilote") - upload_yaml("test.yml", conflicts="overwrite", flexilims_session=flm_sess) From 055df0a7087d41638729bc0d9128afadf531bbd5 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sun, 28 May 2023 23:18:34 +0100 Subject: [PATCH 10/73] change minor version number Because of large change --- CHANGELOG.md | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e6ca1a..d1eba92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Change log -## v0.3.5 +## v0.4.0 ### Main changes - `flz.get_datasets` can return `Dataset` objects instead of path strings if diff --git a/setup.py b/setup.py index 73edfee..e0cb181 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="flexiznam", - version="v0.3.5", + version="v0.4.0", url="https://github.com/znamlab/flexznam", license="MIT", author="Antonin Blot", From 4634006a4fc5669c2c74c1b76707c23e6b128a8c Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 1 Jun 2023 12:07:56 +0100 Subject: [PATCH 11/73] [bugfix] add genealogy from parent add_dataset does not require the genealogy --- flexiznam/camp/sync_data.py | 7 ++++--- 
1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index c48ef6a..86d2b93 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -197,6 +197,7 @@ def _upload_yaml_dict( yaml_dict, origin, raw_data_folder, log_func, flexilims_session, conflicts, verbose ): for entity, entity_data in yaml_dict.items(): + entity_data = entity_data.copy() children = entity_data.pop("children", {}) datatype = entity_data.pop("type") if datatype == "session": @@ -240,7 +241,8 @@ def _upload_yaml_dict( created = entity_data.pop("created") dataset_type = entity_data.pop("dataset_type") path = entity_data.pop("path") - genealogy = entity_data.pop("genealogy") + is_raw = entity_data.pop("is_raw") + if verbose: print(f"Adding dataset `{entity}`, type `{dataset_type}`") new_entity = flz.add_dataset( @@ -248,8 +250,7 @@ def _upload_yaml_dict( dataset_type=dataset_type, created=created, path=path, - genealogy=genealogy, - is_raw="yes", + is_raw=is_raw, flexilims_session=flexilims_session, dataset_name=entity, attributes=entity_data["extra_attributes"], From b50f755fedf4ea9346dd01e21ff1381b8eff9b11 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 1 Jun 2023 12:10:11 +0100 Subject: [PATCH 12/73] [bugfix] add with full names add_recording and add_sample --- CHANGELOG.md | 5 ++++- flexiznam/main.py | 53 ++++++++++++++++++++++++++--------------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1eba92..a419183 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,8 @@ - `Dataset.format(mode='yaml')` ensure yaml compatibility. (path to str, tuple to list, etc...) - `add_experimental_session` can be done with `parent_id` (or `parent_name`). -- `add_dataset` can add a dataset to a mouse. +- `add_dataset` can add a dataset to a mouse and does not require genealogy. + ### Bugfixes - Fix [#68](https://github.com/znamlab/flexiznam/issues/68). 
Dataset.format returns @@ -24,6 +25,8 @@ - Fix [#88](https://github.com/znamlab/flexiznam/issues/88). Now make attributes JSON compatible before uploading to flexilims. This will replace special characters in attribute names by `_` in the database. +- `add_recording` and `add_sample` add the value online with the full name (including + genealogy) rather than the short name. ## v0.3.4 diff --git a/flexiznam/main.py b/flexiznam/main.py index 46b8fe9..a8436fc 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -321,17 +321,16 @@ def add_recording( "conflicts must be `skip`, `abort`, `overwrite` or `update`" ) - experimental_session = get_entity( - datatype="session", flexilims_session=flexilims_session, id=session_id - ) + parent_series = get_entity(flexilims_session=flexilims_session, id=session_id) recording_info = {"recording_type": recording_type, "protocol": protocol} + if attributes is None: attributes = {} if "path" not in attributes: attributes["path"] = str( Path( get_path( - experimental_session["path"], + parent_series["path"], datatype="session", flexilims_session=flexilims_session, ) @@ -347,20 +346,25 @@ def add_recording( recording_info.update(attributes) if recording_name is None: - recording_name = experimental_session["name"] + "_" + protocol + "_0" + recording_name = parent_series["name"] + "_" + protocol + "_0" + + if "genealogy" not in attributes: + attributes["genealogy"] = list(parent_series["genealogy"]) + [recording_name] + rec_full_name = "_".join(attributes["genealogy"]) + online_recording = get_entity( - datatype="recording", name=recording_name, flexilims_session=flexilims_session + datatype="recording", name=rec_full_name, flexilims_session=flexilims_session ) if online_recording is not None: if conflicts.lower() == "skip": - print("A recording named %s already exists" % (recording_name)) + print("A recording named %s already exists" % (rec_full_name)) return online_recording elif conflicts.lower() == "abort": - raise 
FlexilimsError("A recording named %s already exists" % recording_name) + raise FlexilimsError("A recording named %s already exists" % rec_full_name) else: resp = update_entity( datatype="recording", - name=recording_name, + name=rec_full_name, id=online_recording["id"], origin_id=session_id, mode=conflicts, @@ -372,7 +376,7 @@ def add_recording( resp = flexilims_session.post( datatype="recording", - name=recording_name, + name=rec_full_name, attributes=recording_info, origin_id=session_id, other_relations=other_relations, @@ -529,7 +533,6 @@ def add_dataset( dataset_type, created, path, - genealogy, is_raw="yes", project_id=None, flexilims_session=None, @@ -545,8 +548,6 @@ def add_dataset( dataset_type (str): dataset_type, must be a type define in the config file created (str): date of creation as text, usually in this format: '2021-05-24 14:56:41' path (str): path to the data relative to the project folder - genealogy (tuple): parents of this dataset from the project (excluded) down to - the dataset name itself (included) is_raw (str): `yes` or `no`, used to find the root directory project_id (str): hexadecimal ID or name of the project flexilims_session (:py:class:`flexilims.Flexilims`): authentication @@ -572,11 +573,10 @@ def add_dataset( if conflicts.lower() not in valid_conflicts: raise AttributeError("`conflicts` must be in [%s]" % ", ".join(valid_conflicts)) + parent = get_entity(flexilims_session=flexilims_session, id=parent_id) + if dataset_name is None: - parent_name = get_entity( - flexilims_session=flexilims_session, - id=parent_id, - )["name"] + parent_name = parent["name"] dataset_name = parent_name + "_" + dataset_type + "_0" dataset_info = { @@ -584,7 +584,7 @@ def add_dataset( "created": created, "path": path, "is_raw": is_raw, - "genealogy": genealogy, + "genealogy": list(parent["genealogy"]), } reserved_attributes = ["dataset_type", "created", "path", "is_raw", "genealogy"] if attributes is not None: @@ -596,32 +596,37 @@ def add_dataset( 
dataset_name = generate_name( "dataset", dataset_name, flexilims_session=flexilims_session ) + dataset_info["genealogy"].append(dataset_name) + dataset_full_name = "_".join(dataset_info["genealogy"]) else: + dataset_info["genealogy"].append(dataset_name) + dataset_full_name = "_".join(dataset_info["genealogy"]) online_version = get_entity( - "dataset", name=dataset_name, flexilims_session=flexilims_session + "dataset", name=dataset_full_name, flexilims_session=flexilims_session ) if online_version is not None: if conflicts.lower() == "abort": - raise FlexilimsError("A dataset named %s already exists" % dataset_name) + raise FlexilimsError( + "A dataset named %s already exists" % dataset_full_name + ) elif conflicts.lower() == "skip": - print("A dataset named %s already exists" % dataset_name) + print("A dataset named %s already exists" % dataset_full_name) return online_version else: resp = update_entity( datatype="dataset", - name=dataset_name, + name=dataset_full_name, id=online_version["id"], origin_id=parent_id, mode=conflicts, attributes=dataset_info, - other_relations=None, flexilims_session=flexilims_session, ) return resp resp = flexilims_session.post( datatype="dataset", - name=dataset_name, + name=dataset_full_name, origin_id=parent_id, attributes=dataset_info, strict_validation=strict_validation, From da64162e60b73970242fd1e0f36ed4368a704831 Mon Sep 17 00:00:00 2001 From: BenitaTB <62141042+BenitaTB@users.noreply.github.com> Date: Thu, 1 Jun 2023 14:35:13 +0100 Subject: [PATCH 13/73] add dependancy for ttkwidgets --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index e0cb181..b0e2ebe 100755 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ "flexilims @ git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims", "pymcms @ git+ssh://git@github.com/znamlab/pymcms.git#egg=pymcms", "tifffile", + "ttkwidgets", ], entry_points=""" [console_scripts] From 99d3e404f9c9f578cf8fd4c84b59c4cbe7cffeac Mon Sep 17 00:00:00 2001 From: 
BenitaTB <62141042+BenitaTB@users.noreply.github.com> Date: Thu, 1 Jun 2023 14:35:33 +0100 Subject: [PATCH 14/73] bug fix removed logo.png --- flexiznam/gui/flexigui.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index f4ddf5f..a3f4aa1 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -9,7 +9,6 @@ class FlexiGui(tk.Tk): - FLEXILIMS_ONLY_FIELDS = ("children", "project", "origin_id") RESOURCES = Path(__file__).parent @@ -73,7 +72,6 @@ def _create_treeview(self): self.treeview.tag_configure("error", background="red") def _create_textview(self): - # Create the Text widget tk.Label(self.frames["br"], text="Selected item:").grid( row=0, @@ -136,8 +134,6 @@ def _create_buttons(self): "SELECT PROJECT", *flz.PARAMETERS["project_ids"].keys(), ).grid(row=0, column=4, columnspan=3, sticky="w") - fllogo = tk.PhotoImage(file=str(self.RESOURCES / "flexilims_logo.png")) - fllogo = fllogo.subsample(10, 10) self.upload_btn = tk.Button(topf, text="Upload", command=self.upload) self.upload_btn.grid(row=0, column=7) From 8efd593e0b1660acc115d02706e168f9ee7aa588 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 1 Jun 2023 17:08:23 +0100 Subject: [PATCH 15/73] [feature] dataset validity Each dataset subclass has a is_valid method checking if the expected files actually exist --- flexiznam/schema/camera_data.py | 18 +++++++++-------- flexiznam/schema/datasets.py | 11 ++++++---- flexiznam/schema/harp_data.py | 21 ++++++++++++------- flexiznam/schema/microscopy_data.py | 6 ------ flexiznam/schema/onix_data.py | 31 ++++++++++++++++++++++++++++- flexiznam/schema/scanimage_data.py | 12 ++++++----- 6 files changed, 68 insertions(+), 31 deletions(-) diff --git a/flexiznam/schema/camera_data.py b/flexiznam/schema/camera_data.py index 30d7631..98a2bc7 100644 --- a/flexiznam/schema/camera_data.py +++ b/flexiznam/schema/camera_data.py @@ -219,12 +219,14 @@ def video_file(self): def video_file(self, value): 
self.extra_attributes["video_file"] = str(value) - def is_valid(self): + def is_valid(self, return_reason=False): """Check that video, metadata and timestamps files exist""" - if not (pathlib.Path(self.path) / self.timestamp_file).exists(): - return False - if not (pathlib.Path(self.path) / self.metadata_file).exists(): - return False - if not (pathlib.Path(self.path) / self.video_file).exists(): - return False - return True + for attr in ["video_file", "timestamp_file", "metadata_file"]: + if attr not in self.extra_attributes: + msg = f"Missing attribute {attr}" + return msg if return_reason else False + fname = getattr(self, attr) + if not (self.path_full / fname).exists(): + msg = f"Unvalid {attr}. {self.path_full / fname} does not exist" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 1121c1c..c6c91d7 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -325,13 +325,16 @@ def __init__( elif project_id is not None: self.project_id = project_id - def is_valid(self): - """ - Dummy method definition. Should be reimplemented in children classes + def is_valid(self, return_reason=False): + """Check if the file path is valid for this dataset + Should be reimplemented in children classes. 
Should return True if the dataset is found a valid, false otherwise """ - raise NotImplementedError("`is_valid` is not defined for generic datasets") + if not self.path_full.exists(): + msg = f"Path {self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True def associated_files(self, folder=None): """Give a list of all files associated with this dataset diff --git a/flexiznam/schema/harp_data.py b/flexiznam/schema/harp_data.py index 3613cd1..e105f00 100644 --- a/flexiznam/schema/harp_data.py +++ b/flexiznam/schema/harp_data.py @@ -163,11 +163,18 @@ def csv_files(self): def csv_files(self, value): self.extra_attributes["csv_files"] = str(value) - def is_valid(self): - """Check that video, metadata and timestamps files exist""" - if not (pathlib.Path(self.path) / self.binary_file).exists(): - return False + def is_valid(self, return_reason=False): + """Check that video, metadata and timestamps files exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" + if not (self.path_full / self.binary_file).exists(): + msg = f"Missing file {self.binary_file}" + return msg if return_reason else False for _, file_path in self.csv_files.items(): - if not (pathlib.Path(self.path) / file_path).exists(): - return False - return True + if not (self.path_full / file_path).exists(): + msg = f"Missing file {file_path}" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/microscopy_data.py b/flexiznam/schema/microscopy_data.py index dc0c44b..eda32cb 100644 --- a/flexiznam/schema/microscopy_data.py +++ b/flexiznam/schema/microscopy_data.py @@ -139,9 +139,3 @@ def __init__( id=id, flexilims_session=flexilims_session, ) - - def is_valid(self): - """Check that the file exist""" - if not (pathlib.Path(self.path)).exists(): - return False - return True diff --git a/flexiznam/schema/onix_data.py 
b/flexiznam/schema/onix_data.py index 57c59b6..cc09ac0 100644 --- a/flexiznam/schema/onix_data.py +++ b/flexiznam/schema/onix_data.py @@ -94,7 +94,9 @@ def from_folder( onix_name = "onix_data_%s" % ts.strftime("%Y-%m-%d_%H_%M_%S") extra_attributes = dict() for device, dev_df in df.groupby("device_name"): - extra_attributes[device] = {s.subname: s.file for s in dev_df.itertuples()} + extra_attributes[device] = { + s.subname: s.file for s in dev_df.itertuples() + } output[onix_name] = OnixData( path=folder, genealogy=folder_genealogy + (onix_name,), @@ -150,3 +152,30 @@ def __init__( id=id, flexilims_session=flexilims_session, ) + + def is_valid(self, return_reason=False): + """Check that the onix dataset is valid + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid. If False, return True or False + + Returns: + bool or str: True if valid, False if not. If return_reason is True, return + a string with the reason why the dataset is not valid.""" + + ndevices = 0 + for device_name in OnixData.DEVICE_NAMES: + if device_name not in self.extra_attributes: + continue + ndevices += 1 + dev_dict = self.extra_attributes[device_name] + for v in dev_dict.values(): + p = self.path_full / v + if not p.exists(): + msg = f"File {p} does not exist" + return msg if return_reason else False + if ndevices == 0: + msg = "No devices found" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/scanimage_data.py b/flexiznam/schema/scanimage_data.py index 681c48f..d299ebe 100644 --- a/flexiznam/schema/scanimage_data.py +++ b/flexiznam/schema/scanimage_data.py @@ -234,7 +234,7 @@ def tif_files(self, value): ) self.extra_attributes["tif_files"] = value - def is_valid(self, tif_files=None): + def is_valid(self, return_reason=False, tif_files=None): """Check that associated files exist""" if tif_files is None: tif_files = self.tif_files @@ -244,11 +244,13 @@ def is_valid(self, 
tif_files=None): f for f in os.listdir(self.path) if f.endswith(("tif", ".tiff")) } if tif_files - existing_file: - return False + msg = "Some tif files do not exist: %s" % (tif_files - existing_file) + return msg if return_reason else False for _, file_path in self.csv_files.items(): - if not (pathlib.Path(self.path) / file_path).exists(): - return False - return True + if not (self.path_full / file_path).exists(): + msg = "Csv file does not exist: %s" % file_path + return msg if return_reason else False + return "" if return_reason else True def __len__(self): """Number of tif files in the dataset""" From c58cb237c1ac0b9a4383fbc9836a4ba23622e6f6 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 11:20:21 +0100 Subject: [PATCH 16/73] [minor] Move upload_yaml up in the file To keep private functions together at the bottom --- flexiznam/camp/sync_data.py | 121 ++++++++++++++++++------------------ 1 file changed, 62 insertions(+), 59 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 86d2b93..349ee61 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -45,6 +45,8 @@ def create_yaml_dict( assert origin is not None, f"Origin {origin_name} not found in project {project}" assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" genealogy = origin["genealogy"] + root_folder = Path(root_folder) + assert root_folder.is_dir(), f"Folder {root_folder} does not exist" data = _create_yaml_dict( level_folder=root_folder, @@ -53,10 +55,69 @@ def create_yaml_dict( format_yaml=format_yaml, parent_dict=dict(), ) - out = dict(root_folder=root_folder, origin_name=origin_name, children=data) + out = dict(root_folder=root_folder.parent, origin_name=origin_name, children=data) return out + +def upload_yaml( + source_yaml, + raw_data_folder=None, + verbose=False, + log_func=print, + flexilims_session=None, + conflicts="abort", +): + """Upload data from one yaml to flexilims + + Args: + source_yaml 
(dict or str): path to clean yaml or yaml dict + raw_data_folder (str): path to the folder containing the data. Default to + data_root['raw'] + verbose (bool): print progress information + log_func: function to deal with warnings and messages + flexilims_session (Flexilims): session to avoid recreating a token + conflicts (str): `abort` to crash if there is already a session or recording + existing on flexilims, `skip` to ignore and proceed. Samples + are always updated with `skip` and datasets always have + mode=`safe` + + Returns: + list of names of entities created/updated + + """ + if isinstance(source_yaml, str): + source_yaml = Path(source_yaml) + with open(source_yaml, "r") as f: + yaml_data = yaml.safe_load(f) + else: + assert isinstance(source_yaml, dict), "source_yaml must be a dict or a path" + yaml_data = source_yaml + + # first find the origin + + if flexilims_session is None: + flexilims_session = flz.get_flexilims_session(project_id=yaml_data["project"]) + + origin_name = yaml_data["origin_name"] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flexilims_session) + assert origin is not None, f"`{origin_name}` not found on flexilims" + if verbose: + print(f"Found origin `{origin_name}` with id `{origin.id}`") + # then upload the data recursively + _upload_yaml_dict( + yaml_data["children"], + origin=origin, + raw_data_folder=raw_data_folder, + log_func=log_func, + flexilims_session=flexilims_session, + conflicts=conflicts, + verbose=verbose, + ) + + def _create_yaml_dict( level_folder, project, @@ -135,64 +196,6 @@ def _create_yaml_dict( return parent_dict -def upload_yaml( - source_yaml, - raw_data_folder=None, - verbose=False, - log_func=print, - flexilims_session=None, - conflicts="abort", -): - """Upload data from one yaml to flexilims - - Args: - source_yaml (dict or str): path to clean yaml or yaml dict - raw_data_folder (str): path to the folder containing the 
data. Default to - data_root['raw'] - verbose (bool): print progress information - log_func: function to deal with warnings and messages - flexilims_session (Flexilims): session to avoid recreating a token - conflicts (str): `abort` to crash if there is already a session or recording - existing on flexilims, `skip` to ignore and proceed. Samples - are always updated with `skip` and datasets always have - mode=`safe` - - Returns: - list of names of entities created/updated - - """ - if isinstance(source_yaml, str): - source_yaml = Path(source_yaml) - with open(source_yaml, "r") as f: - yaml_data = yaml.safe_load(f) - else: - assert isinstance(source_yaml, dict), "source_yaml must be a dict or a path" - yaml_data = source_yaml - - # first find the origin - - if flexilims_session is None: - flexilims_session = flz.get_flexilims_session(project_id=yaml_data["project"]) - - origin_name = yaml_data["origin_name"] - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - origin = flz.get_entity(name=origin_name, flexilims_session=flexilims_session) - assert origin is not None, f"`{origin_name}` not found on flexilims" - if verbose: - print(f"Found origin `{origin_name}` with id `{origin.id}`") - # then upload the data recursively - _upload_yaml_dict( - yaml_data["children"], - origin=origin, - raw_data_folder=raw_data_folder, - log_func=log_func, - flexilims_session=flexilims_session, - conflicts=conflicts, - verbose=verbose, - ) - - def _upload_yaml_dict( yaml_dict, origin, raw_data_folder, log_func, flexilims_session, conflicts, verbose ): From f3efb75751fd54d63087097fc4fe330d92381f93 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 11:20:58 +0100 Subject: [PATCH 17/73] [feature] add check_yaml_validity Iteratively validates datasets --- flexiznam/camp/sync_data.py | 92 +++++++++++++++++++++++++++++++++---- 1 file changed, 84 insertions(+), 8 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 
349ee61..6b106cd 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -5,6 +5,7 @@ import re import copy import warnings +import pandas as pd import yaml from yaml.parser import ParserError @@ -59,6 +60,46 @@ def create_yaml_dict( return out +def check_yaml_validity(yaml_data, root_folder=None, origin_name=None, project=None): + if isinstance(yaml_data, str) or isinstance(yaml_data, Path): + with open(yaml_data, "r") as f: + yaml_data = yaml.safe_load(f) + if root_folder is not None: + assert yaml_data["root_folder"] == str( + root_folder + ), f"root_folder is {yaml_data['root_folder']}. Expected {root_folder}" + else: + root_folder = yaml_data["root_folder"] + + if project is not None: + assert ( + yaml_data["project"] == project + ), f"project is {yaml_data['project']}. Expected {project}" + else: + project = yaml_data["project"] + + if origin_name is not None: + assert ( + yaml_data["origin_name"] == origin_name + ), f"origin_name is {yaml_data['origin_name']}. 
Expected {origin_name}" + else: + origin_name = yaml_data["origin_name"] + + flm_sess = flz.get_flexilims_session(project_id=project) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert hasattr(origin, "genealogy"), f"Origin {origin_name} has no genealogy" + + _check_recursively( + yaml_data["children"], + origin_genealogy=origin["genealogy"], + root_folder=root_folder, + project=project, + genealogy=[], + ) + return yaml_data + def upload_yaml( source_yaml, @@ -272,13 +313,48 @@ def _upload_yaml_dict( ) -def check_yaml_validity(yaml, root_folder, origin_name): - if isinstance(yaml, str): - with open(yaml, "r") as f: - yaml = yaml.safe_load(f) - assert yaml["root_folder"] == root_folder, f"root_folder should be {root_folder}" - _check_recursively(yaml["children"], root_folder, origin_name) +def _check_recursively( + yaml_data, origin_genealogy, root_folder, project, genealogy, fixerrors=False +): + root_folder = Path(root_folder) + + for child, child_dict in yaml_data.items(): + fname = root_folder / Path(*genealogy) / child + child_genealogy = genealogy + [child] + + if child_dict["type"] != "dataset": + if not fname.is_dir(): + child_dict["PATH_ERROR"] = f"XXERRORXX folder {fname} does not exist" + else: + data_series = pd.Series(child_dict) + for k, v in data_series.pop("extra_attributes").items(): + data_series[k] = v + data_series.id = None + data_series.name = "_".join(origin_genealogy + child_genealogy) + ds = flz.Dataset.from_flexilims(data_series=data_series) + msg = ds.is_valid(return_reason=True) + if msg: + child_dict["VALIDATION_ERROR"] = f"XXERRORXX {msg}" + + if child_dict["genealogy"] != origin_genealogy + child_genealogy: + if fixerrors: + print(f"Fixing genealogy for {child}") + child_dict["genealogy"] = origin_genealogy + child_genealogy + else: + child_dict["GENEALOGY_ERROR"] = f"XXERRORXX genealogy is not correct" + if "children" in child_dict: + 
_check_recursively( + child_dict["children"], + origin_genealogy, + root_folder, + project, + genealogy=genealogy + [child], + ) -def _check_recursively(yaml, root_folder): - raise NotImplementedError +if __name__ == "__main__": + rel = "blota_onix_pilote/BRAC7448.2d/" + root_folder = Path(flz.PARAMETERS["data_root"]["raw"]) / rel + yaml_file = Path(flz.PARAMETERS["data_root"]["processed"]) / rel / "S20230421.yml" + origin_name = "BRAC7448.2d" + check_yaml_validity(yaml_file, root_folder, origin_name) From 79909a3935553a01cc2801066b311f8d86d27056 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 11:21:26 +0100 Subject: [PATCH 18/73] [gui] gui checks for erros before uploading It is slow. SHould add progress indication --- flexiznam/gui/flexigui.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index a3f4aa1..0190e9b 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -26,7 +26,7 @@ def __init__(self): self._create_frames() self._setup_widgets() self._entity_by_itemid = {} - + self.contains_errors = False self.data = {} def _setup_widgets(self): @@ -112,6 +112,7 @@ def parse_folder(self): origin_name=self.origin_name.get(), format_yaml=True, ) + data = flz.camp.sync_data.check_yaml_dict(data) self.data = data self.update_data() @@ -211,6 +212,8 @@ def update_data(self, name_to_select=None): self.origin_name.set(self.data["origin_name"]) if "root_folder" in self.data: self.root_folder.set(self.data["root_folder"]) + + self.contains_errors = False self._insert_yaml_data(self.data["children"], name_to_select=name_to_select) def _insert_yaml_data(self, data, parent="", name_to_select=None): @@ -233,6 +236,7 @@ def _insert_yaml_data(self, data, parent="", name_to_select=None): if isinstance(v, str) ] ): + self.contains_errors = True self.treeview.item(item, tags=("error",)) self._entity_by_itemid[item] = (child, child_data) @@ -265,15 
+269,23 @@ def upload(self): if not self._check_options_are_set(): return - data = dict(self.data) - if not data: + if not self.data: tk.messagebox.showerror("Error", "No data loaded") return + + self.data = flz.camp.sync_data.check_yaml_validity(self.data) + + if self.contains_errors: + tk.messagebox.showerror( + "Error", + "There are still errors. Please fix them before uploading", + ) + return + + data = dict(self.data) data["project"] = self.project.get() data["root_folder"] = self.root_folder.get() - if data["project"].startswith("XXERRORXX"): - print("Project name not set") - return + flz.camp.sync_data.upload_yaml( source_yaml=data, raw_data_folder=data["root_folder"], From 47060b473dc6080f024fe65942aa74f3219be42b Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 11:30:29 +0100 Subject: [PATCH 19/73] [gui] add status bar and refactor Put gui creation function together --- flexiznam/gui/flexigui.py | 118 ++++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 48 deletions(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index 0190e9b..103a3f4 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -29,37 +29,47 @@ def __init__(self): self.contains_errors = False self.data = {} + ############# GUI setup methods ############# + # These methods are used to create the GUI elements + def _setup_widgets(self): self._create_frames() self._create_buttons() self._create_treeview() self._create_textview() + self._create_statusbar() def _create_frames(self): - self.frames["t"] = tk.Frame(self) - self.frames["t"].grid( + self.frames["T"] = tk.Frame(self) + self.frames["T"].grid( row=0, column=0, padx=10, pady=5, columnspan=2, sticky="nwe" ) - self.frames["t"].rowconfigure(0, weight=1) - self.frames["t"].rowconfigure(1, weight=1) + self.frames["T"].rowconfigure(0, weight=1) + self.frames["T"].rowconfigure(1, weight=1) for i in range(10): - self.frames["t"].columnconfigure(i, weight=1) - 
self.frames["t"].columnconfigure(3, weight=10) - self.frames["bl"] = tk.Frame(self) - self.frames["bl"].grid(row=1, column=0, padx=10, pady=5, sticky="nsew") - self.frames["bl"].rowconfigure(0, weight=1) - self.frames["bl"].columnconfigure(0, weight=1) - self.frames["br"] = tk.Frame(self) - self.frames["br"].grid(row=1, column=1, padx=10, pady=5, sticky="nsew") - self.frames["br"].rowconfigure(0, weight=1) - self.frames["br"].rowconfigure(1, weight=30) - self.frames["br"].rowconfigure(2, weight=1) - self.frames["br"].columnconfigure(0, weight=1) + self.frames["T"].columnconfigure(i, weight=1) + self.frames["T"].columnconfigure(3, weight=10) + self.frames["L"] = tk.Frame(self) + self.frames["L"].grid(row=1, column=0, padx=10, pady=5, sticky="nsew") + self.frames["L"].rowconfigure(0, weight=1) + self.frames["L"].columnconfigure(0, weight=1) + self.frames["R"] = tk.Frame(self) + self.frames["R"].grid(row=1, column=1, padx=10, pady=5, sticky="nsew") + self.frames["R"].rowconfigure(0, weight=1) + self.frames["R"].rowconfigure(1, weight=30) + self.frames["R"].rowconfigure(2, weight=1) + self.frames["R"].columnconfigure(0, weight=1) + self.frames["B"] = tk.Frame(self) + self.frames["B"].grid( + row=2, column=0, columnspan=2, padx=10, pady=5, sticky="sew" + ) + self.frames["B"].rowconfigure(0, weight=1) + self.frames["B"].columnconfigure(0, weight=10) def _create_treeview(self): # Create the Treeview self.treeview = CheckboxTreeview( - self.frames["bl"], + self.frames["L"], columns=("datatype",), selectmode="browse", ) @@ -73,51 +83,25 @@ def _create_treeview(self): def _create_textview(self): # Create the Text widget - tk.Label(self.frames["br"], text="Selected item:").grid( + tk.Label(self.frames["R"], text="Selected item:").grid( row=0, column=0, sticky="nw", ) self.selected_item = tk.StringVar() self.selected_item.set("None") - l = tk.Label(self.frames["br"], textvariable=self.selected_item) + l = tk.Label(self.frames["R"], textvariable=self.selected_item) 
l.grid(row=0, column=1, sticky="new") - self.textview = tk.Text(self.frames["br"], width=40, height=10, wrap="none") + self.textview = tk.Text(self.frames["R"], width=40, height=10, wrap="none") self.textview.grid(row=1, column=0, sticky="nsew", columnspan=2) self.textview.bind("<>", self.on_textview_change) self.update_item_btn = tk.Button( - self.frames["br"], text="Update item", command=self.update_item + self.frames["R"], text="Update item", command=self.update_item ) self.update_item_btn.grid(row=2, column=1, sticky="nsw") - def _check_options_are_set(self, options=("project", "origin_name")): - init_values = dict(project="SELECT", origin_name="ENTER") - for option in options: - value = getattr(self, option).get() - if value.startswith(init_values[option]): - tk.messagebox.showerror("Error", f"Error: enter {option} first!") - return False - return True - - def parse_folder(self): - if not self._check_options_are_set(): - return - folder = tk.filedialog.askdirectory( - initialdir=self.root_folder.get(), title="Select directory to parse" - ) - self.root_folder.set(folder) - data = flz.camp.sync_data.create_yaml_dict( - root_folder=folder, - project=self.project.get(), - origin_name=self.origin_name.get(), - format_yaml=True, - ) - data = flz.camp.sync_data.check_yaml_dict(data) - self.data = data - self.update_data() - def _create_buttons(self): - topf = self.frames["t"] + topf = self.frames["T"] self.parse_btn = tk.Button(topf, text="Parse", command=self.parse_folder) self.parse_btn.grid(row=0, column=0, sticky="w") self.load_btn = tk.Button(topf, text="Load", command=self.load_yaml) @@ -163,6 +147,44 @@ def _create_buttons(self): self.chg_dir_btn = tk.Button(topf, text="...", command=self.chg_root_folder) self.chg_dir_btn.grid(row=1, column=10) + def _create_statusbar(self): + self.statusbar = tk.Label( + self.frames["B"], text="Ready", bd=1, relief=tk.SUNKEN + ) + self.statusbar.grid(row=0, column=0, sticky="sw") + + ############# GUI update methods 
############# + # These methods are used to actually do stuff with the GUI elements + def report(self, message): + self.statusbar["text"] = message + print(message) + + def _check_options_are_set(self, options=("project", "origin_name")): + init_values = dict(project="SELECT", origin_name="ENTER") + for option in options: + value = getattr(self, option).get() + if value.startswith(init_values[option]): + tk.messagebox.showerror("Error", f"Error: enter {option} first!") + return False + return True + + def parse_folder(self): + if not self._check_options_are_set(): + return + folder = tk.filedialog.askdirectory( + initialdir=self.root_folder.get(), title="Select directory to parse" + ) + self.root_folder.set(folder) + data = flz.camp.sync_data.create_yaml_dict( + root_folder=folder, + project=self.project.get(), + origin_name=self.origin_name.get(), + format_yaml=True, + ) + data = flz.camp.sync_data.check_yaml_dict(data) + self.data = data + self.update_data() + def chg_root_folder(self): self.root_folder.set( tk.filedialog.askdirectory( From acdb3201fffbd9d0677e85206e87c59eb2c645ec Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 12:01:49 +0100 Subject: [PATCH 20/73] [gui] update status bar to indicate progress --- flexiznam/gui/flexigui.py | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index 103a3f4..01bee85 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -148,24 +148,29 @@ def _create_buttons(self): self.chg_dir_btn.grid(row=1, column=10) def _create_statusbar(self): + self.sb_msg = tk.StringVar() self.statusbar = tk.Label( - self.frames["B"], text="Ready", bd=1, relief=tk.SUNKEN + self.frames["B"], textvariable=self.sb_msg, bd=1, relief=tk.SUNKEN ) self.statusbar.grid(row=0, column=0, sticky="sw") + self.sb_msg.set("Ready") ############# GUI update methods ############# # These methods are used to actually do 
stuff with the GUI elements def report(self, message): - self.statusbar["text"] = message + self.sb_msg.set(message) print(message) + self.update() def _check_options_are_set(self, options=("project", "origin_name")): + self.report("Checking options") init_values = dict(project="SELECT", origin_name="ENTER") for option in options: value = getattr(self, option).get() if value.startswith(init_values[option]): tk.messagebox.showerror("Error", f"Error: enter {option} first!") return False + self.report("Options are set") return True def parse_folder(self): @@ -174,6 +179,7 @@ def parse_folder(self): folder = tk.filedialog.askdirectory( initialdir=self.root_folder.get(), title="Select directory to parse" ) + self.report(f"Parsing folder {folder}...") self.root_folder.set(folder) data = flz.camp.sync_data.create_yaml_dict( root_folder=folder, @@ -181,11 +187,14 @@ def parse_folder(self): origin_name=self.origin_name.get(), format_yaml=True, ) + self.report("Parsing done. Validating data...") data = flz.camp.sync_data.check_yaml_dict(data) self.data = data self.update_data() + self.report("Done") def chg_root_folder(self): + self.report("Changing root folder") self.root_folder.set( tk.filedialog.askdirectory( initialdir=self.root_folder.get(), title="Select root directory" @@ -195,17 +204,18 @@ def chg_root_folder(self): def on_treeview_select(self, event): item = self.treeview.focus() name, data = self._entity_by_itemid[item] + self.report(f"Selected item: {name}") self.selected_item.set(name) display = {k: v for k, v in data.items() if k not in self.FLEXILIMS_ONLY_FIELDS} self.textview.delete(1.0, tk.END) self.textview.insert(tk.END, yaml.dump(display)) def on_textview_change(self, event): - print('Textview changed: "{}"'.format(event)) + return def load_yaml(self): """Load a YAML file and display it in the treeview""" - print("Select YAML file to load") + self.report("Select YAML file to load") filetypes = (("Yaml files", "*.yml *.yaml"), ("All files", "*.*")) filename 
= tk.filedialog.askopenfilename( @@ -213,10 +223,11 @@ def load_yaml(self): ) if not filename: return + self.report(f"Loading YAML file {filename}...") with open(filename, "r") as f: self.data = yaml.safe_load(f) - print('Loaded YAML file "{}"'.format(filename)) self.update_data() + self.report("Done") def update_data(self, name_to_select=None): """Update GUI data from self.data @@ -224,6 +235,7 @@ def update_data(self, name_to_select=None): Args: name_to_select (str, optional): Name of item to select in treeview. Defaults to None.""" + self.report("Updating GUI") self.textview.delete("1.0", tk.END) self.selected_item.set("None") self.treeview.delete(*self.treeview.get_children()) @@ -259,6 +271,7 @@ def _insert_yaml_data(self, data, parent="", name_to_select=None): ] ): self.contains_errors = True + self.report(f"ERROR: {child} contains errors") self.treeview.item(item, tags=("error",)) self._entity_by_itemid[item] = (child, child_data) @@ -273,17 +286,21 @@ def _insert_yaml_data(self, data, parent="", name_to_select=None): def write_yaml(self): """Write the current data to a YAML file""" + self.report("Select YAML file to write") target = tk.filedialog.asksaveasfilename( initialdir=self.root_folder.get(), title="Select YAML file to write", filetypes=(("Yaml files", "*.yml *.yaml"), ("All files", "*.*")), ) + if not target: + self.report("No file selected. 
Cancel") + return data = dict(self.data) data["project"] = self.project.get() data["root_folder"] = self.root_folder.get() with open(target, "w") as f: yaml.dump(data, f) - print('Wrote YAML file "{}"'.format(target)) + self.report('Wrote YAML file "{}"'.format(target)) def upload(self): """Upload data to flexilims""" @@ -295,6 +312,7 @@ def upload(self): tk.messagebox.showerror("Error", "No data loaded") return + self.report("Validating data...") self.data = flz.camp.sync_data.check_yaml_validity(self.data) if self.contains_errors: @@ -307,7 +325,7 @@ def upload(self): data = dict(self.data) data["project"] = self.project.get() data["root_folder"] = self.root_folder.get() - + self.report("Validating data...") flz.camp.sync_data.upload_yaml( source_yaml=data, raw_data_folder=data["root_folder"], @@ -316,15 +334,17 @@ def upload(self): flexilims_session=None, conflicts=self.conflicts.get(), ) - print("Done") + self.report("Done") def update_item(self): """Update the selected item with the textview contents""" + text = self.textview.get(1.0, tk.END) if not text.strip(): return item = self.treeview.focus() name, original_data = self._entity_by_itemid[item] + self.report(f"Updating item {name}") assert name == self.selected_item.get(), "Selected item does not match" data = yaml.safe_load(text) for field in self.FLEXILIMS_ONLY_FIELDS: @@ -344,6 +364,7 @@ def update_item(self): ref = ref["children"][parent] ref["children"][name] = data self.update_data(name_to_select=name) + self.report("Done") if __name__ == "__main__": From 0f07d125df85ee4ceaaab2227fe39be8d1005356 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 16:12:16 +0100 Subject: [PATCH 21/73] [bugfix] typo --- flexiznam/gui/flexigui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index 01bee85..91adf41 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -188,7 +188,7 @@ def parse_folder(self): 
format_yaml=True, ) self.report("Parsing done. Validating data...") - data = flz.camp.sync_data.check_yaml_dict(data) + data = flz.camp.sync_data.check_yaml_validity(data) self.data = data self.update_data() self.report("Done") From e31b8a78f3389e9dc6b3a8cc8bb8f40d525df5a2 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 16:15:16 +0100 Subject: [PATCH 22/73] [bugfix] add_dataset has no arg genealogy --- flexiznam/schema/datasets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index c6c91d7..6216f6b 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -441,7 +441,6 @@ def update_flexilims(self, mode="safe"): dataset_type=self.dataset_type, created=self.created, path=str(PurePosixPath(self.path)), - genealogy=self.genealogy, is_raw="yes" if self.is_raw else "no", project_id=self.project_id, dataset_name=self.full_name, From 16e31dcc624db9c38ad9151db395798b075d7187 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 17:28:53 +0100 Subject: [PATCH 23/73] [feature] add ds validation to seq and mic data --- flexiznam/schema/microscopy_data.py | 12 ++++++++++++ flexiznam/schema/sequencing_data.py | 14 ++++++++++---- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/flexiznam/schema/microscopy_data.py b/flexiznam/schema/microscopy_data.py index eda32cb..ab7d99c 100644 --- a/flexiznam/schema/microscopy_data.py +++ b/flexiznam/schema/microscopy_data.py @@ -139,3 +139,15 @@ def __init__( id=id, flexilims_session=flexilims_session, ) + + def is_valid(self, return_reason=False): + """Check that file exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" + if not self.path_full.exists(): + msg = f"{self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True diff --git a/flexiznam/schema/sequencing_data.py 
b/flexiznam/schema/sequencing_data.py index d226b68..6fad91d 100644 --- a/flexiznam/schema/sequencing_data.py +++ b/flexiznam/schema/sequencing_data.py @@ -126,8 +126,14 @@ def __init__( project_id=project_id, ) - def is_valid(self): - """Check that the file exist""" + def is_valid(self, return_reason=False): + """Check that file exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" if not self.path_full.exists(): - return False - return True + msg = f"{self.path_full} does not exist" + return msg if return_reason else False + return "" if return_reason else True From f04a54dbd6fe203650c636aa7f83a1bafe093f9e Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 2 Jun 2023 17:33:17 +0100 Subject: [PATCH 24/73] [sync] add project to new yaml syntax --- flexiznam/camp/sync_data.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 6b106cd..3ab80fd 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -56,7 +56,12 @@ def create_yaml_dict( format_yaml=format_yaml, parent_dict=dict(), ) - out = dict(root_folder=root_folder.parent, origin_name=origin_name, children=data) + out = dict( + root_folder=root_folder.parent, + origin_name=origin_name, + children=data, + project=project, + ) return out From 78b9dee6524cd0ed5579669f693b0960fac32818 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Sat, 26 Aug 2023 18:21:08 +0100 Subject: [PATCH 25/73] [bugfix] Parse correctly folder with . 
in name --- flexiznam/camp/sync_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 3ab80fd..ae06d87 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -192,7 +192,7 @@ def _create_yaml_dict( level_dict = dict() genealogy = list(genealogy) - level_name = level_folder.stem + level_name = level_folder.name m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", level_name) if m: level_dict["type"] = "recording" @@ -238,7 +238,7 @@ def _create_yaml_dict( parent_dict=children, ) level_dict["children"] = children - parent_dict[level_folder.stem] = level_dict + parent_dict[level_name] = level_dict return parent_dict From daaf215aa6b1ed54fd8ad5574407b32513dbb73e Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 29 Aug 2023 14:45:15 +0100 Subject: [PATCH 26/73] [feature] Add CLI entry point to GUI --- flexiznam/cli.py | 14 +++++++++++++- flexiznam/gui/__init__.py | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 flexiznam/gui/__init__.py diff --git a/flexiznam/cli.py b/flexiznam/cli.py index 6cc4dfc..b80e62e 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -6,6 +6,17 @@ def cli(): pass +@cli.command() +@click.argument("root_folder", type=click.Path(exists=True), default=".") +def gui(root_folder): + """Start the GUI""" + from flexiznam.gui import flexigui + + app = flexigui.FlexiGui() + app.root_folder.set(root_folder) + app.mainloop() + + @cli.command() @click.option("-p", "--project_id", prompt="Enter the project ID", help="Project ID.") @click.option( @@ -28,6 +39,7 @@ def cli(): show_default=True, ) def add_genealogy(project_id, name, recursive, verbose): + """Add genealogy to a flexilims entity""" from flexiznam import get_flexilims_session flm_sess = get_flexilims_session(project_id=project_id) @@ -60,9 +72,9 @@ def add_mouse( flexilims_username=None, mcms_username=None, ): + """Add a single mouse to a project.""" from flexiznam 
import main - """Add a single mouse to a project.""" click.echo("Trying to add %s in %s" % (mouse_name, project_id)) main.add_mouse( mouse_name=mouse_name, diff --git a/flexiznam/gui/__init__.py b/flexiznam/gui/__init__.py new file mode 100644 index 0000000..428404e --- /dev/null +++ b/flexiznam/gui/__init__.py @@ -0,0 +1 @@ +from . import flexigui From 3467daf10bab64181e5093e0304ba209b726a85d Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 29 Aug 2023 14:45:15 +0100 Subject: [PATCH 27/73] [feature] Add CLI entry point to GUI --- CHANGELOG.md | 1 + flexiznam/cli.py | 14 +++++++++++++- flexiznam/gui/__init__.py | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 flexiznam/gui/__init__.py diff --git a/CHANGELOG.md b/CHANGELOG.md index f6e5ca7..c99bd2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### Main changes - New `SequencingData` class to handle sequencing data +- GUI can now be used to add data to flexilims with `flexiznam gui` ## v0.3.5 diff --git a/flexiznam/cli.py b/flexiznam/cli.py index 6cc4dfc..b80e62e 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -6,6 +6,17 @@ def cli(): pass +@cli.command() +@click.argument("root_folder", type=click.Path(exists=True), default=".") +def gui(root_folder): + """Start the GUI""" + from flexiznam.gui import flexigui + + app = flexigui.FlexiGui() + app.root_folder.set(root_folder) + app.mainloop() + + @cli.command() @click.option("-p", "--project_id", prompt="Enter the project ID", help="Project ID.") @click.option( @@ -28,6 +39,7 @@ def cli(): show_default=True, ) def add_genealogy(project_id, name, recursive, verbose): + """Add genealogy to a flexilims entity""" from flexiznam import get_flexilims_session flm_sess = get_flexilims_session(project_id=project_id) @@ -60,9 +72,9 @@ def add_mouse( flexilims_username=None, mcms_username=None, ): + """Add a single mouse to a project.""" from flexiznam import main - """Add a single mouse to a project.""" click.echo("Trying to 
add %s in %s" % (mouse_name, project_id)) main.add_mouse( mouse_name=mouse_name, diff --git a/flexiznam/gui/__init__.py b/flexiznam/gui/__init__.py new file mode 100644 index 0000000..428404e --- /dev/null +++ b/flexiznam/gui/__init__.py @@ -0,0 +1 @@ +from . import flexigui From 78c341fe13d0e3cbc7a763b9bad5c8b8653ecdbc Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 29 Aug 2023 18:22:36 +0100 Subject: [PATCH 28/73] [bugfix] adapt gui to new from_dataseries --- flexiznam/camp/sync_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index ae06d87..e1833ee 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -336,7 +336,8 @@ def _check_recursively( data_series[k] = v data_series.id = None data_series.name = "_".join(origin_genealogy + child_genealogy) - ds = flz.Dataset.from_flexilims(data_series=data_series) + ds = flz.Dataset.from_dataseries(data_series) + ds.project = project msg = ds.is_valid(return_reason=True) if msg: child_dict["VALIDATION_ERROR"] = f"XXERRORXX {msg}" From c9850e546b47b7d7e7a6a3ade4d402efcc0ba6d7 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 29 Aug 2023 18:30:51 +0100 Subject: [PATCH 29/73] [bugfix] Onix data from_folder timestamp issue If the protocol start at then end of a second some timestamps are not exactly the same. 
Now can load these dataset if the timestamps are not off by more than 2s --- flexiznam/schema/onix_data.py | 55 ++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/flexiznam/schema/onix_data.py b/flexiznam/schema/onix_data.py index cc09ac0..1d34ec1 100644 --- a/flexiznam/schema/onix_data.py +++ b/flexiznam/schema/onix_data.py @@ -79,33 +79,34 @@ def from_folder( data = pd.DataFrame(data) output = dict() - for ts, df in data.groupby("timestamp"): - if ( - enforce_validity - and ("rhd2164" not in df.device_name.values) - or ("breakout" not in df.device_name.values) - ): - if verbose: - print( - "Skipping partial onix dataset %s" - % ts.strftime("%Y-%m-%d_%H_%M_%S") - ) - continue - onix_name = "onix_data_%s" % ts.strftime("%Y-%m-%d_%H_%M_%S") - extra_attributes = dict() - for device, dev_df in df.groupby("device_name"): - extra_attributes[device] = { - s.subname: s.file for s in dev_df.itertuples() - } - output[onix_name] = OnixData( - path=folder, - genealogy=folder_genealogy + (onix_name,), - extra_attributes=extra_attributes, - created=ts.strftime("%Y-%m-%d " "%H:%M:%S"), - flexilims_session=flexilims_session, - project=project, - is_raw=is_raw, - ) + if max(data.timestamp - data.timestamp.min()).total_seconds() > 2: + raise IOError(f"Multiple timestamps found in folder {folder}") + + ts = data.timestamp.min() + if ( + enforce_validity + and ("rhd2164" not in data.device_name.values) + or ("breakout" not in data.device_name.values) + ): + if verbose: + print( + "Skipping partial onix dataset %s" + % ts.strftime("%Y-%m-%d_%H_%M_%S") + ) + return + onix_name = "onix_data_%s" % ts.strftime("%Y-%m-%d_%H_%M_%S") + extra_attributes = dict() + for device, dev_df in data.groupby("device_name"): + extra_attributes[device] = {s.subname: s.file for s in dev_df.itertuples()} + output[onix_name] = OnixData( + path=folder, + genealogy=folder_genealogy + (onix_name,), + extra_attributes=extra_attributes, + 
created=ts.strftime("%Y-%m-%d " "%H:%M:%S"), + flexilims_session=flexilims_session, + project=project, + is_raw=is_raw, + ) return output def __init__( From 1708e2c85c7171b25954b83f1895cc625fd8e1f4 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 30 Aug 2023 09:47:20 +0100 Subject: [PATCH 30/73] [feature] `check_flm_issues` can add missing paths --- CHANGELOG.md | 6 ++++++ flexiznam/cli.py | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 053d5f6..5dc0593 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Change log +## v0.4.0 + +### Main changes + +- `check_flexilims_issues` can now add missing paths + ## v0.3.8 ### Main changes diff --git a/flexiznam/cli.py b/flexiznam/cli.py index 734cc3f..5562b46 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -316,7 +316,10 @@ def yaml_to_flexilims(source_yaml, raw_data_folder=None, conflicts=None): @click.option("-t", "--target_file", default=None, help="Path to write csv output.") @click.option("-r", "--root_name", default=None, help="Root entity to start the check.") @click.option("--flexilims_username", default=None, help="Your username on flexilims.") -def check_flexilims_issues(project_id, target_file, root_name, flexilims_username): +@click.option("--add-path/--no-add-path", default=False, help="Add missing paths.") +def check_flexilims_issues( + project_id, target_file, root_name, flexilims_username, add_path +): """Check that database is properly formatted This will check recursively all mice if `root_name` is not provided. 
Elements that @@ -350,3 +353,6 @@ def check_flexilims_issues(project_id, target_file, root_name, flexilims_usernam else: df = pdf df.to_csv(target_file) + if add_path: + print("Adding missing paths") + utils.add_missing_paths(flexilims_session, root_name=root_name) From 05f3b991502e7f915dcb4008616a6434ab90518d Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 31 Aug 2023 09:27:06 +0100 Subject: [PATCH 31/73] [minor] clearer error message in get_id --- flexiznam/main.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index 2ec0867..04d2310 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -45,8 +45,9 @@ def get_data_root(which, project=None, flexilims_session=None): project = flexilims_session.project_id if project not in PARAMETERS["project_ids"]: - project = lookup_project(project, prm=None) - assert project is not None, f"Invalid project {project}" + proj = lookup_project(project, prm=None) + assert proj is not None, f"Invalid project {project}" + project = proj if project in PARAMETERS["project_paths"]: return Path(PARAMETERS["project_paths"][project][which]) @@ -798,7 +799,7 @@ def get_entities( :py:class:`pandas.DataFrame`: containing all matching entities """ - assert (project_id is not None) or (flexilims_session is not None) + # assert (project_id is not None) or (flexilims_session is not None) if flexilims_session is None: flexilims_session = get_flexilims_session(project_id) results = flexilims_session.get( @@ -942,6 +943,8 @@ def get_id(name, datatype=None, project_id=None, flexilims_session=None): entity = get_entity( datatype=datatype, flexilims_session=flexilims_session, name=name ) + if entity is None: + raise FlexilimsError("Cannot find entity named `%s`" % name) return entity["id"] @@ -1081,10 +1084,27 @@ def get_datasets_recursively( For example, this is useful if you want to retrieve paths to all *scanimage* datasets associated with a given 
session. + Args: + origin_id (str): hexadecimal ID of the origin session. Not required if + origin_name is provided. + origin_name (str): text name of the origin session. Not required if origin_id + is provided. + origin_series (pandas.Series): series of the origin session. Not required if + origin_id or origin_name is provided. + dataset_type (str): type of the dataseet to filter by. If `None`, + will return all datasets. + filter_datasets (dict): dictionary of key-value pairs to filter datasets by. + parent_type (str): type of the parent entity. If `None`, will return all + filter_parents (dict): dictionary of key-value pairs to filter parents by. + return_paths (bool): if True, return a list of paths + project_id (str): text name of the project. Not required if + `flexilims_session` is provided. + flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object + _output (list): internal argument used for recursion. + Returns: dict: Dictionary with direct parent id as keys and lists of associated datasets, or dataset paths as values - """ if origin_series is None: if origin_id is None: @@ -1168,7 +1188,7 @@ def get_datasets( otherwise ensure that only one dataset exists online and return it. return_paths (bool): if True, return a list of paths return_dataseries (bool): if True, a dataframe or a dataseries - _output (list): internal argument used for recursion. 
+ """ From 91b0e121f29759be73c426e845d62463cda696bd Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 6 Oct 2023 09:58:01 +0100 Subject: [PATCH 32/73] [feature] add create yaml compatible with GUI --- flexiznam/camp/sync_data.py | 38 +++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index e1833ee..ab37407 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -16,6 +16,36 @@ from flexiznam.utils import clean_recursively +def create_yaml(root_folder, project, origin_name, output_file, overwrite=False): + """Create a yaml file from a folder + + Args: + root_folder (str): Folder to parse + project (str): Name of the project + origin_name (str): Name of the origin on flexilims + output_file (str): Full path to output yaml. + overwrite (bool, optional): Overwrite output file if it exists. Defaults to False. + """ + output_file = pathlib.Path(output_file) + if (not overwrite) and output_file.exists(): + s = input("File %s already exists. Overwrite (yes/[no])? 
" % output_file) + if s == "yes": + overwrite = True + else: + raise ( + FileExistsError( + "File %s already exists and overwrite is not allowed" % output_file + ) + ) + root_folder = pathlib.Path(root_folder) + if not root_folder.is_dir(): + raise FileNotFoundError("source_dir %s is not a directory" % root_folder) + + data = create_yaml_dict(root_folder, project, origin_name) + with open(output_file, "w") as f: + yaml.dump(data, f) + + def create_yaml_dict( root_folder, project, @@ -56,8 +86,12 @@ def create_yaml_dict( format_yaml=format_yaml, parent_dict=dict(), ) + if format_yaml: + root_folder = str(root_folder.parent) + else: + root_folder = root_folder.parent out = dict( - root_folder=root_folder.parent, + root_folder=root_folder, origin_name=origin_name, children=data, project=project, @@ -132,7 +166,7 @@ def upload_yaml( list of names of entities created/updated """ - if isinstance(source_yaml, str): + if isinstance(source_yaml, str) or isinstance(source_yaml, Path): source_yaml = Path(source_yaml) with open(source_yaml, "r") as f: yaml_data = yaml.safe_load(f) From 2b3d4339a3a3ef6f600013fa205e6b00a901f56a Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 6 Oct 2023 09:58:34 +0100 Subject: [PATCH 33/73] [bugfix] adapt CLI to new create_yaml --- flexiznam/cli.py | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/flexiznam/cli.py b/flexiznam/cli.py index ed7629b..bc25616 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -180,7 +180,9 @@ def add_password(app, username, password, password_file): @click.option( "-p", "--project", default="NOT SPECIFIED", help="Project name on flexilims." ) -@click.option("-m", "--mouse", default="NOT SPECIFIED", help="Mouse name on flexilims.") +@click.option( + "-o", "--origin", default="NOT SPECIFIED", help="Origin name on flexilims." 
+) @click.option( "--overwrite/--no-overwrite", default=False, @@ -191,38 +193,15 @@ def add_password(app, username, password, password_file): default=False, help="After creating the yaml skeleton, should I also parse it?", ) -@click.option( - "-r", - "--raw_data_folder", - default=None, - help="Path to the root folder containing raw data. Only used with " "`--process`", -) -def create_yaml( - source_dir, target_yaml, project, mouse, overwrite, process, raw_data_folder -): +def create_yaml(source_dir, target_yaml, project, origin, overwrite, process): """Create a yaml file by looking recursively in `root_dir`""" from flexiznam import camp - import pathlib - target_yaml = pathlib.Path(target_yaml) - if (not overwrite) and target_yaml.exists(): - s = input("File %s already exists. Overwrite (yes/[no])? " % target_yaml) - if s == "yes": - overwrite = True - else: - raise ( - FileExistsError( - "File %s already exists and overwrite is not allowed" % target_yaml - ) - ) - source_dir = pathlib.Path(source_dir) - if not source_dir.is_dir(): - raise FileNotFoundError("source_dir %s is not a directory" % source_dir) - yml_content = camp.sync_data.create_yaml( + camp.sync_data.create_yaml( root_folder=source_dir, - outfile=target_yaml, + output_file=target_yaml, + origin_name=origin, project=project, - mouse=mouse, overwrite=overwrite, ) click.echo("Created yml skeleton in %s" % target_yaml) From df64b070433908b2ccfdfaa09c1f9d13dc4b4947 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 6 Oct 2023 10:44:15 +0100 Subject: [PATCH 34/73] [feature] add parse_yaml that is gui compatible To parse existing yaml files --- flexiznam/camp/sync_data.py | 153 +++++++++++++++++++++++++++++------- 1 file changed, 126 insertions(+), 27 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index ab37407..0b3658a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -16,11 +16,11 @@ from flexiznam.utils import clean_recursively -def
create_yaml(root_folder, project, origin_name, output_file, overwrite=False): +def create_yaml(folder_to_parse, project, origin_name, output_file, overwrite=False): """Create a yaml file from a folder Args: - root_folder (str): Folder to parse + folder_to_parse (str): Folder to parse project (str): Name of the project origin_name (str): Name of the origin on flexilims output_file (str): Full path to output yaml. @@ -37,17 +37,17 @@ def create_yaml(root_folder, project, origin_name, output_file, overwrite=False) "File %s already exists and overwrite is not allowed" % output_file ) ) - root_folder = pathlib.Path(root_folder) - if not root_folder.is_dir(): - raise FileNotFoundError("source_dir %s is not a directory" % root_folder) + folder_to_parse = pathlib.Path(folder_to_parse) + if not folder_to_parse.is_dir(): + raise FileNotFoundError("source_dir %s is not a directory" % folder_to_parse) - data = create_yaml_dict(root_folder, project, origin_name) + data = create_yaml_dict(folder_to_parse, project, origin_name) with open(output_file, "w") as f: yaml.dump(data, f) def create_yaml_dict( - root_folder, + folder_to_parse, project, origin_name, format_yaml=True, @@ -57,7 +57,7 @@ def create_yaml_dict( Recursively parse a folder and create a yaml dict with the structure of the folder. Args: - root_folder (str): Path to the folder to parse + folder_to_parse (str): Path to the folder to parse project (str): Name of the project, used as root of the path in the output origin_name (str): Name of the origin on flexilims. Must be online and have genealogy set. 
@@ -76,20 +76,86 @@ def create_yaml_dict( assert origin is not None, f"Origin {origin_name} not found in project {project}" assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" genealogy = origin["genealogy"] - root_folder = Path(root_folder) - assert root_folder.is_dir(), f"Folder {root_folder} does not exist" + folder_to_parse = Path(folder_to_parse) + assert folder_to_parse.is_dir(), f"Folder {folder_to_parse} does not exist" data = _create_yaml_dict( - level_folder=root_folder, + level_folder=folder_to_parse, project=project, genealogy=genealogy, format_yaml=format_yaml, parent_dict=dict(), ) if format_yaml: - root_folder = str(root_folder.parent) + root_folder = str(folder_to_parse.parent) else: - root_folder = root_folder.parent + root_folder = folder_to_parse.parent + out = dict( + root_folder=root_folder, + origin_name=origin_name, + children=data, + project=project, + ) + return out + + +def parse_yaml( + yaml_file, + root_folder=None, + origin_name=None, + project=None, + format_yaml=True, +): + """Parse a yaml file and check validity + + This will add datasets to each existing levels of the yaml, but won't create + nested levels + + Args: + yaml_file (str): path to the yaml file + root_folder (str): path to the root folder. If not provided, will be read from + the yaml file. This is the folder that contains the main folder, so "mouse" + for a "session". + origin_name (str): name of the origin on flexilims. If not provided, will be + read from the yaml file + project (str): name of the project. If not provided, will be read from the yaml + file + format_yaml (bool, optional): Format the output to be yaml compatible if True, + otherwise keep dataset as Dataset object and path as pathlib.Path. Defaults + to True. 
+ Returns + dict: yaml dict with datasets added + """ + yaml_data = check_yaml_validity(yaml_file, root_folder, origin_name, project) + if root_folder is None: + root_folder = Path(yaml_data["root_folder"]) + assert root_folder.is_dir(), f"Folder {root_folder} does not exist" + + if project is None: + project = yaml_data["project"] + flm_sess = flz.get_flexilims_session(project_id=project) + + if origin_name is None: + origin_name = yaml_data["origin_name"] + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) + assert origin is not None, f"Origin {origin_name} not found in project {project}" + assert "genealogy" in origin, f"Origin {origin_name} has no genealogy" + genealogy = origin["genealogy"] + + assert len(yaml_data["children"]) == 1, "Parsing only one folder is allowed" + child = list(yaml_data["children"].keys())[0] + data = _create_yaml_dict( + level_folder=root_folder / child, + project=project, + genealogy=genealogy, + format_yaml=format_yaml, + parent_dict=yaml_data["children"], + only_datasets=True, + ) + if format_yaml: + root_folder = str(root_folder) out = dict( root_folder=root_folder, origin_name=origin_name, @@ -204,6 +270,7 @@ def _create_yaml_dict( genealogy, format_yaml, parent_dict, + only_datasets=False, ): """Private function to create a yaml dict from a folder @@ -219,34 +286,59 @@ def _create_yaml_dict( format_yaml (bool): format results to be yaml compatible or keep Dataset and pathlib.Path objects parent_dict (dict): dict of the parent folder. 
Used for recursion + only_datasets (bool): only parse datasets, not folders """ level_folder = Path(level_folder) assert level_folder.is_dir(), "root_folder must be a directory" - level_dict = dict() + level_name = level_folder.name + if level_name in parent_dict: + level_dict = parent_dict[level_name] + else: + level_dict = dict() genealogy = list(genealogy) - level_name = level_folder.name m = re.fullmatch(r"R\d\d\d\d\d\d_?(.*)?", level_name) if m: - level_dict["type"] = "recording" - level_dict["protocol"] = ( - m[1] if m[1] is not None else "XXERRORXX PROTOCOL NOT SPECIFIED" - ) - level_dict["recording_type"] = "XXERRORXX error RECORDING TYPE NOT SPECIFIED" - + if "type" in level_dict: + assert ( + level_dict["type"] == "recording" + ), "Conflicting types, expected recording" + else: + level_dict["type"] = "recording" + if "protocol" not in level_dict: + level_dict["protocol"] = ( + m[1] if m[1] is not None else "XXERRORXX PROTOCOL NOT SPECIFIED" + ) + if "recording_type" not in level_dict: + level_dict["recording_type"] = "XXERRORXX RECORDING TYPE NOT SPECIFIED" elif re.fullmatch(r"S\d*", level_name): - level_dict["type"] = "session" + if "type" in level_dict: + assert ( + level_dict["type"] == "session" + ), "Conflicting types, expected session" + else: + level_dict["type"] = "session" else: - level_dict["type"] = "sample" - level_dict["genealogy"] = genealogy + [level_name] - level_dict["path"] = Path(project, *level_dict["genealogy"]) + if "type" not in level_dict: + level_dict["type"] = "sample" + if "genealogy" in level_dict: + assert level_dict["genealogy"] == genealogy + [ + level_name + ], f"Conflicting genealogy for {level_name}" + else: + level_dict["genealogy"] = genealogy + [level_name] + if "path" not in level_dict: + level_dict["path"] = Path(project, *level_dict["genealogy"]) if format_yaml: level_dict["path"] = str(PurePosixPath(level_dict["path"])) - children = dict() + children = dict() if "children" not in level_dict else 
level_dict["children"] datasets = Dataset.from_folder(level_folder) if datasets: for ds_name, ds in datasets.items(): + if ds_name in children: + warnings.warn(f"Dataset {ds_name} already exists in {level_name}. Skip") + continue ds.genealogy = genealogy + list(ds.genealogy) if format_yaml: # find path root @@ -262,7 +354,14 @@ def _create_yaml_dict( else: children[ds_name] = ds - for child in level_folder.glob("*"): + if only_datasets: + subfolders = [ + level_folder / n for n, c in children.items() if c["type"] != "dataset" + ] + else: + subfolders = level_folder.glob("*") + + for child in subfolders: if child.is_dir(): _create_yaml_dict( child, From ce0b20e84f84f5cb9d001d3a8f20340b7b31d647 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 6 Oct 2023 11:38:20 +0100 Subject: [PATCH 35/73] [feature] adapt parse yaml to empty levels One can just give a recording name, with nothing below --- flexiznam/camp/sync_data.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 0b3658a..0710471 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -100,7 +100,7 @@ def create_yaml_dict( def parse_yaml( - yaml_file, + yaml_data, root_folder=None, origin_name=None, project=None, @@ -112,7 +112,7 @@ def parse_yaml( nested levels Args: - yaml_file (str): path to the yaml file + yaml_file (str): path to the yaml file (or data as dict) root_folder (str): path to the root folder. If not provided, will be read from the yaml file. This is the folder that contains the main folder, so "mouse" for a "session". 
@@ -126,7 +126,10 @@ def parse_yaml( Returns dict: yaml dict with datasets added """ - yaml_data = check_yaml_validity(yaml_file, root_folder, origin_name, project) + if isinstance(yaml_data, str) or isinstance(yaml_data, Path): + with open(yaml_data, "r") as f: + yaml_data = yaml.safe_load(f) + if root_folder is None: root_folder = Path(yaml_data["root_folder"]) assert root_folder.is_dir(), f"Folder {root_folder} does not exist" @@ -162,10 +165,29 @@ def parse_yaml( children=data, project=project, ) + yaml_data = check_yaml_validity(yaml_data, root_folder, origin_name, project) return out def check_yaml_validity(yaml_data, root_folder=None, origin_name=None, project=None): + """Check that a yaml file is valid + + This will check that the genealogy is correct, that the datasets are valid and + that the folder structure is correct + + Args: + yaml_file (str): path to the yaml file (or data as dict) + root_folder (str): path to the root folder. If not provided, will be read from + the yaml file. This is the folder that contains the main folder, so "mouse" + for a "session". + origin_name (str): name of the origin on flexilims. If not provided, will be + read from the yaml file + project (str): name of the project. 
If not provided, will be read from the yaml + file + + Returns: + dict: same as input yaml_data, but with errors added + """ if isinstance(yaml_data, str) or isinstance(yaml_data, Path): with open(yaml_data, "r") as f: yaml_data = yaml.safe_load(f) @@ -294,6 +316,8 @@ def _create_yaml_dict( level_name = level_folder.name if level_name in parent_dict: level_dict = parent_dict[level_name] + if level_dict is None: + level_dict = dict() else: level_dict = dict() genealogy = list(genealogy) @@ -356,7 +380,9 @@ def _create_yaml_dict( if only_datasets: subfolders = [ - level_folder / n for n, c in children.items() if c["type"] != "dataset" + level_folder / n + for n, c in children.items() + if (c is None) or (c.get("type", "unknown") != "dataset") ] else: subfolders = level_folder.glob("*") From 4aee89e09036d44b47338ad2581146632fbc1a33 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 6 Oct 2023 16:42:18 +0100 Subject: [PATCH 36/73] [bugfix] typo in parse_folder --- flexiznam/gui/flexigui.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index 91adf41..c3916ff 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -182,7 +182,7 @@ def parse_folder(self): self.report(f"Parsing folder {folder}...") self.root_folder.set(folder) data = flz.camp.sync_data.create_yaml_dict( - root_folder=folder, + folder_to_parse=folder, project=self.project.get(), origin_name=self.origin_name.get(), format_yaml=True, From 73990b74ca931026fe16e8eae346ce77d01c2e72 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 13 Oct 2023 16:33:56 +0100 Subject: [PATCH 37/73] [feature] get tickbox of the gui to do stuff fixes #116 --- flexiznam/camp/sync_data.py | 30 +++++++++++++--- flexiznam/gui/flexigui.py | 71 +++++++++++++++++++++++++++++++++---- 2 files changed, 90 insertions(+), 11 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index 0710471..de4c0fd 100644 --- 
a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -165,7 +165,10 @@ def parse_yaml( children=data, project=project, ) - yaml_data = check_yaml_validity(yaml_data, root_folder, origin_name, project) + yaml_data, errors = check_yaml_validity( + yaml_data, root_folder, origin_name, project + ) + return out @@ -218,14 +221,14 @@ def check_yaml_validity(yaml_data, root_folder=None, origin_name=None, project=N origin = flz.get_entity(name=origin_name, flexilims_session=flm_sess) assert hasattr(origin, "genealogy"), f"Origin {origin_name} has no genealogy" - _check_recursively( + errors = _check_recursively( yaml_data["children"], origin_genealogy=origin["genealogy"], root_folder=root_folder, project=project, genealogy=[], ) - return yaml_data + return yaml_data, errors def upload_yaml( @@ -478,8 +481,16 @@ def _upload_yaml_dict( def _check_recursively( - yaml_data, origin_genealogy, root_folder, project, genealogy, fixerrors=False + yaml_data, + origin_genealogy, + root_folder, + project, + genealogy, + fixerrors=False, + errors=None, ): + if errors is None: + errors = dict() root_folder = Path(root_folder) for child, child_dict in yaml_data.items(): @@ -489,6 +500,7 @@ def _check_recursively( if child_dict["type"] != "dataset": if not fname.is_dir(): child_dict["PATH_ERROR"] = f"XXERRORXX folder {fname} does not exist" + errors[fname] = child_dict else: data_series = pd.Series(child_dict) for k, v in data_series.pop("extra_attributes").items(): @@ -500,6 +512,7 @@ def _check_recursively( msg = ds.is_valid(return_reason=True) if msg: child_dict["VALIDATION_ERROR"] = f"XXERRORXX {msg}" + errors[fname] = child_dict if child_dict["genealogy"] != origin_genealogy + child_genealogy: if fixerrors: @@ -507,6 +520,7 @@ def _check_recursively( child_dict["genealogy"] = origin_genealogy + child_genealogy else: child_dict["GENEALOGY_ERROR"] = f"XXERRORXX genealogy is not correct" + errors[fname] = child_dict if "children" in child_dict: _check_recursively( 
child_dict["children"], @@ -514,10 +528,18 @@ def _check_recursively( root_folder, project, genealogy=genealogy + [child], + fixerrors=fixerrors, + errors=errors, ) + return errors if __name__ == "__main__": + example_yml = "/Users/blota/Desktop/test_yaml.yml" + out = parse_yaml(example_yml) + with open("/Users/blota/Desktop/test_yaml_redump.yml", "w") as f: + yaml.dump(out, f) + rel = "blota_onix_pilote/BRAC7448.2d/" root_folder = Path(flz.PARAMETERS["data_root"]["raw"]) / rel yaml_file = Path(flz.PARAMETERS["data_root"]["processed"]) / rel / "S20230421.yml" diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index c3916ff..cba5684 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -157,6 +157,22 @@ def _create_statusbar(self): ############# GUI update methods ############# # These methods are used to actually do stuff with the GUI elements + def get_checked_data(self, item=None, checked_data=None): + if checked_data is None: + checked_data = dict(children=dict()) + for k in ["project", "origin_name", "root_folder"]: + checked_data[k] = self.data[k] + + for child in self.treeview.get_children(item=item): + if self.treeview.tag_has("checked", child): + name, data = self._entity_by_itemid[child] + data = data.copy() + if "children" in data: + data["children"] = {} + data = self.get_checked_data(item=child, checked_data=data) + checked_data["children"][name] = data + return checked_data + def report(self, message): self.sb_msg.set(message) print(message) @@ -188,9 +204,11 @@ def parse_folder(self): format_yaml=True, ) self.report("Parsing done. 
Validating data...") - data = flz.camp.sync_data.check_yaml_validity(data) + data, errors = flz.camp.sync_data.check_yaml_validity(data) self.data = data - self.update_data() + self.update_data(remove_unchecked=False) + checked = self.get_checked_data(item=None, checked_data=None) + assert checked == self.data self.report("Done") def chg_root_folder(self): @@ -229,17 +247,20 @@ def load_yaml(self): self.update_data() self.report("Done") - def update_data(self, name_to_select=None): + def update_data(self, name_to_select=None, remove_unchecked=True): """Update GUI data from self.data Args: name_to_select (str, optional): Name of item to select in treeview. Defaults to None.""" self.report("Updating GUI") + if remove_unchecked: + self.data = self.get_checked_data() self.textview.delete("1.0", tk.END) self.selected_item.set("None") self.treeview.delete(*self.treeview.get_children()) self._entity_by_itemid = {} + if "project" in self.data: self.project.set(self.data["project"]) if "origin_name" in self.data: @@ -262,7 +283,6 @@ def _insert_yaml_data(self, data, parent="", name_to_select=None): values=[dtype], open=True, ) - self.treeview.change_state(item, "checked") if any( [ v.startswith("XXERRORXX") @@ -272,8 +292,8 @@ def _insert_yaml_data(self, data, parent="", name_to_select=None): ): self.contains_errors = True self.report(f"ERROR: {child} contains errors") - self.treeview.item(item, tags=("error",)) - + self.treeview.item(item, tags=("error", "checked")) + self.treeview.change_state(item, "checked") self._entity_by_itemid[item] = (child, child_data) if name_to_select and child == name_to_select: self.treeview.focus(item) @@ -313,7 +333,8 @@ def upload(self): return self.report("Validating data...") - self.data = flz.camp.sync_data.check_yaml_validity(self.data) + self.update_data() + data, errors = flz.camp.sync_data.check_yaml_validity(self.get_checked_data()) if self.contains_errors: tk.messagebox.showerror( @@ -323,8 +344,16 @@ def upload(self): return data 
= dict(self.data) + # remove unchecked items + for item in self.treeview.get_children(): + if not self.treeview.tag_has("checked", item): + name, _ = self._entity_by_itemid[item] + self.report(f"Removing item {name}") + data["children"].pop(name) + data["project"] = self.project.get() data["root_folder"] = self.root_folder.get() + self.report("Validating data...") flz.camp.sync_data.upload_yaml( source_yaml=data, @@ -368,5 +397,33 @@ def update_item(self): if __name__ == "__main__": + + def diffofdict(d1, d2, diff=None, level=""): + """Find differences between 2 dictionary of dictionaries""" + + if diff is None: + diff = [] + all_keys = set(list(d1.keys()) + list(d2.keys())) + for k in all_keys: + level = level + k + "." + if k not in d2: + diff.append(f"{level} (missing in d2)") + elif k not in d1: + diff.append(f"{level} (missing in d1)") + elif isinstance(d1[k], dict): + diff = diffofdict(d1[k], d2[k], diff, level) + elif d1[k] != d2[k]: + diff.append(f"{level} ({d1[k]} != {d2[k]})") + return diff + app = FlexiGui() + app.root_folder.set( + "/Volumes/lab-znamenskiyp/data/instruments/raw_data/projects/blota_onix_pilote/BRYA142.5d/" + ) + app.origin_name.set("BRYA142.5d") + app.project.set("blota_onix_pilote") app.mainloop() + df = diffofdict(app.data["children"], app.get_checked_data()["children"]) + a = app.data["children"]["S20230915"]["children"] + b = app.get_checked_data()["children"]["S20230915"]["children"] + a == b From f232d0a24a4b8b554a49bd4c4e56a3eeac0fee2e Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 20 Oct 2023 15:11:24 +0100 Subject: [PATCH 38/73] [minor] Clearer error message in create_yaml --- flexiznam/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/cli.py b/flexiznam/cli.py index fd6bddc..ae0bf53 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -206,7 +206,7 @@ def create_yaml(source_dir, target_yaml, project, origin, overwrite, process): ) click.echo("Created yml skeleton in %s" % target_yaml) 
if process: - raise NotImplementedError + raise NotImplementedError("Process yaml at creation is not implemented yet") @cli.command() From 254e8705cc758cd29ed6ca5142a76b4d45114d85 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 23 Oct 2023 13:55:49 +0100 Subject: [PATCH 39/73] [bugfix] tristate tag was not recognised in gui --- flexiznam/gui/flexigui.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/flexiznam/gui/flexigui.py b/flexiznam/gui/flexigui.py index cba5684..bad2b49 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -164,7 +164,9 @@ def get_checked_data(self, item=None, checked_data=None): checked_data[k] = self.data[k] for child in self.treeview.get_children(item=item): - if self.treeview.tag_has("checked", child): + if self.treeview.tag_has("checked", child) or self.treeview.tag_has( + "tristate", child + ): name, data = self._entity_by_itemid[child] data = data.copy() if "children" in data: From aefc1cb13b146fe7fbf91669beba5f841b2c7c63 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 25 Oct 2023 09:44:25 +0100 Subject: [PATCH 40/73] [bugfix] scanimage valid with full path --- flexiznam/schema/scanimage_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/schema/scanimage_data.py b/flexiznam/schema/scanimage_data.py index a973a74..9468890 100644 --- a/flexiznam/schema/scanimage_data.py +++ b/flexiznam/schema/scanimage_data.py @@ -240,7 +240,7 @@ def is_valid(self, return_reason=False, tif_files=None): # checking file one by one is long, compare sets tif_files = set(tif_files) existing_file = { - f for f in os.listdir(self.path) if f.endswith(("tif", ".tiff")) + f for f in os.listdir(self.path_full) if f.endswith(("tif", ".tiff")) } if tif_files - existing_file: msg = "Some tif files do not exist: %s" % (tif_files - existing_file) From 6604eb34ed0eccda6c7facd81f91748934daea82 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 25 Oct 2023 09:58:22 +0100 Subject: 
[PATCH 41/73] [feature] relax constrain on recording type Try to autoguess what it is and default to accepted "NOT SPECIFIED" --- flexiznam/camp/sync_data.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index de4c0fd..fd6139a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -338,7 +338,14 @@ def _create_yaml_dict( m[1] if m[1] is not None else "XXERRORXX PROTOCOL NOT SPECIFIED" ) if "recording_type" not in level_dict: - level_dict["recording_type"] = "XXERRORXX RECORDING TYPE NOT SPECIFIED" + if "camera" in level_dict["protocol"]: + level_dict["recording_type"] = "camera" + elif "onix" in level_dict["protocol"]: + level_dict["recording_type"] = "ephys" + elif "harp" in level_dict["protocol"]: + level_dict["recording_type"] = "behaviour" + else: + level_dict["recording_type"] = "NOT SPECIFIED" elif re.fullmatch(r"S\d*", level_name): if "type" in level_dict: assert ( From 1f5cdc43046f7b3c5f46d3a1547251491276f5a0 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 9 Jan 2024 13:58:10 +0000 Subject: [PATCH 42/73] [feature] add a visstim dataset To be able to decouple harp and visual stimulation TODO: allow multiple dataset per folder? 
Or make the parent folder the dataset instead of a recording --- flexiznam/schema/__init__.py | 2 + flexiznam/schema/visstim_data.py | 139 ++++++++++++++++++ .../tests_schema/test_visstim.py | 24 +++ 3 files changed, 165 insertions(+) create mode 100644 flexiznam/schema/visstim_data.py create mode 100644 tests/test_components/tests_schema/test_visstim.py diff --git a/flexiznam/schema/__init__.py b/flexiznam/schema/__init__.py index c5ba526..d922e26 100644 --- a/flexiznam/schema/__init__.py +++ b/flexiznam/schema/__init__.py @@ -29,6 +29,7 @@ from .microscopy_data import MicroscopyData from .onix_data import OnixData from .sequencing_data import SequencingData +from .visstim_data import VisStimData Dataset.SUBCLASSES["camera"] = CameraData Dataset.SUBCLASSES["harp"] = HarpData @@ -36,3 +37,4 @@ Dataset.SUBCLASSES["microscopy"] = MicroscopyData Dataset.SUBCLASSES["onix"] = OnixData Dataset.SUBCLASSES["sequencing"] = SequencingData +Dataset.SUBCLASSES["visstim"] = VisStimData diff --git a/flexiznam/schema/visstim_data.py b/flexiznam/schema/visstim_data.py new file mode 100644 index 0000000..d7084f4 --- /dev/null +++ b/flexiznam/schema/visstim_data.py @@ -0,0 +1,139 @@ +import datetime +import os +import pathlib +import re + +from flexiznam.schema.datasets import Dataset + + +class VisStimData(Dataset): + DATASET_TYPE = "visstim" + + @classmethod + def from_folder( + cls, + folder, + folder_genealogy=None, + is_raw=None, + verbose=True, + flexilims_session=None, + project=None, + ): + """Create a visual stimulation dataset by loading info from folder + + A visual stimulation dataset is a folder containing at least a `FrameLog.csv` + file and any number of other associated csvs. + + Args: + folder (str): path to the folder + folder_genealogy (tuple): genealogy of the folder, if None assume that + the genealogy is just (folder,), i.e. no parents + is_raw (bool): does this folder contain raw data? 
+ verbose (bool=True): print info about what is found + flexilims_session (flm.Session): session to interact with flexilims + project (str): project ID or name + + Returns: + dict of dataset (flz.schema.harp_data.HarpData) + """ + + csv_files = list(pathlib.Path(folder).glob("*.csv")) + + fnames = [f.name for f in csv_files] + if 'framelog.csv' not in [f.lower() for f in fnames]: + raise IOError("Cannot find FrameLog.csv file") + + log_file = [f for f in csv_files if f.name.lower() == 'framelog.csv'][0] + if verbose: + print(f"Found FrameLog.csv file: {log_file}") + + if folder_genealogy is None: + folder_genealogy = (pathlib.Path(folder).stem,) + elif isinstance(folder_genealogy, list): + folder_genealogy = tuple(folder_genealogy) + output = {} + extra_attributes = dict(csv_files={f.stem: f.name for f in csv_files}) + genealogy = folder_genealogy + ("visstim",) + created = datetime.datetime.fromtimestamp(log_file.stat().st_mtime) + output["visstim"] = VisStimData( + genealogy=genealogy, + is_raw=is_raw, + path=folder, + extra_attributes=extra_attributes, + created=created.strftime("%Y-%m-%d %H:%M:%S"), + flexilims_session=flexilims_session, + project=project, + ) + return output + + def __init__( + self, + path, + is_raw=None, + genealogy=None, + extra_attributes=None, + created=None, + project=None, + project_id=None, + origin_id=None, + id=None, + flexilims_session=None, + ): + """Create a VisStim dataset + + Args: + path: folder containing the dataset or path to file (valid only for single + file datasets) + is_raw: bool, used to sort in raw and processed subfolders + genealogy (tuple): parents of this dataset from the project (excluded) down to + the dataset name itself (included) + extra_attributes: dict, optional attributes. + created: Creation date, in "YYYY-MM-DD HH:mm:SS" + project: name of the project. Must be in config, can be guessed from + project_id + project_id: hexadecimal code for the project. 
Must be in config, can be + guessed from project + origin_id: hexadecimal code for the origin on flexilims. + id: hexadecimal code for the dataset on flexilims. + flexilims_session: authentication session to connect to flexilims + + Expected extra_attributes: + csv_files (optional): Dictionary of csv files associated to the binary file. + Keys are identifier provided for convenience, + values are the full file name + """ + + super().__init__( + genealogy=genealogy, + path=path, + is_raw=is_raw, + dataset_type=VisStimData.DATASET_TYPE, + extra_attributes=extra_attributes, + created=created, + project=project, + project_id=project_id, + origin_id=origin_id, + id=id, + flexilims_session=flexilims_session, + ) + + @property + def csv_files(self): + return self.extra_attributes.get("csv_files", None) + + @csv_files.setter + def csv_files(self, value): + self.extra_attributes["csv_files"] = str(value) + + def is_valid(self, return_reason=False): + """Check that all csv files exist + + Args: + return_reason (bool): if True, return a string with the reason why the + dataset is not valid + Returns:""" + for _, file_path in self.csv_files.items(): + if not (self.path_full / file_path).exists(): + msg = f"Missing file {file_path}" + return msg if return_reason else False + return "" if return_reason else True \ No newline at end of file diff --git a/tests/test_components/tests_schema/test_visstim.py b/tests/test_components/tests_schema/test_visstim.py new file mode 100644 index 0000000..f5d6507 --- /dev/null +++ b/tests/test_components/tests_schema/test_visstim.py @@ -0,0 +1,24 @@ +import pytest +from flexiznam.schema.visstim_data import VisStimData +from tests.tests_resources.data_for_testing import DATA_ROOT + + +def test_vistim(): + folder_genealogy = ["mouse_onix", "S20230915", "R165222_SpheresPermTubeReward"] + data_dir = DATA_ROOT.joinpath(*folder_genealogy) + ds = VisStimData.from_folder(data_dir, verbose=False) + assert len(ds) == 1 + ds_name = "visstim" + d = 
ds[ds_name] + assert d.full_name == folder_genealogy[-1] + "_" + ds_name + d.project = "demo_project" + assert d.is_valid() + assert len(d.csv_files) == 4 + ds = VisStimData.from_folder( + data_dir, verbose=False, folder_genealogy=folder_genealogy + ) + d = ds[ds_name] + d.project = "demo_project" + assert d.full_name == "_".join(folder_genealogy + [ds_name]) + assert d.is_valid() + assert len(d.csv_files) == 4 From 45187277e931076fd047715f956583e1283652d8 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 30 Jan 2024 17:54:20 +0000 Subject: [PATCH 43/73] [feature] add conflicts to add_mouse --- CHANGELOG.md | 1 + flexiznam/main.py | 32 ++++++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 122fa2c..c979a8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ ### Minor - `add_mouse` uploads birth and death dates in a human readable format instead. +- Add `conflicts` argument to `add_mouse` to overwrite existing mice - `get_entities` does not raise warnings anymore if `name` is specified and `datatype` is not. 
This is now supported upstream by `flexilims` diff --git a/flexiznam/main.py b/flexiznam/main.py index e7c94be..daac58c 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -139,6 +139,7 @@ def add_mouse( mcms_password=None, flexilims_username=None, flexilims_password=None, + conflicts="abort", ): """Check if a mouse is already in the database and add it if it isn't @@ -162,6 +163,8 @@ def add_mouse( flexilims session is not provided flexilims_password (str): [optional] password for flexilims, used only if flexilims session is not provided + conflicts (str): `abort`, `skip`, `update` or `overwrite` (see update_entity for + detailed description) Returns (dict): flexilims reply @@ -175,8 +178,14 @@ def add_mouse( mice_df = get_entities(flexilims_session=flexilims_session, datatype="mouse") if mouse_name in mice_df.index: - print("Mouse already online") - return mice_df.loc[mouse_name] + if conflicts.lower() == "skip": + print("Mouse already online") + return mice_df.loc[mouse_name] + elif conflicts.lower() == "abort": + raise FlexilimsError("Mouse already online") + is_online = True + else: + is_online = False if mouse_info is None: mouse_info = {} @@ -222,12 +231,19 @@ def add_mouse( mouse_info["genealogy"] = [mouse_name] project_name = lookup_project(flexilims_session.project_id, PARAMETERS) mouse_info["path"] = str(Path(project_name) / mouse_name) - resp = flexilims_session.post( - datatype="mouse", - name=mouse_name, - attributes=mouse_info, - strict_validation=False, - ) + if is_online: + resp = update_entity(datatype='mouse', + name=mouse_name, + mode=conflicts, + attributes=mouse_info, + flexilims_session=flexilims_session) + else: + resp = flexilims_session.post( + datatype="mouse", + name=mouse_name, + attributes=mouse_info, + strict_validation=False, + ) return resp From 49637217749e17ba36852334afcfd4130092b00b Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 1 Feb 2024 11:06:40 +0000 Subject: [PATCH 44/73] [minor] improve error message --- 
CHANGELOG.md | 1 + flexiznam/mcms.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c979a8b..6611da1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - Add `conflicts` argument to `add_mouse` to overwrite existing mice - `get_entities` does not raise warnings anymore if `name` is specified and `datatype` is not. This is now supported upstream by `flexilims` +- Clearer error message when mouse info cannot be found in MCMS ### Bugfixes diff --git a/flexiznam/mcms.py b/flexiznam/mcms.py index 2976375..24c23ee 100644 --- a/flexiznam/mcms.py +++ b/flexiznam/mcms.py @@ -1,8 +1,8 @@ import re import pandas as pd -from pymcms.main import McmsSession +from requests.exceptions import InvalidURL from flexiznam.config import PARAMETERS, get_password - +from pymcms.main import McmsSession def get_mouse_info(mouse_name, username, password=None): """Load mouse info from mcms in a dataframe @@ -18,7 +18,11 @@ def get_mouse_info(mouse_name, username, password=None): if password is None: password = get_password(username=username, app="mcms") mcms_sess = McmsSession(username=username, password=password) - original_data = mcms_sess.get_animal(name=mouse_name) + try: + original_data = mcms_sess.get_animal(name=mouse_name) + except InvalidURL: + raise InvalidURL(f"Mouse {mouse_name} not found under your PPL") + # convert to camel case for flexlilims mouse_data = {} pattern = re.compile(r"(? 
Date: Thu, 29 Feb 2024 19:12:02 +0000 Subject: [PATCH 45/73] [bugfix] issue in update_flexilims skip In skip mode the dataset name was wrong --- flexiznam/schema/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 6ea04b9..548cd8a 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -414,7 +414,7 @@ def update_flexilims(self, mode="safe"): resp = flz.update_entity( datatype="dataset", id=self.id, - name=self.full_name, + name=self.dataset_name, origin_id=self.origin_id, mode=mode, attributes=attributes, From 4913451d5fc15912fa454f84db3e0f89e0c148d2 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 29 Feb 2024 19:12:40 +0000 Subject: [PATCH 46/73] [style] black --- flexiznam/main.py | 12 +++++++----- flexiznam/mcms.py | 1 + flexiznam/schema/visstim_data.py | 14 +++++++------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index daac58c..e13bfdf 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -232,11 +232,13 @@ def add_mouse( project_name = lookup_project(flexilims_session.project_id, PARAMETERS) mouse_info["path"] = str(Path(project_name) / mouse_name) if is_online: - resp = update_entity(datatype='mouse', - name=mouse_name, - mode=conflicts, - attributes=mouse_info, - flexilims_session=flexilims_session) + resp = update_entity( + datatype="mouse", + name=mouse_name, + mode=conflicts, + attributes=mouse_info, + flexilims_session=flexilims_session, + ) else: resp = flexilims_session.post( datatype="mouse", diff --git a/flexiznam/mcms.py b/flexiznam/mcms.py index 24c23ee..8e22bce 100644 --- a/flexiznam/mcms.py +++ b/flexiznam/mcms.py @@ -4,6 +4,7 @@ from flexiznam.config import PARAMETERS, get_password from pymcms.main import McmsSession + def get_mouse_info(mouse_name, username, password=None): """Load mouse info from mcms in a dataframe diff --git 
a/flexiznam/schema/visstim_data.py b/flexiznam/schema/visstim_data.py index d7084f4..b3a179f 100644 --- a/flexiznam/schema/visstim_data.py +++ b/flexiznam/schema/visstim_data.py @@ -21,7 +21,7 @@ def from_folder( ): """Create a visual stimulation dataset by loading info from folder - A visual stimulation dataset is a folder containing at least a `FrameLog.csv` + A visual stimulation dataset is a folder containing at least a `FrameLog.csv` file and any number of other associated csvs. Args: @@ -38,15 +38,15 @@ def from_folder( """ csv_files = list(pathlib.Path(folder).glob("*.csv")) - + fnames = [f.name for f in csv_files] - if 'framelog.csv' not in [f.lower() for f in fnames]: + if "framelog.csv" not in [f.lower() for f in fnames]: raise IOError("Cannot find FrameLog.csv file") - - log_file = [f for f in csv_files if f.name.lower() == 'framelog.csv'][0] + + log_file = [f for f in csv_files if f.name.lower() == "framelog.csv"][0] if verbose: print(f"Found FrameLog.csv file: {log_file}") - + if folder_genealogy is None: folder_genealogy = (pathlib.Path(folder).stem,) elif isinstance(folder_genealogy, list): @@ -136,4 +136,4 @@ def is_valid(self, return_reason=False): if not (self.path_full / file_path).exists(): msg = f"Missing file {file_path}" return msg if return_reason else False - return "" if return_reason else True \ No newline at end of file + return "" if return_reason else True From a67cbde1e24b48a37c213a3abdc5e9e870be3117 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 29 Feb 2024 19:21:40 +0000 Subject: [PATCH 47/73] [style] ruff to remove unused imports --- flexiznam/camp/sync_data.py | 8 +------- flexiznam/cli.py | 1 - flexiznam/gui/flexigui.py | 1 - flexiznam/main.py | 4 ++-- flexiznam/mcms.py | 2 +- flexiznam/schema/datasets.py | 2 -- flexiznam/schema/sequencing_data.py | 2 -- flexiznam/schema/visstim_data.py | 2 -- tests/test_2p.py | 1 - tests/test_barseq.py | 1 - tests/test_components/test_cli.py | 1 - tests/test_components/test_main.py | 2 
+- tests/test_components/test_utils.py | 1 - tests/test_components/tests_schema/test_camera_data.py | 1 - tests/test_components/tests_schema/test_harp.py | 1 - .../test_components/tests_schema/test_microscopy_data.py | 1 - tests/test_components/tests_schema/test_scanimage_data.py | 1 - .../test_components/tests_schema/test_sequencing_data.py | 3 +-- tests/test_components/tests_schema/test_visstim.py | 1 - tests/tests_resources/data_for_testing.py | 3 +-- 20 files changed, 7 insertions(+), 32 deletions(-) diff --git a/flexiznam/camp/sync_data.py b/flexiznam/camp/sync_data.py index fd6139a..8b3aa3a 100644 --- a/flexiznam/camp/sync_data.py +++ b/flexiznam/camp/sync_data.py @@ -1,19 +1,13 @@ """File to handle acquisition yaml file and create datasets on flexilims""" -import os import pathlib from pathlib import Path, PurePosixPath import re -import copy import warnings import pandas as pd import yaml -from yaml.parser import ParserError import flexiznam as flz -from flexiznam.errors import SyncYmlError, FlexilimsError from flexiznam.schema import Dataset -from flexiznam.config import PARAMETERS -from flexiznam.utils import clean_recursively def create_yaml(folder_to_parse, project, origin_name, output_file, overwrite=False): @@ -526,7 +520,7 @@ def _check_recursively( print(f"Fixing genealogy for {child}") child_dict["genealogy"] = origin_genealogy + child_genealogy else: - child_dict["GENEALOGY_ERROR"] = f"XXERRORXX genealogy is not correct" + child_dict["GENEALOGY_ERROR"] = "XXERRORXX genealogy is not correct" errors[fname] = child_dict if "children" in child_dict: _check_recursively( diff --git a/flexiznam/cli.py b/flexiznam/cli.py index ae0bf53..61e97d7 100644 --- a/flexiznam/cli.py +++ b/flexiznam/cli.py @@ -319,7 +319,6 @@ def check_flexilims_issues( """ from flexiznam.main import get_flexilims_session from flexiznam import utils - import pathlib import pandas as pd flexilims_session = get_flexilims_session( diff --git a/flexiznam/gui/flexigui.py 
b/flexiznam/gui/flexigui.py index bad2b49..eb9b3a8 100644 --- a/flexiznam/gui/flexigui.py +++ b/flexiznam/gui/flexigui.py @@ -1,6 +1,5 @@ import os import tkinter as tk -from tkinter import ttk from ttkwidgets import CheckboxTreeview import yaml from pathlib import Path diff --git a/flexiznam/main.py b/flexiznam/main.py index e13bfdf..1530d98 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -9,8 +9,8 @@ import flexiznam import yaml from flexiznam import mcms -from flexiznam.config import PARAMETERS, get_password, add_password -from flexiznam.errors import NameNotUniqueError, FlexilimsError, ConfigurationError +from flexiznam.config import PARAMETERS, get_password +from flexiznam.errors import NameNotUniqueError, FlexilimsError warnings.simplefilter("always", DeprecationWarning) diff --git a/flexiznam/mcms.py b/flexiznam/mcms.py index 8e22bce..8d756b7 100644 --- a/flexiznam/mcms.py +++ b/flexiznam/mcms.py @@ -1,7 +1,7 @@ import re import pandas as pd from requests.exceptions import InvalidURL -from flexiznam.config import PARAMETERS, get_password +from flexiznam.config import get_password from pymcms.main import McmsSession diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 548cd8a..3f08709 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -1,9 +1,7 @@ import pathlib from datetime import datetime from pathlib import Path, PurePosixPath -import numpy as np import pandas as pd -from flexilims.utils import check_flexilims_validity import flexiznam as flz from flexiznam import utils from flexiznam.errors import FlexilimsError, DatasetError diff --git a/flexiznam/schema/sequencing_data.py b/flexiznam/schema/sequencing_data.py index 0e6f972..ca453b4 100644 --- a/flexiznam/schema/sequencing_data.py +++ b/flexiznam/schema/sequencing_data.py @@ -1,7 +1,5 @@ import datetime -import os import pathlib -import re import warnings from flexiznam.schema.datasets import Dataset diff --git 
a/flexiznam/schema/visstim_data.py b/flexiznam/schema/visstim_data.py index b3a179f..fe0ff8e 100644 --- a/flexiznam/schema/visstim_data.py +++ b/flexiznam/schema/visstim_data.py @@ -1,7 +1,5 @@ import datetime -import os import pathlib -import re from flexiznam.schema.datasets import Dataset diff --git a/tests/test_2p.py b/tests/test_2p.py index bcf476c..01dbd1d 100644 --- a/tests/test_2p.py +++ b/tests/test_2p.py @@ -20,7 +20,6 @@ TEST_PROJECT, ) import flexiznam as fzn -from flexiznam import camp MOUSE = "mouse_physio_2p" SESSION = "S20211102" diff --git a/tests/test_barseq.py b/tests/test_barseq.py index 7863fca..fd34425 100644 --- a/tests/test_barseq.py +++ b/tests/test_barseq.py @@ -17,7 +17,6 @@ TEST_PROJECT, ) import flexiznam as fzn -from flexiznam import camp MOUSE = "mouse_barseq" YAML = "yaml_automatic_skeleton.yml" diff --git a/tests/test_components/test_cli.py b/tests/test_components/test_cli.py index 2dc0ccb..a1e5f36 100644 --- a/tests/test_components/test_cli.py +++ b/tests/test_components/test_cli.py @@ -1,4 +1,3 @@ -import pytest import pathlib import yaml from click.testing import CliRunner diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 252a588..949049e 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -6,7 +6,7 @@ import pytest import flexiznam as flz import yaml -from flexiznam.config import PARAMETERS, get_password +from flexiznam.config import PARAMETERS from flexiznam.errors import FlexilimsError, NameNotUniqueError from tests.tests_resources.data_for_testing import MOUSE_ID, SESSION diff --git a/tests/test_components/test_utils.py b/tests/test_components/test_utils.py index 9384073..1a37254 100644 --- a/tests/test_components/test_utils.py +++ b/tests/test_components/test_utils.py @@ -2,7 +2,6 @@ import pytest import numpy as np from pathlib import Path -import pandas as pd import tempfile from flexiznam.config import config_tools, DEFAULT_CONFIG from 
flexiznam import utils diff --git a/tests/test_components/tests_schema/test_camera_data.py b/tests/test_components/tests_schema/test_camera_data.py index 2c3a3d8..c691996 100644 --- a/tests/test_components/tests_schema/test_camera_data.py +++ b/tests/test_components/tests_schema/test_camera_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.camera_data import CameraData from flexiznam.schema.datasets import Dataset from tests.tests_resources.data_for_testing import DATA_ROOT, TEST_PROJECT diff --git a/tests/test_components/tests_schema/test_harp.py b/tests/test_components/tests_schema/test_harp.py index e31679f..2a0574a 100644 --- a/tests/test_components/tests_schema/test_harp.py +++ b/tests/test_components/tests_schema/test_harp.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.harp_data import HarpData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_microscopy_data.py b/tests/test_components/tests_schema/test_microscopy_data.py index 2eeb05d..4f3a6b1 100644 --- a/tests/test_components/tests_schema/test_microscopy_data.py +++ b/tests/test_components/tests_schema/test_microscopy_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.microscopy_data import MicroscopyData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_scanimage_data.py b/tests/test_components/tests_schema/test_scanimage_data.py index 675190a..eb07c24 100644 --- a/tests/test_components/tests_schema/test_scanimage_data.py +++ b/tests/test_components/tests_schema/test_scanimage_data.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.scanimage_data import ScanimageData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/test_components/tests_schema/test_sequencing_data.py b/tests/test_components/tests_schema/test_sequencing_data.py index e066926..d129f59 100644 --- 
a/tests/test_components/tests_schema/test_sequencing_data.py +++ b/tests/test_components/tests_schema/test_sequencing_data.py @@ -1,6 +1,5 @@ -import pytest from flexiznam.schema.sequencing_data import SequencingData -from tests.tests_resources.data_for_testing import DATA_ROOT, PROJECT_ID +from tests.tests_resources.data_for_testing import DATA_ROOT # Test creation of all dataset types. # diff --git a/tests/test_components/tests_schema/test_visstim.py b/tests/test_components/tests_schema/test_visstim.py index f5d6507..97b3f4a 100644 --- a/tests/test_components/tests_schema/test_visstim.py +++ b/tests/test_components/tests_schema/test_visstim.py @@ -1,4 +1,3 @@ -import pytest from flexiznam.schema.visstim_data import VisStimData from tests.tests_resources.data_for_testing import DATA_ROOT diff --git a/tests/tests_resources/data_for_testing.py b/tests/tests_resources/data_for_testing.py index dd6bb0a..d43a6ce 100644 --- a/tests/tests_resources/data_for_testing.py +++ b/tests/tests_resources/data_for_testing.py @@ -1,11 +1,10 @@ """A list of file coming from one experiment""" from pathlib import Path -import datetime from flexiznam.config import PARAMETERS MOUSE_ID = "6437dcb13ded9c65df142a12" # actual physio2p mouse -MOUSE_TEMP = "647a1aec7ddb34517470d3e6" # some random mouse where I can change data +MOUSE_TEMP = "647a1aec7ddb34517470d3e6" # some random mouse where I can change data TEST_PROJECT = "demo_project" PROJECT_ID = "610989f9a651ff0b6237e0f6" SESSION = "mouse_physio_2p_S20211102" From bd55ca347bff2d5a6acf0fa3a24de34800d4e95e Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 29 Feb 2024 19:41:57 +0000 Subject: [PATCH 48/73] [bugfix] bad dataset name when uploading with skip --- flexiznam/schema/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 3f08709..8f4839a 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -434,7 +434,7 @@ def 
update_flexilims(self, mode="safe"): path=str(PurePosixPath(self.path)), is_raw="yes" if self.is_raw else "no", project_id=self.project_id, - dataset_name=self.full_name, + dataset_name=self.dataset_name, attributes=attributes, flexilims_session=self.flexilims_session, conflicts="abort", From e0c464674c333e9f902e54ffcdd5244b4456bebb Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 1 Mar 2024 16:27:44 +0000 Subject: [PATCH 49/73] Revert "[bugfix] issue in update_flexilims skip" This reverts commit e2d251ad3e78b6eef783b361770dec60e976dc21. --- flexiznam/schema/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 8f4839a..33eb04c 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -412,7 +412,7 @@ def update_flexilims(self, mode="safe"): resp = flz.update_entity( datatype="dataset", id=self.id, - name=self.dataset_name, + name=self.full_name, origin_id=self.origin_id, mode=mode, attributes=attributes, From ad727fb9d7b05dd58c2177de94fba8920c7890a4 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 4 Mar 2024 17:08:13 +0000 Subject: [PATCH 50/73] [minor] option to print where config comes from --- flexiznam/config/config_tools.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/flexiznam/config/config_tools.py b/flexiznam/config/config_tools.py index bf2d373..1234d57 100644 --- a/flexiznam/config/config_tools.py +++ b/flexiznam/config/config_tools.py @@ -53,12 +53,24 @@ def _find_file(file_name, config_folder=None, create_if_missing=False): raise ConfigurationError("Cannot find %s" % file_name) -def load_param(param_folder=None, config_file="config.yml"): - """Read parameter file from config folder""" +def load_param(param_folder=None, config_file="config.yml", verbose=False): + """Read parameter file from config folder + + Args: + param_folder (str, optional): folder to look for the file. Defaults to None. 
+ config_file (str, optional): name of the file to find. Defaults to "config.yml". + verbose (bool, optional): if True, print the path of the file being read. + Defaults to False. + + Returns: + dict: parameters read from the file + """ if param_folder is None: param_file = _find_file(config_file) else: param_file = Path(param_folder) / config_file + if verbose: + print(f"Reading parameters from {param_file}") with open(param_file, "r") as yml_file: prm = yaml.safe_load(yml_file) return prm From 53e2d8525d282d70260b396dcd7908235dfe7a4e Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 11 Mar 2024 07:45:14 +0000 Subject: [PATCH 51/73] [minor] describe change in changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6611da1..c3cf7c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ - `get_entities` does not raise warnings anymore if `name` is specified and `datatype` is not. This is now supported upstream by `flexilims` - Clearer error message when mouse info cannot be found in MCMS +- `load_param` can print the file used to read config with the `verbose` flag. 
### Bugfixes From 5b3248a41f0b20e031beb2695108db80aa2e7025 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 1 May 2024 13:19:33 +0100 Subject: [PATCH 52/73] [style] run precommit --- .github/workflows/tests.yml | 2 +- .pre-commit-config.yaml | 28 +++++ .vscode/settings.json | 2 +- CHANGELOG.md | 52 +++++----- README.md | 4 +- docs/make.bat | 70 ++++++------- docs/source/conf.py | 136 ++++++++++++------------- docs/source/flexiznam.camp.rst | 2 +- flexiznam/config/config_tools.py | 8 +- flexiznam/gui/azure.tcl | 4 +- notebooks/01-Setup.ipynb | 11 +- notebooks/02-Add Data.ipynb | 7 +- notebooks/03-Using the database.ipynb | 7 +- requirements.txt | 14 +-- temp.py | 14 +++ tests/ReadMe.md | 24 ++--- tests/test-results/pytest_in_tests.xml | 2 +- 17 files changed, 209 insertions(+), 178 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 temp.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7e74ab0..0fc460c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -13,7 +13,7 @@ jobs: build: runs-on: ubuntu-latest - environment: + environment: name: testing strategy: matrix: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b9d55f8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +exclude: 'conf.py' + +# Configuring https://pre-commit.ci/ +ci: + autoupdate_schedule: monthly + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-merge-conflict + - id: check-toml + - id: end-of-file-fixer + - id: mixed-line-ending + args: [--fix=lf] + - id: requirements-txt-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + args: [--extra-keys=metadata.language_info.version 
metadata.kernelspec.name metadata.kernelspec.display_name] diff --git a/.vscode/settings.json b/.vscode/settings.json index 06d557d..1c98995 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -10,4 +10,4 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, -} \ No newline at end of file +} diff --git a/CHANGELOG.md b/CHANGELOG.md index f3e82cb..d312328 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ ### Minor - `add_mouse` uploads birth and death dates in a human readable format instead. - Add `conflicts` argument to `add_mouse` to overwrite existing mice -- `get_entities` does not raise warnings anymore if `name` is specified and `datatype` +- `get_entities` does not raise warnings anymore if `name` is specified and `datatype` is not. This is now supported upstream by `flexilims` - Clearer error message when mouse info cannot be found in MCMS - `load_param` can print the file used to read config with the `verbose` flag. @@ -35,13 +35,13 @@ config but not the local config) to the config file - Add `get_data_root` function to get `raw` or `processed` root for a project - `get_children` can filter children by attributes before returning results -- refactor `get_datasets` to be non recursive and add filtering options. Also add +- refactor `get_datasets` to be non recursive and add filtering options. 
Also add multiple options to filter datasets and format output - add `get_datasets_recursively` to get all datasets below a given entity ### Bugfixes -- return empty dataframe if `filter` in `get_children` filters out everything (instead +- return empty dataframe if `filter` in `get_children` filters out everything (instead of crashing) - `update_flexilims` correctly uploads tuples parameters - `update_flexilims` correctly uploads floats and np.float/np.int parameters @@ -52,9 +52,9 @@ config but not the local config) to the config file ### Minor - `harp_dataset.from_folder` will now match csv even if there is nothing before or after - `harpmessage` in the file name (i.e. the file is `harpmessage.bin`, and all csvs in + `harpmessage` in the file name (i.e. the file is `harpmessage.bin`, and all csvs in the folder will be matched) -- private function `config_tools._find_files` has now a `create_if_missing` argument to +- private function `config_tools._find_files` has now a `create_if_missing` argument to create the file if it does not exist ## v0.3.7 @@ -72,24 +72,24 @@ config but not the local config) to the config file ## v0.3.5 ### Main changes -- `flz.get_datasets` can return `Dataset` objects instead of path strings if +- `flz.get_datasets` can return `Dataset` objects instead of path strings if `return_paths=False` - New `OnixData` class to handle Onix data - `get_flexilims_session` can now re-use token from a previous session - Add a GUI module. ### Minor -- More generic `clean_recursively` replaces the `clean_dictionary_recursively`. It +- More generic `clean_recursively` replaces the `clean_dictionary_recursively`. It handle more complex nesting and replaces non finite float by their string repr. - `CameraDataset` metadata can also be `.yml`, not only `.txt`. -- `Dataset.format(mode='yaml')` ensure yaml compatibility. (path to str, tuple to list, +- `Dataset.format(mode='yaml')` ensure yaml compatibility. (path to str, tuple to list, etc...) 
- `add_experimental_session` can be done with `parent_id` (or `parent_name`). - `add_dataset` can add a dataset to a mouse and does not require genealogy. ### Bugfixes -- Fix [#68](https://github.com/znamlab/flexiznam/issues/68). Dataset.format returns +- Fix [#68](https://github.com/znamlab/flexiznam/issues/68). Dataset.format returns always the path in posix format. - Fix [#88](https://github.com/znamlab/flexiznam/issues/88). Now make attributes JSON compatible before uploading to flexilims. This will replace special characters in @@ -124,7 +124,7 @@ config but not the local config) to the config file ### Bugfixes - `add_genealogy` now works with scanimage datasets -- `HarpData` does not match csv if the file name is only `harpmessage.bin`. +- `HarpData` does not match csv if the file name is only `harpmessage.bin`. See issue #93 - Adapt `add_mouse` to new MCMS page layout - `config --update` adds fields that are new in the default config to the current config @@ -137,29 +137,29 @@ config but not the local config) to the config file ## v0.3.2 ### Main changes -- Add CLI function: `check_flexilims_issues` to check for ill-named entity and invalid +- Add CLI function: `check_flexilims_issues` to check for ill-named entity and invalid paths -- `update_config` now adds all project_ids to the default config (requires to have +- `update_config` now adds all project_ids to the default config (requires to have flexilims access) ### Breaking changes: - `add_dataset` requires the genealogy argument -- `from_folder` uses a `folder_genealogy` argument instead of the previous `mouse`, +- `from_folder` uses a `folder_genealogy` argument instead of the previous `mouse`, `session` and ` recording` arguments - `Dataset` creation requires `genealogy` instead of `name` -- `Dataset` has now a `Dataset.full_name` and `Dataset.short_name` property instead - of a `Dataset.name` +- `Dataset` has now a `Dataset.full_name` and `Dataset.short_name` property instead + of a `Dataset.name` 
### Main changes -- `from_origin` has a new `base_name` property to allow multiple datasets of the same +- `from_origin` has a new `base_name` property to allow multiple datasets of the same `dataset_type` below the same origin. ### Minor -- `add_mouse` can be given a dictionary of info instead of reading them from MCMS (to +- `add_mouse` can be given a dictionary of info instead of reading them from MCMS (to allow for manual download) -- `add_experimental_session` uses parent path as base path. It means that parent must +- `add_experimental_session` uses parent path as base path. It means that parent must have a path -- `CameraData.from_folder` has an option to detect partial datasets (i.e. without +- `CameraData.from_folder` has an option to detect partial datasets (i.e. without timestamps or metadata) - Reduce default verbosity of some functions - `get_flexilims_sessions` can get a session without setting the project_id @@ -171,21 +171,21 @@ config but not the local config) to the config file ### Main changes - Compatible with flexilims v0.2. `None` and `''` can both be uploaded. -- Dataset.is_raw can be autodetermined from path. If this fails, it **must** be +- Dataset.is_raw can be autodetermined from path. If this fails, it **must** be manually set. -- New function and CLI entry: `create_yaml` to create the skeleton of a yaml before +- New function and CLI entry: `create_yaml` to create the skeleton of a yaml before parsing. - Extensions for microscopy datasets are now defined in the config file. - ScanImage datasets have a `stack_type` attribute, default to `calcium`. - Authorise `overwrite` when adding samples, sessions, recordings, or datasets. - Add `flz.utils.check_flexilims_path` to verify that defined paths actually exist. -- Add `flz.utils.check_flexilims_names` to verify that entity names start with their +- Add `flz.utils.check_flexilims_names` to verify that entity names start with their parent's name. 
-- Add `flz.utils.add_genealogy` to add a `genealogy` field to flexilims entries. This - field contains the list of parents ([mouse, session, recording] for instance) up to +- Add `flz.utils.add_genealogy` to add a `genealogy` field to flexilims entries. This + field contains the list of parents ([mouse, session, recording] for instance) up to the short name of the current entity -- Add `flz.utilis.add_missing_paths` to update flexilims to add `path` attribute to - non-dataset entities that have a genealogy defined. The path is set to `project / +- Add `flz.utilis.add_missing_paths` to update flexilims to add `path` attribute to + non-dataset entities that have a genealogy defined. The path is set to `project / Path(*genealogy)` if this folder exists in the processed or raw root directory. ### Bugfixes: diff --git a/README.md b/README.md index 40a70f1..a8f9121 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ To set up the flexilims and mcms integration, the config file must be edited. Fi flexiznam config ``` -This should create a `~/.flexiznam/config.yml` file. Edit it with your favorite text editor to change `flexilims_username`, `mcms_username` and, +This should create a `~/.flexiznam/config.yml` file. Edit it with your favorite text editor to change `flexilims_username`, `mcms_username` and, if neeed `data_root`. You can then add passwords to make it simpler by running (one by one): @@ -78,7 +78,7 @@ If you used `pip -e .` to install, updating can be done with: ``` cd flexiznam -git pull +git pull pip install -e . --upgrade flexiznam config --update ``` diff --git a/docs/make.bat b/docs/make.bat index 6247f7e..9534b01 100644 --- a/docs/make.bat +++ b/docs/make.bat @@ -1,35 +1,35 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. 
- echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py index 3295c09..82d2a42 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,68 +1,68 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -import os -import sys - -sys.path.insert(0, os.path.abspath("../..")) - -# -- Project information ----------------------------------------------------- - -project = "flexiznam" -copyright = "2021, Antonin Blot, Petr Znamenskiy" -author = "Antonin Blot, Petr Znamenskiy" - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosectionlabel", - "sphinx.ext.intersphinx", - "sphinx.ext.napoleon", - "sphinx.ext.viewcode", - "sphinx_click", -] - -intersphinx_mapping = { - "python": ("https://docs.python.org/3", None), - "pandas": ("https://pandas.pydata.org/docs/", None), -} - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = "sphinx_rtd_theme" - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. 
For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) + +import os +import sys + +sys.path.insert(0, os.path.abspath("../..")) + +# -- Project information ----------------------------------------------------- + +project = "flexiznam" +copyright = "2021, Antonin Blot, Petr Znamenskiy" +author = "Antonin Blot, Petr Znamenskiy" + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.autosectionlabel", + "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx_click", +] + +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "pandas": ("https://pandas.pydata.org/docs/", None), +} + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = "sphinx_rtd_theme" + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ["_static"] diff --git a/docs/source/flexiznam.camp.rst b/docs/source/flexiznam.camp.rst index 3acc855..1cf4650 100644 --- a/docs/source/flexiznam.camp.rst +++ b/docs/source/flexiznam.camp.rst @@ -8,7 +8,7 @@ Module contents :members: :undoc-members: :show-inheritance: - + flexiznam.camp.sync\_data module -------------------------------- diff --git a/flexiznam/config/config_tools.py b/flexiznam/config/config_tools.py index 1234d57..660a796 100644 --- a/flexiznam/config/config_tools.py +++ b/flexiznam/config/config_tools.py @@ -55,13 +55,13 @@ def _find_file(file_name, config_folder=None, create_if_missing=False): def load_param(param_folder=None, config_file="config.yml", verbose=False): """Read parameter file from config folder - + Args: param_folder (str, optional): folder to look for the file. Defaults to None. config_file (str, optional): name of the file to find. Defaults to "config.yml". - verbose (bool, optional): if True, print the path of the file being read. + verbose (bool, optional): if True, print the path of the file being read. Defaults to False. - + Returns: dict: parameters read from the file """ @@ -167,7 +167,7 @@ def update_config( project_ids.update(kwargs["project_ids"]) kwargs["project_ids"] = project_ids all_ids = {} - for (pname, pid) in kwargs["project_ids"].items(): + for pname, pid in kwargs["project_ids"].items(): if pid in all_ids: warnings.warn(f"PIDs {pname} and {all_ids[pid]} have the same ID") all_ids[pid] = pname diff --git a/flexiznam/gui/azure.tcl b/flexiznam/gui/azure.tcl index 3e75502..fead545 100644 --- a/flexiznam/gui/azure.tcl +++ b/flexiznam/gui/azure.tcl @@ -17,7 +17,7 @@ proc set_theme {mode} { -selectfg "#ffffff" -selectbg "#007fff" } - + ttk::style configure . 
\ -background $colors(-bg) \ -foreground $colors(-fg) \ @@ -44,7 +44,7 @@ proc set_theme {mode} { option add *font [ttk::style lookup . -font] option add *Menu.selectcolor $colors(-fg) - + } elseif {$mode == "light"} { ttk::style theme use "azure-light" diff --git a/notebooks/01-Setup.ipynb b/notebooks/01-Setup.ipynb index ad0fdc0..f749f7d 100644 --- a/notebooks/01-Setup.ipynb +++ b/notebooks/01-Setup.ipynb @@ -139,9 +139,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "!cat ~/.flexiznam/secret_password.yml" @@ -172,9 +170,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -185,8 +181,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/notebooks/02-Add Data.ipynb b/notebooks/02-Add Data.ipynb index 2b8524f..3b9e85d 100644 --- a/notebooks/02-Add Data.ipynb +++ b/notebooks/02-Add Data.ipynb @@ -241,9 +241,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ -254,8 +252,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/notebooks/03-Using the database.ipynb b/notebooks/03-Using the database.ipynb index d839c14..ad48e98 100644 --- a/notebooks/03-Using the database.ipynb +++ b/notebooks/03-Using the database.ipynb @@ -290,9 +290,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" + "language": "python" }, "language_info": { "codemirror_mode": { @@ 
-303,8 +301,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.5" + "pygments_lexer": "ipython3" }, "toc": { "base_numbering": 1, diff --git a/requirements.txt b/requirements.txt index 21b31d0..ad16b0d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ -pytest -setuptools -pandas -webbot +black click git+ssh://git@github.com/znamlab/flexilims.git#egg=flexilims +pandas +pytest pyyaml -tifffile +setuptools sphinx -sphinx-rtd-theme sphinx-click -black +sphinx-rtd-theme +tifffile +webbot diff --git a/temp.py b/temp.py new file mode 100644 index 0000000..872bbce --- /dev/null +++ b/temp.py @@ -0,0 +1,14 @@ +from flexiznam.config import config_tools + +ymlfile = "/camp/home/blota/home/users/blota/temp/s20230605_valid.yml" +from flexiznam.camp import sync_data as sd + +o = sd.parse_yaml(ymlfile) + + +config_folder = None +fname = config_tools._find_file("config.yml", config_folder=config_folder) +prm = config_tools.load_param(param_folder=config_folder) +config_tools.update_config( + param_file="config.yml", config_folder=config_folder, add_all_projects=True, **prm +) diff --git a/tests/ReadMe.md b/tests/ReadMe.md index edaf613..10036bd 100644 --- a/tests/ReadMe.md +++ b/tests/ReadMe.md @@ -4,37 +4,37 @@ Tests are separated in two: -- Main use cases found in the main test folder +- Main use cases found in the main test folder - Test of individual components found in `test_components` - -The `test_components` should cover most of the code but are not user friendly. The + +The `test_components` should cover most of the code but are not user friendly. The main use cases are example scripts that could be use for a real experiment. ## Data -Example datasets are available in the +Example datasets are available in the raw data folder on camp `data/instruments/raw_data/projects/demo_project/`. A corresponding preprocessed folder is also used by tests. 
## Notes: ### MCMS -To test the MCMS part, you need a graphical interface and a browser. It is also +To test the MCMS part, you need a graphical interface and a browser. It is also particularly slow. -To avoid having to run it every time, the tests are marked as slow and require the +To avoid having to run it every time, the tests are marked as slow and require the `--runslow` flag to be executed. This is False by default ### Flexilims -For interaction with flexilims, you need to be connected via the crick network -(vpn or from the crick). Neither is easily doable on github workflow. Furthermore -flexilims does not have an API to delete entries. You will have clean it manually +For interaction with flexilims, you need to be connected via the crick network +(vpn or from the crick). Neither is easily doable on github workflow. Furthermore +flexilims does not have an API to delete entries. You will have clean it manually before running the tests -To make things simpler, the tests requiring flexilims or mcms are marked as integration +To make things simpler, the tests requiring flexilims or mcms are marked as integration tests. They can be skipped by running `pytest -m "not integtest"`. -To test the upload to flexilims properly, you need to clear flexilims yourself -(as there is no API to delete stuff). There should be a flag `FLM_IS_WIPED` at +To test the upload to flexilims properly, you need to clear flexilims yourself +(as there is no API to delete stuff). There should be a flag `FLM_IS_WIPED` at the beginning of each test file. If set to `False` (default), then tests involving flexilims will run with `conflicts=skip`. 
diff --git a/tests/test-results/pytest_in_tests.xml b/tests/test-results/pytest_in_tests.xml index e3838f1..d3c28e6 100644 --- a/tests/test-results/pytest_in_tests.xml +++ b/tests/test-results/pytest_in_tests.xml @@ -1 +1 @@ - \ No newline at end of file + From d8056039ee3e3e90c76d8e8eb6659d18d3241920 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 19 Mar 2024 16:06:42 +0000 Subject: [PATCH 53/73] [feature] crash `update_entity` if reserved fields are used as attributes. --- CHANGELOG.md | 7 +++++++ flexiznam/main.py | 6 +++++- tests/test_components/test_main.py | 12 ++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d312328..50807b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,13 @@ is not. This is now supported upstream by `flexilims` - `update_config` actually adds the new fields (i.e. fields that are in the default config but not the local config) to the config file + +## v0.3.10 + +### Main changes + +- Make `update_entity` safer by crashing if reserved fields are used as attributes. 
+ ## v0.3.9 ### Main changes diff --git a/flexiznam/main.py b/flexiznam/main.py index 6ee819f..6bf4c41 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -772,7 +772,11 @@ def update_entity( raise AttributeError("`mode` must be `overwrite` or `update`") if id is None: id = entity["id"] - + for attr in full_attributes: + if attr in entity: + raise FlexilimsError( + "Attribute `%s` is a flexilims reserved keyword" % attr + ) rep = flexilims_session.update_one( id=id, datatype=datatype, diff --git a/tests/test_components/test_main.py b/tests/test_components/test_main.py index 949049e..bbd4927 100644 --- a/tests/test_components/test_main.py +++ b/tests/test_components/test_main.py @@ -429,3 +429,15 @@ def test_update_entity(flm_sess): datatype="dataset", name=dataset_name, flexilims_session=flm_sess ) assert repr(new_entity) == repr(original_entity) + with pytest.raises(FlexilimsError) as err: + flz.update_entity( + "dataset", + name=dataset_name, + flexilims_session=flm_sess, + attributes={ + "path": "new/path", + "dataset_type": "scanimage", + "project": "random", + "createdBy": "BAD", + }, + ) From 7efd79fed5f627666bdc022d6dad1be41027a091 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 1 May 2024 13:46:28 +0100 Subject: [PATCH 54/73] [bugfix] add_mouse works with alive mice --- flexiznam/main.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index 6bf4c41..da6576f 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -217,12 +217,13 @@ def add_mouse( # format birthdate for date_type in ["birth_date", "death_date"]: d = mcms_info[date_type] - d = datetime.datetime.fromisoformat(d) - # birthdate is at midnight or 23 depending on the time zone - if d.hour <= 12: - date = d.strftime("%Y-%m-%d") - else: - date = (d + datetime.timedelta(days=1)).strftime("%Y-%m-%d") + if d is not None: + d = datetime.datetime.fromisoformat(d) + # birthdate is at midnight or 23 depending on the time 
zone + if d.hour <= 12: + date = d.strftime("%Y-%m-%d") + else: + date = (d + datetime.timedelta(days=1)).strftime("%Y-%m-%d") mcms_info[date_type] = date # update mouse_info with mcms_info but prioritise mouse_info for conflicts mouse_info = dict(mcms_info, **mouse_info) From 0ecacf40713a05639dae0a32253355b293df62f6 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 1 May 2024 13:48:11 +0100 Subject: [PATCH 55/73] [v0.3.11] --- CHANGELOG.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50807b4..ec9328f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,13 @@ is not. This is now supported upstream by `flexilims` - `update_config` actually adds the new fields (i.e. fields that are in the default config but not the local config) to the config file +## v0.3.11 + +### Bugfixes + +- Fix bugs related to raw_data for projects not in main folder +- Add mouse works with alive animals + ## v0.3.10 @@ -75,6 +82,23 @@ config but not the local config) to the config file - `get_children` output is filtered to contain only relevant columns when `children_datatype` is not None ### Bugfixes +## v0.3.6 + +### Main changes + +- New `SequencingData` class to handle sequencing data +- Add a `conda_envs` field in the config file to use in conjuction with `znamutils` +- `get_children` can work with name or id (instead of id only) + +### Minor +- `add_mouse` uploads birth and death dates in a human readable format instead. +- `get_entities` does not raise warnings anymore if `name` is specified and `datatype` +is not. This is now supported upstream by `flexilims` + +### Bugfixes + +- `update_config` actually adds the new fields (i.e. 
fields that are in the default +config but not the local config) to the config file ## v0.3.5 From d0972e67a61ae5d5f0996f150e00baec584d1c35 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 30 May 2024 10:03:30 +0100 Subject: [PATCH 56/73] [minor] set enforce_dataset_types to False by default --- flexiznam/config/default_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/config/default_config.py b/flexiznam/config/default_config.py index 5e0d4ae..55c7e2b 100644 --- a/flexiznam/config/default_config.py +++ b/flexiznam/config/default_config.py @@ -30,7 +30,7 @@ # list of all datatypes datatypes=["mouse", "session", "recording", "dataset", "sample"], # should we limit the valid dataset types? - enforce_dataset_types=True, + enforce_dataset_types=False, # if we enforce, what is the list of valid dataset type dataset_types=[ "scanimage", From eeb1cc6d236a18e16d8f1d0a00e550f5749e3ccd Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 30 May 2024 10:10:17 +0100 Subject: [PATCH 57/73] [dev] add pre-commit config --- flexiznam/.pre-commit-config.yaml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 flexiznam/.pre-commit-config.yaml diff --git a/flexiznam/.pre-commit-config.yaml b/flexiznam/.pre-commit-config.yaml new file mode 100644 index 0000000..b9d55f8 --- /dev/null +++ b/flexiznam/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +exclude: 'conf.py' + +# Configuring https://pre-commit.ci/ +ci: + autoupdate_schedule: monthly + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-docstring-first + - id: check-executables-have-shebangs + - id: check-merge-conflict + - id: check-toml + - id: end-of-file-fixer + - id: mixed-line-ending + args: [--fix=lf] + - id: requirements-txt-fixer + - id: trailing-whitespace + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + 
- id: nbstripout + args: [--extra-keys=metadata.language_info.version metadata.kernelspec.name metadata.kernelspec.display_name] From 86b93a40a7c5b799a9a7f7183fd283ce71d38f05 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 30 May 2024 10:12:47 +0100 Subject: [PATCH 58/73] [feature] extra_attributes in from_origin To be able to return the dataset with the same arguments if it exists or create a new one otherwise (with `append`) --- CHANGELOG.md | 2 ++ flexiznam/schema/datasets.py | 61 ++++++++++++++++++++++++++---------- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ec9328f..ff73ed2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ - Add a `conda_envs` field in the config file to use in conjuction with `znamutils` - `get_children` can work with name or id (instead of id only) - `check_flexilims_issues` can now add missing paths +- `Dataset.from_origin` has a new `extra_attributes` argument to match online datasets + with specific attributes only. ### Minor - `add_mouse` uploads birth and death dates in a human readable format instead. diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 33eb04c..0f1c3fd 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -131,20 +131,22 @@ def from_origin( base_name=None, conflicts=None, flexilims_session=None, + extra_arguments=None, ): """Creates a dataset of a given type as a child of a parent entity Args: project (str): Name of the project or hexadecimal project_id origin_type (str): sample type of the origin - origin_id (str): hexadecimal ID of the origin. This or origin_name must be provided + origin_id (str): hexadecimal ID of the origin. This or origin_name must be + provided origin_name (str): name of the origin. This or origin_id must be provided - dataset_type (str): type of dataset to create. Must be defined in the config file + dataset_type (str): type of dataset to create. 
Must be defined in the config + file base_name (str): How is this dataset name? Use dataset_type if base_name is None (default) conflicts (str): What to do if a dataset of this type already exists - as a child of the parent entity? - + as a child of the parent entity? Behaviour modified by `extra_arguments` `append` Create a new dataset with a new name and path `abort` or None @@ -152,9 +154,13 @@ def from_origin( exit `skip` or `overwrite` Return a Dataset corresponding to the existing entry if there - is exactly one existing entry, otherwise through a + is exactly one existing entry, otherwise throw a :py:class:`flexiznam.errors.NameNotUniqueError` - flexilims_session (:py:class:`flexilims.Flexilims`): authentication session to connect to flexilims + flexilims_session (:py:class:`flexilims.Flexilims`): authentication session + to connect to flexilims + extra_arguments (dict): additional arguments. If provided, change the + `conflicts` behaviour to consider only datasets that have the exact + same extra_arguments. 
Returns: :py:class:`flexiznam.schema.datasets.Dataset`: a dataset object (WITHOUT updating flexilims) @@ -185,8 +191,25 @@ def from_origin( processed = processed[ [g[-1].startswith(base_name) for g in processed.genealogy] ] + + # If extra_arguments is provided, only consider datasets that have the exact + # same extra_arguments + if extra_arguments is not None: + valid_processed = [] + for _, proc in processed.iterrows(): + online = Dataset._format_series_to_kwargs(proc)["extra_attributes"] + differences = utils.compare_dictionaries_recursively( + utils.clean_recursively(extra_arguments), online + ) + if not differences: + valid_processed.append(proc) + else: + valid_processed = processed + already_processed = len(processed) > 0 - if (not already_processed) or (conflicts == "append"): + if (not already_processed) or ( + (not len(valid_processed)) and conflicts == "append" + ): dataset_root = "%s_%s" % (origin["name"], base_name) dataset_name = flz.generate_name( "dataset", @@ -208,21 +231,25 @@ def from_origin( flexilims_session=flexilims_session, ) else: + # There are some datasets of this type already online if (conflicts is None) or (conflicts == "abort"): raise flz.errors.DatasetError( f"Dataset(s) of type {dataset_type} already exist(s):" + f" {processed.loc[:, 'name']}" ) - elif conflicts == "skip" or conflicts == "overwrite": - if len(processed) == 1: - return Dataset.from_dataseries(dataseries=processed.iloc[0]) - else: - raise flz.errors.NameNotUniqueError( - "{} {} datasets with name starting by {} exists for {}, " - "which one to return?".format( - len(processed), dataset_type, base_name, origin["name"] - ) - ) + elif conflicts == "skip" and len(valid_processed) == 1: + # If skip, ensure extra_arguments are the same + return Dataset.from_dataseries(dataseries=valid_processed[0]) + elif conflicts == "overwrite" and len(processed) == 1: + # If overwrite, ensure there is only one dataset of this type as we + # won't be able to guess which one should be 
replaced + return Dataset.from_dataseries(dataseries=processed.iloc[0]) + else: + txt = f"{len(processed)} {dataset_type} datasets with name starting by" + txt += f" {base_name} exists for {origin['name']}" + if extra_arguments: + txt += f", {len(valid_processed)} matching extra_arguments" + raise flz.errors.NameNotUniqueError(txt) @staticmethod def _format_series_to_kwargs(flm_series): From 2544d35322202db4289f0393e1ad003bd5d0587f Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 30 May 2024 10:15:22 +0100 Subject: [PATCH 59/73] [minor] remove temp file that should not have been commited --- temp.py | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 temp.py diff --git a/temp.py b/temp.py deleted file mode 100644 index 872bbce..0000000 --- a/temp.py +++ /dev/null @@ -1,14 +0,0 @@ -from flexiznam.config import config_tools - -ymlfile = "/camp/home/blota/home/users/blota/temp/s20230605_valid.yml" -from flexiznam.camp import sync_data as sd - -o = sd.parse_yaml(ymlfile) - - -config_folder = None -fname = config_tools._find_file("config.yml", config_folder=config_folder) -prm = config_tools.load_param(param_folder=config_folder) -config_tools.update_config( - param_file="config.yml", config_folder=config_folder, add_all_projects=True, **prm -) From c84c123e62dd3c0247ce8655b216264ed1e8829a Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 30 May 2024 11:05:43 +0100 Subject: [PATCH 60/73] [bugfix] correct behaviour of from_origin with extra_attributes we need to still be able to append if we want --- flexiznam/schema/datasets.py | 111 +++++++++++++++++++++++++---------- 1 file changed, 79 insertions(+), 32 deletions(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 0f1c3fd..d677622 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -135,6 +135,18 @@ def from_origin( ): """Creates a dataset of a given type as a child of a parent entity + This function will create a dataset with a 
unique name based on the origin name + and the dataset type. If a dataset of this type already exists, the behaviour is + defined by the `conflicts` argument. If `extra_arguments` is provided, only + consider datasets that have the exact same extra_arguments when resolving + conflicts (see next paragraph for details). + + Conflicts can be resolved in the following ways: + - `abort`: raise an error if a dataset of this type already exists + + + + Args: project (str): Name of the project or hexadecimal project_id origin_type (str): sample type of the origin @@ -145,17 +157,8 @@ def from_origin( file base_name (str): How is this dataset name? Use dataset_type if base_name is None (default) - conflicts (str): What to do if a dataset of this type already exists - as a child of the parent entity? Behaviour modified by `extra_arguments` - `append` - Create a new dataset with a new name and path - `abort` or None - Through a :py:class:`flexiznam.errors.NameNotUniqueError` and - exit - `skip` or `overwrite` - Return a Dataset corresponding to the existing entry if there - is exactly one existing entry, otherwise throw a - :py:class:`flexiznam.errors.NameNotUniqueError` + conflicts (str): How to resolve conflicts? One of `abort`, `skip`, `append`, + `overwrite`. Default is `abort` flexilims_session (:py:class:`flexilims.Flexilims`): authentication session to connect to flexilims extra_arguments (dict): additional arguments. 
If provided, change the @@ -207,9 +210,16 @@ def from_origin( valid_processed = processed already_processed = len(processed) > 0 - if (not already_processed) or ( - (not len(valid_processed)) and conflicts == "append" + + def _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_attributes, ): + """Inner function to create a new dataset object""" dataset_root = "%s_%s" % (origin["name"], base_name) dataset_name = flz.generate_name( "dataset", @@ -229,27 +239,64 @@ def from_origin( project=project, origin_id=origin["id"], flexilims_session=flexilims_session, + extra_attributes=extra_attributes, ) - else: - # There are some datasets of this type already online - if (conflicts is None) or (conflicts == "abort"): - raise flz.errors.DatasetError( - f"Dataset(s) of type {dataset_type} already exist(s):" - + f" {processed.loc[:, 'name']}" - ) - elif conflicts == "skip" and len(valid_processed) == 1: - # If skip, ensure extra_arguments are the same - return Dataset.from_dataseries(dataseries=valid_processed[0]) - elif conflicts == "overwrite" and len(processed) == 1: - # If overwrite, ensure there is only one dataset of this type as we - # won't be able to guess which one should be replaced + + # CONFLICTS RESOLUTION + # There are no datasets, create one + if not already_processed: + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_arguments, + ) + # There are some datasets of this type already online and we abort + if (conflicts is None) or (conflicts == "abort"): + raise flz.errors.DatasetError( + f"Dataset(s) of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + # Three cases left: skip, append, overwrite + if conflicts == "overwrite": + # If overwrite, ensure there is only one dataset of this type as we + # won't be able to guess which one should be replaced + if len(processed) == 1: return Dataset.from_dataseries(dataseries=processed.iloc[0]) - 
else: - txt = f"{len(processed)} {dataset_type} datasets with name starting by" - txt += f" {base_name} exists for {origin['name']}" - if extra_arguments: - txt += f", {len(valid_processed)} matching extra_arguments" - raise flz.errors.NameNotUniqueError(txt) + raise flz.errors.NameNotUniqueError( + f"Multiple datasets of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + if conflicts == "skip": + # If skip and we have an exact match, return it + if len(valid_processed) == 1: + return Dataset.from_dataseries(dataseries=valid_processed[0]) + # If there is no match, create a new dataset + if len(valid_processed) == 0: + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_arguments, + ) + raise flz.errors.NameNotUniqueError( + f"Multiple datasets of type {dataset_type} already exist(s):" + + f" {processed.loc[:, 'name']}" + ) + if conflicts == "append": + # Create a new dataset + return _create_new_ds( + origin, + base_name, + project, + flexilims_session, + dataset_type, + extra_arguments, + ) @staticmethod def _format_series_to_kwargs(flm_series): From 4a2fe9e33959e48a383f8699d4cc8f34b609357e Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 30 May 2024 18:07:46 +0100 Subject: [PATCH 61/73] [bugfix] correct extra_attribute in from_origin - rename to match other places - correct behaviour with append, always append --- flexiznam/schema/datasets.py | 42 +++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index d677622..1b0e93b 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -131,20 +131,16 @@ def from_origin( base_name=None, conflicts=None, flexilims_session=None, - extra_arguments=None, + extra_attributes=None, + ignore_attributes=(), ): """Creates a dataset of a given type as a child of a parent entity This function will create a dataset with a 
unique name based on the origin name and the dataset type. If a dataset of this type already exists, the behaviour is - defined by the `conflicts` argument. If `extra_arguments` is provided, only - consider datasets that have the exact same extra_arguments when resolving - conflicts (see next paragraph for details). - - Conflicts can be resolved in the following ways: - - `abort`: raise an error if a dataset of this type already exists - - + defined by the `conflicts` argument. If `extra_attributes` is provided, only + consider datasets that have the exact same extra_attributes when resolving + conflicts. Args: @@ -161,9 +157,11 @@ def from_origin( `overwrite`. Default is `abort` flexilims_session (:py:class:`flexilims.Flexilims`): authentication session to connect to flexilims - extra_arguments (dict): additional arguments. If provided, change the + extra_attributes (dict): additional arguments. If provided, change the `conflicts` behaviour to consider only datasets that have the exact - same extra_arguments. + same extra_attributes. + ignore_attributes (list): list of arguments to ignore when comparing datasets + for conflicts resolution. Used only if `extra_attributes` is provided. 
Returns: :py:class:`flexiznam.schema.datasets.Dataset`: a dataset object (WITHOUT updating flexilims) @@ -195,15 +193,17 @@ def from_origin( [g[-1].startswith(base_name) for g in processed.genealogy] ] - # If extra_arguments is provided, only consider datasets that have the exact - # same extra_arguments - if extra_arguments is not None: + # If extra_attributes is provided, only consider datasets that have the exact + # same extra_attributes + if extra_attributes is not None: valid_processed = [] + to_compare = utils.clean_recursively( + extra_attributes.copy(), keys=ignore_attributes + ) for _, proc in processed.iterrows(): online = Dataset._format_series_to_kwargs(proc)["extra_attributes"] - differences = utils.compare_dictionaries_recursively( - utils.clean_recursively(extra_arguments), online - ) + online = utils.clean_recursively(online, keys=ignore_attributes) + differences = utils.compare_dictionaries_recursively(to_compare, online) if not differences: valid_processed.append(proc) else: @@ -251,7 +251,7 @@ def _create_new_ds( project, flexilims_session, dataset_type, - extra_arguments, + extra_attributes, ) # There are some datasets of this type already online and we abort if (conflicts is None) or (conflicts == "abort"): @@ -263,6 +263,8 @@ def _create_new_ds( if conflicts == "overwrite": # If overwrite, ensure there is only one dataset of this type as we # won't be able to guess which one should be replaced + if len(valid_processed) == 1: + return Dataset.from_dataseries(dataseries=valid_processed[0]) if len(processed) == 1: return Dataset.from_dataseries(dataseries=processed.iloc[0]) raise flz.errors.NameNotUniqueError( @@ -281,7 +283,7 @@ def _create_new_ds( project, flexilims_session, dataset_type, - extra_arguments, + extra_attributes, ) raise flz.errors.NameNotUniqueError( f"Multiple datasets of type {dataset_type} already exist(s):" @@ -295,7 +297,7 @@ def _create_new_ds( project, flexilims_session, dataset_type, - extra_arguments, + 
extra_attributes, ) @staticmethod From 6fcf2f284014372f413bdf441e3528328559be58 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 17 Jun 2024 11:09:53 +0100 Subject: [PATCH 62/73] [feature] add delete recursively --- flexiznam/main.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/flexiznam/main.py b/flexiznam/main.py index da6576f..7c3f43f 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -1317,3 +1317,33 @@ def format_results(results, return_list=False): if return_list: return results return pd.DataFrame(results) + + +def delete_recursively(source_id, flexilims_session, do_it=False): + """Delete an entity and all its children recursively + + Args: + source_id (str): hexadecimal ID of the entity to delete + flexilims_session (:py:class:`flexilims.Flexilims`): Flexylims session object + do_it (bool): if True, will actually delete the entities + + Returns: + list: hexadecimal IDs of the entities to delete + + """ + to_delete = [] + + def _get_children(parent_id): + children = get_children( + parent_id=parent_id, flexilims_session=flexilims_session + ) + for _, child in children.iterrows(): + to_delete.append(child["id"]) + if child["type"] != "dataset": + _get_children(child["id"]) + + _get_children(source_id) + if do_it: + for child_id in to_delete: + flexilims_session.delete(child_id) + return to_delete From 291087dd81be1c38a317a65ad873f621d783a4b4 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 17 Jun 2024 11:10:45 +0100 Subject: [PATCH 63/73] [doc] edit changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff73ed2..b5439f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - `check_flexilims_issues` can now add missing paths - `Dataset.from_origin` has a new `extra_attributes` argument to match online datasets with specific attributes only. 
+- `delete_recursively` can delete all children of an entity ### Minor - `add_mouse` uploads birth and death dates in a human readable format instead. From 3dbbde6c290de6d645093ab64e1ba7ac40888afc Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 18 Jun 2024 16:26:48 +0100 Subject: [PATCH 64/73] [bugfix] from_origin respects base_name It was looking for anything starting with base_name, improve by ensuring that base_name is followed by '_' Still risk of confusion between `poorly_chosen` and `poorly_chosen_name` for instance --- flexiznam/schema/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 1b0e93b..7148370 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -190,7 +190,7 @@ def from_origin( ) if len(processed): processed = processed[ - [g[-1].startswith(base_name) for g in processed.genealogy] + [g[-1].startswith(base_name + "_") for g in processed.genealogy] ] # If extra_attributes is provided, only consider datasets that have the exact From f23f47d398788ebc04dd66775b3da348ae2369bb Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Tue, 18 Jun 2024 16:27:25 +0100 Subject: [PATCH 65/73] [minor] verbose option for from_origin To display if a match was found or a new dataset created --- flexiznam/schema/datasets.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 7148370..a636e2c 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -133,6 +133,7 @@ def from_origin( flexilims_session=None, extra_attributes=None, ignore_attributes=(), + verbose=False, ): """Creates a dataset of a given type as a child of a parent entity @@ -162,6 +163,7 @@ def from_origin( same extra_attributes. ignore_attributes (list): list of arguments to ignore when comparing datasets for conflicts resolution. Used only if `extra_attributes` is provided. 
+ verbose (bool): print debug information Returns: :py:class:`flexiznam.schema.datasets.Dataset`: a dataset object (WITHOUT updating flexilims) @@ -245,6 +247,8 @@ def _create_new_ds( # CONFLICTS RESOLUTION # There are no datasets, create one if not already_processed: + if verbose: + print("No datasets of type %s found. Creating new" % dataset_type) return _create_new_ds( origin, base_name, @@ -264,8 +268,12 @@ def _create_new_ds( # If overwrite, ensure there is only one dataset of this type as we # won't be able to guess which one should be replaced if len(valid_processed) == 1: + if verbose: + print("Overwriting dataset %s" % valid_processed.iloc[0].name) return Dataset.from_dataseries(dataseries=valid_processed[0]) if len(processed) == 1: + if verbose: + print("Overwriting dataset %s" % processed.iloc[0].name) return Dataset.from_dataseries(dataseries=processed.iloc[0]) raise flz.errors.NameNotUniqueError( f"Multiple datasets of type {dataset_type} already exist(s):" @@ -274,9 +282,13 @@ def _create_new_ds( if conflicts == "skip": # If skip and we have an exact match, return it if len(valid_processed) == 1: + if verbose: + print("Skip. Returning dataset %s" % valid_processed.iloc[0].name) return Dataset.from_dataseries(dataseries=valid_processed[0]) # If there is no match, create a new dataset if len(valid_processed) == 0: + if verbose: + print("No matching dataset found. 
Creating new dataset") return _create_new_ds( origin, base_name, @@ -291,6 +303,8 @@ def _create_new_ds( ) if conflicts == "append": # Create a new dataset + if verbose: + print("Appending dataset") return _create_new_ds( origin, base_name, From 11cad5aafeabfaf7bd42421918adeeb3b34300cc Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 20 Jun 2024 15:49:51 +0100 Subject: [PATCH 66/73] [bugfix] verbose print had syntax error --- flexiznam/schema/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index a636e2c..08026e9 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -269,7 +269,7 @@ def _create_new_ds( # won't be able to guess which one should be replaced if len(valid_processed) == 1: if verbose: - print("Overwriting dataset %s" % valid_processed.iloc[0].name) + print("Overwriting dataset %s" % valid_processed[0].name) return Dataset.from_dataseries(dataseries=valid_processed[0]) if len(processed) == 1: if verbose: @@ -283,7 +283,7 @@ def _create_new_ds( # If skip and we have an exact match, return it if len(valid_processed) == 1: if verbose: - print("Skip. Returning dataset %s" % valid_processed.iloc[0].name) + print("Skip. 
Returning dataset %s" % valid_processed[0].name) return Dataset.from_dataseries(dataseries=valid_processed[0]) # If there is no match, create a new dataset if len(valid_processed) == 0: From 5d5a303ec87428853ecf989981b16a49e9f65362 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Wed, 24 Jul 2024 16:59:59 +0100 Subject: [PATCH 67/73] [bugfix] from_origin overwrite extra_attributes it was not overwriting --- flexiznam/schema/datasets.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 08026e9..51f343b 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -270,11 +270,15 @@ def _create_new_ds( if len(valid_processed) == 1: if verbose: print("Overwriting dataset %s" % valid_processed[0].name) - return Dataset.from_dataseries(dataseries=valid_processed[0]) + dataset = Dataset.from_dataseries(dataseries=valid_processed[0]) + dataset.extra_attributes = extra_attributes + return dataset if len(processed) == 1: if verbose: print("Overwriting dataset %s" % processed.iloc[0].name) - return Dataset.from_dataseries(dataseries=processed.iloc[0]) + dataset = Dataset.from_dataseries(dataseries=processed.iloc[0]) + dataset.extra_attributes = extra_attributes + return dataset raise flz.errors.NameNotUniqueError( f"Multiple datasets of type {dataset_type} already exist(s):" + f" {processed.loc[:, 'name']}" From 897fee8ae655b329b5c3bc56b02b5a845214209f Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 26 Jul 2024 11:03:58 +0100 Subject: [PATCH 68/73] [bugfix] delete_recursively deletes source --- flexiznam/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index 7c3f43f..97f4f95 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -1331,7 +1331,7 @@ def delete_recursively(source_id, flexilims_session, do_it=False): list: hexadecimal IDs of the entities to delete """ - to_delete = [] + to_delete = 
[source_id] def _get_children(parent_id): children = get_children( From 3eaca8aa15ceef8ceb59c3351bbccbcc7b7d15c8 Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 5 Aug 2024 18:44:47 +0100 Subject: [PATCH 69/73] [bugfix] in from_origin with skip --- flexiznam/schema/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index 51f343b..eca983e 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -209,7 +209,7 @@ def from_origin( if not differences: valid_processed.append(proc) else: - valid_processed = processed + valid_processed = [ser for _, ser in processed.iterrows()] already_processed = len(processed) > 0 From e0d1f6067d047da291ab162643df9beba4672e6d Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Thu, 19 Sep 2024 09:17:09 +0100 Subject: [PATCH 70/73] [feature] option to get session in offline mode --- flexiznam/main.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index 97f4f95..920b8fc 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -10,7 +10,7 @@ import yaml from flexiznam import mcms from flexiznam.config import PARAMETERS, get_password -from flexiznam.errors import NameNotUniqueError, FlexilimsError +from flexiznam.errors import NameNotUniqueError, FlexilimsError, ConfigurationError warnings.simplefilter("always", DeprecationWarning) @@ -93,6 +93,18 @@ def get_flexilims_session( Returns: :py:class:`flexilims.Flexilims`: Flexilims session object. 
""" + offline_mode = PARAMETERS.get("offline_mode", False) + if offline_mode: + yaml_file = PARAMETERS.get("offline_yaml", None) + if yaml_file is None: + raise ConfigurationError("offline_mode is set but offline_yaml is not") + yaml_file = Path(yaml_file) + if not yaml_file.exists(): + yaml_file = get_data_root("processed") / yaml_file + if not yaml_file.exists(): + raise ConfigurationError(f"offline_yaml file {yaml_file} not found") + flexilims_session = flm.OfflineFlexilims(yaml_file) + return flexilims_session if project_id is not None: project_id = _format_project(project_id, PARAMETERS) From 4e5590c696008f3e184c711d1225f83f75d8b8ba Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Fri, 20 Sep 2024 18:05:30 +0100 Subject: [PATCH 71/73] [feature] option to set offline mode outside of ops It is sometimes convenient to have both online and offline sessions at the same time. Do it by specifying arg in get_session --- flexiznam/main.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index 920b8fc..4e0ff94 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -76,6 +76,7 @@ def get_flexilims_session( password=None, reuse_token=True, timeout=10, + offline_mode=None, ): """Open a new flexilims session by creating a new authentication token. @@ -88,12 +89,19 @@ def get_flexilims_session( read from the secrets file, or failing that triggers an input prompt. reuse_token (bool): (optional) if True, try to reuse an existing token timeout (int): (optional) timeout in seconds for the portalocker lock. Default - to 10. + to 10. + offline_mode (bool): (optional) if True, will use an offline session. In this + case, the `offline_yaml` parameter must be set in the config file. If + not provided, will look for the `offline_mode` parameter in the config + file. Default to None. + Returns: :py:class:`flexilims.Flexilims`: Flexilims session object. 
""" - offline_mode = PARAMETERS.get("offline_mode", False) + if offline_mode is None: + offline_mode = PARAMETERS.get("offline_mode", False) + if offline_mode: yaml_file = PARAMETERS.get("offline_yaml", None) if yaml_file is None: From 046c2ccb8be6cdfa30ccdf0f573b167ebb2f383a Mon Sep 17 00:00:00 2001 From: Antonin Blot Date: Mon, 23 Sep 2024 16:18:44 +0100 Subject: [PATCH 72/73] [minor] adapt to offline flexilims --- flexiznam/main.py | 18 ++++++++++-------- flexiznam/schema/datasets.py | 2 +- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/flexiznam/main.py b/flexiznam/main.py index 4e0ff94..6347b4e 100755 --- a/flexiznam/main.py +++ b/flexiznam/main.py @@ -94,30 +94,32 @@ def get_flexilims_session( case, the `offline_yaml` parameter must be set in the config file. If not provided, will look for the `offline_mode` parameter in the config file. Default to None. - + Returns: :py:class:`flexilims.Flexilims`: Flexilims session object. """ + + if project_id is not None: + project_id = _format_project(project_id, PARAMETERS) + else: + warnings.warn("Starting flexilims session without setting project_id.") + if offline_mode is None: offline_mode = PARAMETERS.get("offline_mode", False) - + if offline_mode: yaml_file = PARAMETERS.get("offline_yaml", None) if yaml_file is None: raise ConfigurationError("offline_mode is set but offline_yaml is not") yaml_file = Path(yaml_file) if not yaml_file.exists(): - yaml_file = get_data_root("processed") / yaml_file + yaml_file = get_data_root("processed", project=project_id) / yaml_file if not yaml_file.exists(): raise ConfigurationError(f"offline_yaml file {yaml_file} not found") - flexilims_session = flm.OfflineFlexilims(yaml_file) + flexilims_session = flm.OfflineFlexilims(yaml_file, project_id=project_id) return flexilims_session - if project_id is not None: - project_id = _format_project(project_id, PARAMETERS) - else: - warnings.warn("Starting flexilims session without setting project_id.") if username is None: 
username = PARAMETERS["flexilims_username"] if password is None: diff --git a/flexiznam/schema/datasets.py b/flexiznam/schema/datasets.py index eca983e..2da759e 100644 --- a/flexiznam/schema/datasets.py +++ b/flexiznam/schema/datasets.py @@ -690,7 +690,7 @@ def flexilims_session(self, value): self._flexilims_session = value if value is None: return - if hasattr(value, "project_id"): + if hasattr(value, "project_id") and (value.project_id is not None): if self.project_id is None: self.project_id = value.project_id elif self.project_id != value.project_id: From 465730c13a8ae2c2c233c72020520e8324a5dc31 Mon Sep 17 00:00:00 2001 From: Petr Znamenskiy Date: Thu, 26 Sep 2024 14:06:43 +0100 Subject: [PATCH 73/73] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b5439f6..b28132d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - `Dataset.from_origin` has a new `extra_attributes` argument to match online datasets with specific attributes only. - `delete_recursively` can delete all children of an entity +- Offline mode using downloaded copy of the database ### Minor - `add_mouse` uploads birth and death dates in a human readable format instead.