From 66527f00926cbb6ffb8ac501be840a161b949b38 Mon Sep 17 00:00:00 2001 From: sherwoodf <161822064+sherwoodf@users.noreply.github.com> Date: Mon, 22 Jul 2024 17:34:52 +0100 Subject: [PATCH] Model updates, and some conversion logic (#123) * model updates to standardise names further and to account for which fields will actually be generated endpoints * model updates and added specimen growth, preparation, and image acquisition conversion logic * added logic to generate annotation method objects * tidied up imports * created empty annotation file list to make tests pass * moved file reference conversion to it's own file, fixed imports of shared models, and fixed file_name -> file_path as per model change for file references * updated models and ingest code --- .../conversion/annotation_method.py | 71 ++ .../bia_ingest_sm/conversion/biosample.py | 22 +- .../experimental_imaging_dataset.py | 107 +-- .../conversion/file_reference.py | 64 ++ .../conversion/image_acquisition.py | 69 ++ .../conversion/specimen_growth_protocol.py | 61 ++ .../specimen_imaging_preparation_protocol.py | 64 ++ .../bia_ingest_sm/conversion/study.py | 10 +- .../bia_ingest_sm/conversion/utils.py | 16 +- .../test/data/S-BIADTEST.json | 16 + .../test/data/file_list_annotations_1.json | 2 + .../test/test_shared_models.py | 22 +- bia-ingest-shared-models/test/utils.py | 660 ++++++++---------- bia-shared-datamodels/.vscode/settings.json | 5 +- .../bia_shared_datamodels/bia_data_model.py | 95 +-- .../bia_shared_datamodels/semantic_models.py | 416 ++++++----- .../test/test_shared_models.py | 74 +- bia-shared-datamodels/test/utils.py | 194 +++-- 18 files changed, 1138 insertions(+), 830 deletions(-) create mode 100644 bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py create mode 100644 bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py create mode 100644 bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py create mode 100644 
bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py create mode 100644 bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py create mode 100644 bia-ingest-shared-models/test/data/file_list_annotations_1.json diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py new file mode 100644 index 00000000..67628f65 --- /dev/null +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py @@ -0,0 +1,71 @@ +import logging +from typing import List, Any, Dict +from .utils import ( + dicts_to_api_models, + find_sections_recursive, + dict_to_uuid, + persist +) +from ..biostudies import ( + Submission, + attributes_to_dict, +) +from bia_shared_datamodels import bia_data_model + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def get_annotation_method( + submission: Submission, persist_artefacts=False +) -> List[bia_data_model.AnnotationMethod]: + + annotation_method_model_dicts = extract_annotation_method_dicts(submission) + annotation_methods = dicts_to_api_models(annotation_method_model_dicts, bia_data_model.AnnotationMethod) + + if persist_artefacts and annotation_methods: + persist(annotation_methods, "annotation_method", submission.accno) + + return annotation_methods + + +def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, Any]]: + annotation_sections = find_sections_recursive(submission.section, ["Annotations"], []) + + key_mapping = [ + ("title_id", "Name", ""), + ("protocol_description", "Annotation overview", ""), + ("annotation_criteria", "Annotation criteria", ""), + ("annotation_coverage", "Annotation coverage", ""), + ("method_type", "Annotation method", "other"), + ] + + model_dicts = [] + for section in annotation_sections: + attr_dict = attributes_to_dict(section.attributes) + + model_dict = {k: attr_dict.get(v, default) 
for k, v, default in key_mapping} + + # TODO: change template to get source dataset information + model_dict["source_dataset"] = [] + + model_dict["accno"] = section.__dict__.get("accno", "") + model_dict["accession_id"] = submission.accno + model_dict["uuid"] = generate_annotation_method_uuid(model_dict) + model_dicts.append(model_dict) + + return model_dicts + + +def generate_annotation_method_uuid(protocol_dict: Dict[str, Any]) -> str: + attributes_to_consider = [ + "accession_id", + "accno", + "title_id", + "protocol_description", + "annotation_criteria", + "annotation_coverage", + "method_type", + "source_dataset" + ] + return dict_to_uuid(protocol_dict, attributes_to_consider) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py index feb98dd4..b5de7a8b 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py @@ -1,17 +1,16 @@ import logging -from pathlib import Path from typing import List, Any, Dict from .utils import ( dicts_to_api_models, find_sections_recursive, - dict_to_uuid + dict_to_uuid, + persist ) from ..biostudies import ( Submission, attributes_to_dict, ) -from ..config import settings -from src.bia_models import bia_data_model, semantic_models +from bia_shared_datamodels import bia_data_model, semantic_models logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -25,14 +24,7 @@ def get_biosample( biosamples = dicts_to_api_models(biosample_model_dicts, bia_data_model.BioSample) if persist_artefacts and biosamples: - output_dir = Path(settings.bia_data_dir) / "biosamples" / submission.accno - if not output_dir.is_dir(): - output_dir.mkdir(parents=True) - logger.info(f"Created {output_dir}") - for biosample in biosamples: - output_path = output_dir / f"{biosample.uuid}.json" - output_path.write_text(biosample.model_dump_json(indent=2)) - 
logger.info(f"Written {output_path}") + persist(biosamples, "biosamples", submission.accno) return biosamples @@ -41,7 +33,7 @@ def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]: key_mapping = [ ("title_id", "Title", ""), - ("description", "Description", ""), + ("biological_entity_description", "Biological entity", ""), ("organism", "Organism", ""), ] @@ -93,9 +85,7 @@ def generate_biosample_uuid(biosample_dict: Dict[str, Any]) -> str: "accno", "title_id", "organism_classification", - "description", - # TODO: Discuss including below in semantic_models.BioSample - # "biological_entity", + "biological_entity_description", "intrinsic_variable_description", "extrinsic_variable_description", "experimental_variable_description", diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py index f815d62f..356f792e 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py @@ -1,23 +1,21 @@ import logging -from pathlib import Path -from typing import List, Dict +from typing import List, Dict, Any from .utils import ( find_sections_recursive, get_generic_section_as_list, dict_to_uuid, - get_generic_section_as_dict + get_generic_section_as_dict, + persist ) +from .file_reference import get_file_reference_by_study_component import bia_ingest_sm.conversion.biosample as biosample_conversion import bia_ingest_sm.conversion.study as study_conversion from ..biostudies import ( Submission, attributes_to_dict, - find_file_lists_in_submission, - flist_from_flist_fname, - file_uri, ) from ..config import settings -from src.bia_models import bia_data_model, semantic_models +from bia_shared_datamodels import bia_data_model, semantic_models logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -58,9 
+56,6 @@ def get_experimental_imaging_dataset( for section in study_components: attr_dict = attributes_to_dict(section.attributes) key_mapping = [ - ("biosample", "Biosample", None,), - ("specimen", "Specimen", None,), - ("image_acquisition", "Image acquisition", None,), ("image_analysis", "Image analysis", None,), ("image_correlation", "Image correlation", None,), ] @@ -69,11 +64,10 @@ def get_experimental_imaging_dataset( ) analysis_method_list = [] - biosample_list = [] - image_acquisition_method_list = [] correlation_method_list = [] - specimen_preparation_method_list = [] + biosample_list = [] + #TODO: move this to main CLI code to make object generation more independent if len(associations) > 0: # Image Analysis Method analysis_methods_from_associations = [ @@ -81,7 +75,7 @@ def get_experimental_imaging_dataset( ] for analysis_method in analysis_method_dict.values(): if ( - analysis_method.method_description + analysis_method.protocol_description in analysis_methods_from_associations ): analysis_method_list.append(analysis_method) @@ -92,44 +86,23 @@ def get_experimental_imaging_dataset( if biosample in biosamples_in_submission_uuid: biosample_list.extend(biosamples_in_submission_uuid[biosample]) + section_name = attr_dict["Name"] - study_component_file_references = file_reference_uuids.get(section_name, []) model_dict = { "title_id": section_name, - # "description": attr_dict["Description"], - "submitted_in_study": study_conversion.get_study_uuid(submission), - "file": study_component_file_references, - "image": [], - "specimen_preparation_method": specimen_preparation_method_list, - "acquisition_method": image_acquisition_method_list, - "biological_entity": biosample_list, + "description": attr_dict["Description"], + "submitted_in_study_uuid": study_conversion.get_study_uuid(submission), "analysis_method": analysis_method_list, "correlation_method": correlation_method_list, - "file_reference_count": len(study_component_file_references), - "image_count": 0, 
"example_image_uri": [], } - # TODO: Add 'description' to computation of uuid (Maybe accno?) - model_dict["uuid"] = dict_to_uuid( - model_dict, ["title_id", "submitted_in_study",] - ) + model_dict["uuid"] = generate_experimental_imaging_dataset_uuid(model_dict) experimental_imaging_dataset.append( bia_data_model.ExperimentalImagingDataset.model_validate(model_dict) ) if persist_artefacts and experimental_imaging_dataset: - output_dir = ( - Path(settings.bia_data_dir) - / "experimental_imaging_datasets" - / submission.accno - ) - if not output_dir.is_dir(): - output_dir.mkdir(parents=True) - logger.info(f"Created {output_dir}") - for dataset in experimental_imaging_dataset: - output_path = output_dir / f"{dataset.uuid}.json" - output_path.write_text(dataset.model_dump_json(indent=2)) - logger.info(f"Written {output_path}") + persist(experimental_imaging_dataset, "experimental_imaging_dataset", submission.accno) return experimental_imaging_dataset @@ -139,7 +112,7 @@ def get_image_analysis_method( ) -> Dict[str, semantic_models.ImageAnalysisMethod]: key_mapping = [ - ("method_description", "Title", None,), + ("protocol_description", "Title", None,), ("features_analysed", "Image analysis overview", None,), ] @@ -151,48 +124,10 @@ def get_image_analysis_method( ) -def get_file_reference_by_study_component( - submission: Submission, persist_artefacts: bool = False -) -> Dict[str, List[bia_data_model.FileReference]]: - """ - Return Dict of list of file references in study components. 
- """ - file_list_dicts = find_file_lists_in_submission(submission) - fileref_to_study_components = {} - - if persist_artefacts: - output_dir = Path(settings.bia_data_dir) / "file_references" / submission.accno - if not output_dir.is_dir(): - output_dir.mkdir(parents=True) - logger.info(f"Created {output_dir}") - - for file_list_dict in file_list_dicts: - study_component_name = file_list_dict["Name"] - if study_component_name not in fileref_to_study_components: - fileref_to_study_components[study_component_name] = [] - - fname = file_list_dict["File List"] - files_in_fl = flist_from_flist_fname(submission.accno, fname) - for f in files_in_fl: - file_dict = { - "accession_id": submission.accno, - "file_name": str(f.path), - "size_in_bytes": str(f.size), - } - fileref_uuid = dict_to_uuid( - file_dict, ["accession_id", "file_name", "size_in_bytes"] - ) - fileref_to_study_components[study_component_name].append(fileref_uuid) - # TODO - Not storing submission_dataset uuid yet!!! - if persist_artefacts: - file_dict["uuid"] = fileref_uuid - file_dict["uri"] = file_uri(submission.accno, f) - file_dict["submission_dataset"] = fileref_uuid - file_dict["format"] = f.type - file_dict["attribute"] = attributes_to_dict(f.attributes) - file_reference = bia_data_model.FileReference.model_validate(file_dict) - output_path = output_dir / f"{fileref_uuid}.json" - output_path.write_text(file_reference.model_dump_json(indent=2)) - logger.info(f"Written {output_path}") - - return fileref_to_study_components +def generate_experimental_imaging_dataset_uuid(experimental_imaging_dataset_dict: Dict[str, Any]) -> str: + # TODO: Add 'description' to computation of uuid (Maybe accno?) 
+ attributes_to_consider = [ + "title_id", + "submitted_in_study_uuid", + ] + return dict_to_uuid(experimental_imaging_dataset_dict, attributes_to_consider) \ No newline at end of file diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py new file mode 100644 index 00000000..d1b921ef --- /dev/null +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py @@ -0,0 +1,64 @@ +import logging +from pathlib import Path +from typing import List, Dict +from .utils import ( + dict_to_uuid, +) +from ..biostudies import ( + Submission, + attributes_to_dict, + find_file_lists_in_submission, + flist_from_flist_fname, + file_uri, +) +from ..config import settings +from bia_shared_datamodels import bia_data_model + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +def get_file_reference_by_study_component( + submission: Submission, persist_artefacts: bool = False +) -> Dict[str, List[bia_data_model.FileReference]]: + """ + Return Dict of list of file references in study components. 
+ """ + file_list_dicts = find_file_lists_in_submission(submission) + fileref_to_study_components = {} + + if persist_artefacts: + output_dir = Path(settings.bia_data_dir) / "file_references" / submission.accno + if not output_dir.is_dir(): + output_dir.mkdir(parents=True) + logger.info(f"Created {output_dir}") + + for file_list_dict in file_list_dicts: + study_component_name = file_list_dict["Name"] + if study_component_name not in fileref_to_study_components: + fileref_to_study_components[study_component_name] = [] + + fname = file_list_dict["File List"] + files_in_fl = flist_from_flist_fname(submission.accno, fname) + for f in files_in_fl: + file_dict = { + "accession_id": submission.accno, + "file_path": str(f.path), + "size_in_bytes": str(f.size), + } + fileref_uuid = dict_to_uuid( + file_dict, ["accession_id", "file_path", "size_in_bytes"] + ) + fileref_to_study_components[study_component_name].append(fileref_uuid) + # TODO - Not storing submission_dataset uuid yet!!! + if persist_artefacts: + file_dict["uuid"] = fileref_uuid + file_dict["uri"] = file_uri(submission.accno, f) + file_dict["submission_dataset"] = fileref_uuid + file_dict["format"] = f.type + file_dict["attribute"] = attributes_to_dict(f.attributes) + file_reference = bia_data_model.FileReference.model_validate(file_dict) + output_path = output_dir / f"{fileref_uuid}.json" + output_path.write_text(file_reference.model_dump_json(indent=2)) + logger.info(f"Written {output_path}") + + return fileref_to_study_components diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py new file mode 100644 index 00000000..c78d2f9a --- /dev/null +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py @@ -0,0 +1,69 @@ +import logging +from typing import List, Any, Dict +from .utils import ( + dicts_to_api_models, + find_sections_recursive, + dict_to_uuid, + persist +) +from ..biostudies 
import ( + Submission, + attributes_to_dict, +) +from bia_shared_datamodels import bia_data_model + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def get_image_acquisition( + submission: Submission, persist_artefacts=False +) -> List[bia_data_model.ImageAcquisition]: + + image_acquisition_model_dicts = extract_image_acquisition_dicts(submission) + image_acquisitions = dicts_to_api_models(image_acquisition_model_dicts, bia_data_model.ImageAcquisition) + + if persist_artefacts and image_acquisitions: + persist(image_acquisitions, "specimen_growth_protocol", submission.accno) + + return image_acquisitions + + +def extract_image_acquisition_dicts(submission: Submission) -> List[Dict[str, Any]]: + acquisition_sections = find_sections_recursive(submission.section, ["Image acquisition"], []) + + key_mapping = [ + ("title_id", "Title", ""), + ("protocol_description", "Image acquisition parameters", ""), + ("imaging_instrument_description", "Imaging instrument", ""), + ("imaging_method_name", "Imaging method", ""), + ] + + model_dicts = [] + for section in acquisition_sections: + attr_dict = attributes_to_dict(section.attributes) + + model_dict = {k: attr_dict.get(v, default) for k, v, default in key_mapping} + + # TODO: change template / create logic to lookup the fbbi ID + model_dict["fbbi_id"] = [] + + model_dict["accno"] = section.__dict__.get("accno", "") + model_dict["accession_id"] = submission.accno + model_dict["uuid"] = generate_image_acquisition_uuid(model_dict) + model_dicts.append(model_dict) + + return model_dicts + + +def generate_image_acquisition_uuid(protocol_dict: Dict[str, Any]) -> str: + attributes_to_consider = [ + "accession_id", + "accno", + "title_id", + "protocol_description", + "imaging_instrument_description", + "imaging_method_name", + "fbbi_id" + ] + return dict_to_uuid(protocol_dict, attributes_to_consider) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py 
b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py new file mode 100644 index 00000000..f5e4b4a3 --- /dev/null +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py @@ -0,0 +1,61 @@ +import logging +from typing import List, Any, Dict +from .utils import ( + dicts_to_api_models, + find_sections_recursive, + dict_to_uuid, + persist +) +from ..biostudies import ( + Submission, + attributes_to_dict, +) +from bia_shared_datamodels import bia_data_model + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def get_specimen_growth_protocol( + submission: Submission, persist_artefacts=False +) -> List[bia_data_model.SpecimenGrowthProtocol]: + + specimen_growth_protocol_model_dicts = extract_specimen_growth_protocol_dicts(submission) + specimen_growth_protocols = dicts_to_api_models(specimen_growth_protocol_model_dicts, bia_data_model.SpecimenGrowthProtocol) + + if persist_artefacts and specimen_growth_protocols: + persist(specimen_growth_protocols, "specimen_growth_protocol", submission.accno) + + return specimen_growth_protocols + + +def extract_specimen_growth_protocol_dicts(submission: Submission) -> List[Dict[str, Any]]: + specimen_sections = find_sections_recursive(submission.section, ["Specimen"], []) + + key_mapping = [ + ("title_id", "Title", ""), + ("protocol_description", "Growth protocol", ""), + ] + + model_dicts = [] + for section in specimen_sections: + attr_dict = attributes_to_dict(section.attributes) + + model_dict = {k: attr_dict.get(v, default) for k, v, default in key_mapping} + + model_dict["accno"] = section.__dict__.get("accno", "") + model_dict["accession_id"] = submission.accno + model_dict["uuid"] = generate_specimen_growth_protocol_uuid(model_dict) + model_dicts.append(model_dict) + + return model_dicts + + +def generate_specimen_growth_protocol_uuid(protocol_dict: Dict[str, Any]) -> str: + attributes_to_consider = [ + "accession_id", + "accno", + 
"title_id", + "protocol_description", + ] + return dict_to_uuid(protocol_dict, attributes_to_consider) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py new file mode 100644 index 00000000..d0aa304a --- /dev/null +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py @@ -0,0 +1,64 @@ +import logging +from typing import List, Any, Dict +from .utils import ( + dicts_to_api_models, + find_sections_recursive, + dict_to_uuid, + persist +) +from ..biostudies import ( + Submission, + attributes_to_dict, +) +from bia_shared_datamodels import bia_data_model + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + + +def get_specimen_imaging_preparation_protocol( + submission: Submission, persist_artefacts=False +) -> List[bia_data_model.SpecimenImagingPrepartionProtocol]: + + specimen_preparation_protocol_model_dicts = extract_specimen_preparation_protocol_dicts(submission) + specimen_preparation_protocols = dicts_to_api_models(specimen_preparation_protocol_model_dicts, bia_data_model.SpecimenImagingPrepartionProtocol) + + if persist_artefacts and specimen_preparation_protocols: + persist(specimen_preparation_protocols, "specimen_imaging_protocol", submission.accno) + + return specimen_preparation_protocols + + +def extract_specimen_preparation_protocol_dicts(submission: Submission) -> List[Dict[str, Any]]: + specimen_sections = find_sections_recursive(submission.section, ["Specimen"], []) + + key_mapping = [ + ("title_id", "Title", ""), + ("protocol_description", "Sample preparation protocol", ""), + ] + + model_dicts = [] + for section in specimen_sections: + attr_dict = attributes_to_dict(section.attributes) + + model_dict = {k: attr_dict.get(v, default) for k, v, default in key_mapping} + + # Currently generates empty list as we need to change the submission 
template + model_dict["signal_channel_information"] = [] + + model_dict["accno"] = section.__dict__.get("accno", "") + model_dict["accession_id"] = submission.accno + model_dict["uuid"] = generate_specimen_imaging_preparation_uuid(model_dict) + model_dicts.append(model_dict) + + return model_dicts + + +def generate_specimen_imaging_preparation_uuid(protocol_dict: Dict[str, Any]) -> str: + attributes_to_consider = [ + "accession_id", + "accno", + "title_id", + "protocol_description", + ] + return dict_to_uuid(protocol_dict, attributes_to_consider) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py index 8fc93848..64745831 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py @@ -14,7 +14,7 @@ attributes_to_dict, ) from ..config import settings -from src.bia_models import bia_data_model, semantic_models +from bia_shared_datamodels import bia_data_model, semantic_models logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -31,10 +31,10 @@ def get_study( contributors = get_contributor(submission) grants = get_grant(submission) - experimental_imaging_datasets = eid_conversion.get_experimental_imaging_dataset( + # TODO: move this to main CLI code to make object generation more independent + eid_conversion.get_experimental_imaging_dataset( submission, persist_artefacts=persist_artefacts ) - experimental_imaging_dataset_uuids = [e.uuid for e in experimental_imaging_datasets] study_attributes = attributes_to_dict(submission.section.attributes) @@ -65,7 +65,6 @@ def get_study( "author": [c.model_dump() for c in contributors], "grant": [g.model_dump() for g in grants], "attribute": study_attributes, - "experimental_imaging_component": experimental_imaging_dataset_uuids, "annotation_component": [], } # study_uuid = dict_to_uuid(study_dict, ["accession_id",]) @@ -87,9 +86,6 @@ def 
get_study( return study - - - def get_study_uuid(submission: Submission) -> str: return dict_to_uuid({"accession_id": submission.accno}, ["accession_id",]) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py index 0afe4ccd..3c49fe68 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py @@ -1,6 +1,5 @@ import logging from pathlib import Path -import re import hashlib import uuid from typing import List, Any, Dict, Optional, Tuple, Type, Union @@ -10,12 +9,8 @@ attributes_to_dict, Section, Attribute, - find_file_lists_in_submission, - flist_from_flist_fname, - file_uri, ) from ..config import settings -from src.bia_models import bia_data_model, semantic_models logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -145,3 +140,14 @@ def dict_to_uuid(my_dict: Dict[str, Any], attributes_to_consider: List[str]) -> seed = "".join([f"{my_dict[attr]}" for attr in attributes_to_consider]) hexdigest = hashlib.md5(seed.encode("utf-8")).hexdigest() return str(uuid.UUID(version=4, hex=hexdigest)) + + +def persist(object_list: List, object_path: str, sumbission_accno: str): + output_dir = Path(settings.bia_data_dir) / object_path / sumbission_accno + if not output_dir.is_dir(): + output_dir.mkdir(parents=True) + logger.info(f"Created {output_dir}") + for object in object_list: + output_path = output_dir / f"{object.uuid}.json" + output_path.write_text(object.model_dump_json(indent=2)) + logger.info(f"Written {output_path}") \ No newline at end of file diff --git a/bia-ingest-shared-models/test/data/S-BIADTEST.json b/bia-ingest-shared-models/test/data/S-BIADTEST.json index 049f9c1a..c6f2f63e 100644 --- a/bia-ingest-shared-models/test/data/S-BIADTEST.json +++ b/bia-ingest-shared-models/test/data/S-BIADTEST.json @@ -283,6 +283,22 @@ "name" : "Image analysis overview", "value" : "Test image 
analysis overview" } ] + }, { + "accno" : "Annotations-29", + "type" : "Annotations", + "attributes" : [ { + "name" : "Name", + "value" : "Segmentation masks" + }, { + "name" : "Annotation overview", + "value" : "Test annotation overview 1" + }, { + "name" : "Annotation criteria", + "value" : "Test annotation criteria 1" + }, { + "name" : "File List", + "value" : "file_list_annotations_1.json" + } ] }, { "accno" : "Study Component-1", "type" : "Study Component", diff --git a/bia-ingest-shared-models/test/data/file_list_annotations_1.json b/bia-ingest-shared-models/test/data/file_list_annotations_1.json new file mode 100644 index 00000000..0d4f101c --- /dev/null +++ b/bia-ingest-shared-models/test/data/file_list_annotations_1.json @@ -0,0 +1,2 @@ +[ +] diff --git a/bia-ingest-shared-models/test/test_shared_models.py b/bia-ingest-shared-models/test/test_shared_models.py index 4f7d194c..3486aa74 100644 --- a/bia-ingest-shared-models/test/test_shared_models.py +++ b/bia-ingest-shared-models/test/test_shared_models.py @@ -6,7 +6,11 @@ from bia_ingest_sm.conversion import ( biosample, experimental_imaging_dataset, - study + specimen_imaging_preparation_protocol, + study, + specimen_growth_protocol, + image_acquisition, + annotation_method, ) from bia_ingest_sm.biostudies import requests @@ -35,6 +39,22 @@ def mock_request_get(flist_url: str) -> Dict[str, str]: utils.get_test_experimental_imaging_dataset, experimental_imaging_dataset.get_experimental_imaging_dataset, ), + ( + utils.get_test_specimen_imaging_preparation_protocol, + specimen_imaging_preparation_protocol.get_specimen_imaging_preparation_protocol, + ), + ( + utils.get_test_specimen_growth_protocol, + specimen_growth_protocol.get_specimen_growth_protocol, + ), + ( + utils.get_test_image_acquisition, + image_acquisition.get_image_acquisition, + ), + ( + utils.get_test_annotation_method, + annotation_method.get_annotation_method, + ), # Not testing as we need to deal with links that are not proper # urls # 
(utils.get_test_external_reference, conversion.get_external_reference,), diff --git a/bia-ingest-shared-models/test/utils.py b/bia-ingest-shared-models/test/utils.py index 9a7de090..d6abc8ac 100644 --- a/bia-ingest-shared-models/test/utils.py +++ b/bia-ingest-shared-models/test/utils.py @@ -1,64 +1,148 @@ -"""Utility functions to create models - - This module attempts to create models starting from the outer nodes (leaves) of the - model dependency graph +""" +Utility functions to create models +This module attempts to create models starting from the outer nodes (leaves) of the model dependency graph """ from typing import Dict, List -from src.bia_models import bia_data_model, semantic_models +from bia_shared_datamodels import bia_data_model, semantic_models from bia_ingest_sm.conversion.utils import dict_to_uuid -from uuid import uuid4 -template_taxon = semantic_models.Taxon.model_validate( - { - "common_name": "Test Common Name", - "scientific_name": "Test Scientific Name", - "ncbi_id": "Test_NCBI_ID", - } -) + +def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]: + # For UUID + attributes_to_consider = [ + "accession_id", + "accno", + "title_id", + "protocol_description", + "annotation_criteria", + "annotation_coverage", + "method_type", + "source_dataset", + ] + protocol_info = [ + { + "accno": "Annotations-29", + "accession_id": "S-BIADTEST", + "title_id": "Segmentation masks", + "protocol_description": "Test annotation overview 1", + "annotation_criteria": "Test annotation criteria 1", + "annotation_coverage": "", + "method_type": "other", + "source_dataset": [], + }, + ] + + protocol = [] + for protocol_dict in protocol_info: + protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) + protocol.append(bia_data_model.AnnotationMethod.model_validate(protocol_dict)) + return protocol -def get_template_channel() -> semantic_models.Channel: - return semantic_models.Channel.model_validate( +def 
get_test_specimen_growth_protocol() -> List[bia_data_model.ImageAcquisition]: + # For UUID + attributes_to_consider = [ + "accession_id", + "accno", + "title_id", + "protocol_description", + ] + protocol_info = [ { - "colormap_start": 0.0, - "colormap_end": 1.0, - "scale_factor": 1.0, - "label": "Template label", - } - ) + "accno": "Image acquisition-3", + "accession_id": "S-BIADTEST", + "title_id": "Test Primary Screen Image Acquisition", + "protocol_description": "Test image acquisition parameters 1", + "imaging_instrument_description": "Test imaging instrument 1", + "imaging_method_name": "confocal microscopy", + "fbbi_id": [], + }, + { + "accno": "Image acquisition-7", + "accession_id": "S-BIADTEST", + "title_id": "Test Secondary Screen Image Acquisition", + "protocol_description": "Test image acquisition parameters 2", + "imaging_instrument_description": "Test imaging instrument 2", + "imaging_method_name": "fluorescence microscopy", + "fbbi_id": [], + }, + ] + + protocol = [] + for protocol_dict in protocol_info: + protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) + protocol.append(bia_data_model.ImageAcquisition.model_validate(protocol_dict)) + return protocol -def get_template_rendered_view() -> semantic_models.RenderedView: - return semantic_models.RenderedView.model_validate( +def get_test_specimen_growth_protocol() -> List[bia_data_model.SpecimenGrowthProtocol]: + # For UUID + attributes_to_consider = [ + "accession_id", + "accno", + "title_id", + "protocol_description", + ] + protocol_info = [ { - "z": "Template z position", - "t": "Template t position", - "channel_information": [ - get_template_channel(), - ], - } - ) + "accno": "Specimen-1", + "accession_id": "S-BIADTEST", + "title_id": "Test specimen 1", + "protocol_description": "Test growth protocol 1", + }, + { + "accno": "Specimen-2", + "accession_id": "S-BIADTEST", + "title_id": "Test specimen 2", + "protocol_description": "Test growth protocol 2", + }, + ] + + protocol 
= [] + for protocol_dict in protocol_info: + protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) + protocol.append( + bia_data_model.SpecimenGrowthProtocol.model_validate(protocol_dict) + ) + return protocol -def get_template_specimen_preparation_protocol() -> ( - bia_data_model.SpecimenPrepartionProtocol +def get_test_specimen_imaging_preparation_protocol() -> ( + List[bia_data_model.SpecimenImagingPrepartionProtocol] ): - specimen_preparation_protocol = ( - bia_data_model.SpecimenPrepartionProtocol.model_validate( - { - "uuid": uuid4(), - "title_id": "Test specimen preparation protocol", - "method_description": "Test description", - "signal_contrast_mechanism_description": "Test description", - "growth_protocol_description": "Test description", - "channel_content_description": "Test description", - "channel_biological_entity": "Test Entity", - } + # For UUID + attributes_to_consider = [ + "accession_id", + "accno", + "title_id", + "protocol_description", + ] + protocol_info = [ + { + "accno": "Specimen-1", + "accession_id": "S-BIADTEST", + "title_id": "Test specimen 1", + "protocol_description": "Test sample preparation protocol 1", + "signal_channel_information": [], + }, + { + "accno": "Specimen-2", + "accession_id": "S-BIADTEST", + "title_id": "Test specimen 2", + "protocol_description": "Test sample preparation protocol 2", + "signal_channel_information": [], + }, + ] + + protocol = [] + for protocol_dict in protocol_info: + protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) + protocol.append( + bia_data_model.SpecimenImagingPrepartionProtocol.model_validate(protocol_dict) ) - ) - return specimen_preparation_protocol + return protocol def get_test_biosample() -> List[bia_data_model.BioSample]: @@ -68,9 +152,7 @@ def get_test_biosample() -> List[bia_data_model.BioSample]: "accno", "title_id", "organism_classification", - "description", - # TODO: Discuss including below in semantic_models.BioSample - 
#"biological_entity", + "biological_entity_description", "intrinsic_variable_description", "extrinsic_variable_description", "experimental_variable_description", @@ -97,7 +179,7 @@ def get_test_biosample() -> List[bia_data_model.BioSample]: "organism_classification": [ taxon1.model_dump(), ], - "description": "Test description 1 (\"with some escaped chars\") ", + "biological_entity_description": "Test biological entity 1", "experimental_variable_description": [ "Test experimental entity 1", ], @@ -107,14 +189,15 @@ def get_test_biosample() -> List[bia_data_model.BioSample]: "intrinsic_variable_description": [ "Test intrinsic variable 1\nwith escaped character", ], - }, { + }, + { "accno": "Biosample-2", "accession_id": "S-BIADTEST", "title_id": "Test Biosample 2 ", "organism_classification": [ taxon2.model_dump(), ], - "description": "Test description 2", + "biological_entity_description": "Test biological entity 2", "experimental_variable_description": [ "Test experimental entity 2", ], @@ -126,7 +209,7 @@ def get_test_biosample() -> List[bia_data_model.BioSample]: ], }, ] - + biosample = [] for biosample_dict in biosample_info: biosample_dict["uuid"] = dict_to_uuid(biosample_dict, attributes_to_consider) @@ -134,123 +217,14 @@ def get_test_biosample() -> List[bia_data_model.BioSample]: return biosample -# Depends on: -# bia_data_model.BioSample -# bia_data_model.SpecimenPreparationProtocol -def get_template_specimen() -> bia_data_model.Specimen: - specimen = bia_data_model.Specimen.model_validate( - { - "preparation_method": [ - get_template_specimen_preparation_protocol().uuid, - ], - "sample_of": [ - biosample.uuid for biosample in get_test_biosample() - ], - } - ) - return specimen - - -# Depends on ExperimentalImagingDataset (circular) -def get_template_annotation_method() -> bia_data_model.AnnotationMethod: - annotation_method = bia_data_model.AnnotationMethod.model_validate( - { - "uuid": uuid4(), - "title_id": "Template annotation method", - 
"source_dataset": [], # ExperimentalImagingDataset.uuid or url - "method_description": "Template annotation method description", - "annotation_criteria": "Template annotation criteria", - "annotation_coverage": "Template annotation coverage", - "method_type": semantic_models.AnnotationType.class_labels, - } - ) - return annotation_method - - -# Depends on: -# bia_data_model.ExperimentalImagingDataset (circular dependency) -# bia_data_model.ImageAcquisition -# bia_data_model.ImageRepresentation -# bia_data_model.Specimen -def get_template_experimentally_captured_image() -> ( - bia_data_model.ExperimentallyCapturedImage -): - return bia_data_model.ExperimentallyCapturedImage.model_validate( - { - "uuid": uuid4(), - "acquisition_process": [get_template_image_acquisition().uuid], - "representation": [ - get_template_image_representation().uuid, - ], - "submission_dataset": get_template_experimental_imaging_dataset().uuid, - "subject": get_template_specimen(), - "attribute": {}, - } - ) - - -# Depends on: -# bia_data_model.ImageAnnotationDataset (circular dependency) -# bia_data_model.AnnotationMethod -# bia_data_model.ImageRepresentation -def get_template_derived_image() -> bia_data_model.DerivedImage: - derived_image = bia_data_model.DerivedImage.model_validate( - { - "uuid": uuid4(), - "source_image": [ - get_template_image_representation().uuid, - ], - "submission_dataset": get_template_image_annotation_dataset().uuid, - "creation_process": get_template_annotation_method().uuid, - "representation": [ - get_template_image_representation().uuid, - ], - "transformation_description": "Template transformation description", - "spatial_information": "Template spatial information", - "attribute": {}, - } - ) - return derived_image - - -# Depends on: -# bia_data_model.DerivedImage -# bia_data_model.FileReference (this is a circular dependence!) -# bia_data_model.Study -# bia_data_model.AnnotationFileReference (this is a circular dependence!) 
-# bia_data_model.AnnotationMethod -# -# TODO: Verify that in practice, the Datasets are created then the -# FileReference instances are added. So here we have empty lists -# for the dataset -def get_template_image_annotation_dataset() -> bia_data_model.ImageAnnotationDataset: - image_annotation_dataset = bia_data_model.ImageAnnotationDataset.model_validate( - { - "uuid": uuid4(), - "title_id": "Template image annotation dataset", - "image": [ - get_template_image_representation().uuid, - ], - "file": [], # This should be a list of FileReference UUIDs ... - "annotation_file": [], # This should be a list of AnnotationFileReference UUIDs ... - "submitted_in_study": get_template_study().uuid, - "annotation_method": get_template_annotation_method().uuid, - "file_reference_count": 0, - "image_count": 0, - "example_image_uri": ["https://dummy.url.org"], - } - ) - return image_annotation_dataset - - def get_test_image_acquisition() -> List[bia_data_model.ImageAcquisition]: attributes_to_consider = [ "accession_id", "accno", "title_id", - "method_description", + "protocol_description", "imaging_instrument_description", - "image_acquisition_parameters", + "imaging_method_name", "fbbi_id", ] image_acquisition_info = [ @@ -258,221 +232,134 @@ def get_test_image_acquisition() -> List[bia_data_model.ImageAcquisition]: "accno": "Image acquisition-3", "accession_id": "S-BIADTEST", "title_id": "Test Primary Screen Image Acquisition", - "method_description": "confocal microscopy", + "protocol_description": "Test image acquisition parameters 1", "imaging_instrument_description": "Test imaging instrument 1", - "image_acquisition_parameters": "Test image acquisition parameters 1", + "imaging_method_name": "confocal microscopy", "fbbi_id": [], - }, { + }, + { "accno": "Image acquisition-7", "accession_id": "S-BIADTEST", "title_id": "Test Secondary Screen Image Acquisition", - "method_description": "flourescence microscopy", + "protocol_description": "Test image acquisition parameters 
2", "imaging_instrument_description": "Test imaging instrument 2", - "image_acquisition_parameters": "Test image acquisition parameters 2", + "imaging_method_name": "fluorescence microscopy", "fbbi_id": [], }, ] image_acquisition = [] for image_acquisition_dict in image_acquisition_info: - image_acquisition_dict["uuid"] = dict_to_uuid(image_acquisition_dict, attributes_to_consider) - image_acquisition.append(bia_data_model.ImageAcquisition.model_validate(image_acquisition_dict)) + image_acquisition_dict["uuid"] = dict_to_uuid( + image_acquisition_dict, attributes_to_consider + ) + image_acquisition.append( + bia_data_model.ImageAcquisition.model_validate(image_acquisition_dict) + ) return image_acquisition def get_test_image_analysis_method() -> semantic_models.ImageAnalysisMethod: return semantic_models.ImageAnalysisMethod.model_validate( { - "method_description": "Test image analysis", + "protocol_description": "Test image analysis", "features_analysed": "Test image analysis overview", } ) -def get_template_image_correlation_method() -> semantic_models.ImageCorrelationMethod: +def get_test_image_correlation_method() -> semantic_models.ImageCorrelationMethod: return semantic_models.ImageCorrelationMethod.model_validate( { - "method_description": "Template Analysis method", + "protocol_description": "Template Analysis method", "fiducials_used": "Template fiducials used", "transformation_matrix": "Template transformation matrix", } ) -# TODO: Create FileReferences and ExperimentallyCapturedImage +# TODO: Create FileReferences and ExperimentallyCapturedImage def get_test_experimental_imaging_dataset() -> ( bia_data_model.ExperimentalImagingDataset ): study_uuid = dict_to_uuid( - {"accession_id": "S-BIADTEST",}, - attributes_to_consider=["accession_id",] - ) - # Create first study component - file_references = [{ - "accession_id": "S-BIADTEST", - "file_name": "study_component1/im06.png", - "size_in_bytes": 3, - },{ - "accession_id": "S-BIADTEST", - "file_name": 
"study_component1/im08.png", - "size_in_bytes": 123, - },{ - "accession_id": "S-BIADTEST", - "file_name": "study_component1/ann01-05", - "size_in_bytes": 11, - },{ + { "accession_id": "S-BIADTEST", - "file_name": "study_component1/ann06-10.json", - "size_in_bytes": 12, }, - ] - file_reference_uuids = get_test_file_reference_uuid(file_references) + attributes_to_consider=[ + "accession_id", + ], + ) experimental_imaging_dataset_dict = { "title_id": "Study Component 1", - "image": [], # This should be a list of Experimentally captured image UUIDs - "file": file_reference_uuids, - "submitted_in_study": study_uuid, - "specimen_preparation_method": [ - #get_template_specimen_preparation_protocol().uuid, - ], - "acquisition_method": [ - #get_test_image_acquisition()[0].uuid, - ], - # This study component uses both biosamples - "biological_entity": [ - biosample.uuid for biosample in get_test_biosample() - ], + "submitted_in_study_uuid": study_uuid, "analysis_method": [ get_test_image_analysis_method().model_dump(), ], "correlation_method": [ - #get_template_image_correlation_method().model_dump(), + # get_template_image_correlation_method().model_dump(), ], - "file_reference_count": 4, - "image_count": 0, "example_image_uri": [], + "description": "Description of study component 1", } - experimental_imaging_dataset_uuid = dict_to_uuid(experimental_imaging_dataset_dict, ["title_id", "submitted_in_study",]) + experimental_imaging_dataset_uuid = dict_to_uuid( + experimental_imaging_dataset_dict, + [ + "title_id", + "submitted_in_study_uuid", + ], + ) experimental_imaging_dataset_dict["uuid"] = experimental_imaging_dataset_uuid - experimental_imaging_dataset1 = bia_data_model.ExperimentalImagingDataset.model_validate(experimental_imaging_dataset_dict) + experimental_imaging_dataset1 = ( + bia_data_model.ExperimentalImagingDataset.model_validate( + experimental_imaging_dataset_dict + ) + ) # Create second study component - file_references = [{ + file_references = [ + { 
"accession_id": "S-BIADTEST", - "file_name": "study_component2/im06.png", + "file_path": "study_component2/im06.png", "size_in_bytes": 3, - },{ + }, + { "accession_id": "S-BIADTEST", - "file_name": "study_component2/im08.png", + "file_path": "study_component2/im08.png", "size_in_bytes": 123, - },{ + }, + { "accession_id": "S-BIADTEST", - "file_name": "study_component2/ann01-05", + "file_path": "study_component2/ann01-05", "size_in_bytes": 11, }, ] - file_reference_uuids = get_test_file_reference_uuid(file_references) - experimental_imaging_dataset_dict = { "title_id": "Study Component 2", - "image": [], # This should be a list of Experimentally captured image UUIDs - "file": file_reference_uuids, - "submitted_in_study": study_uuid, - "specimen_preparation_method": [ - #get_template_specimen_preparation_protocol().uuid, - ], - "acquisition_method": [ - #get_test_image_acquisition()[1].uuid, - ], - # This study component uses only second biosample - "biological_entity": [ - get_test_biosample()[1].uuid, - ], + "submitted_in_study_uuid": study_uuid, "analysis_method": [ get_test_image_analysis_method().model_dump(), ], "correlation_method": [ - #get_template_image_correlation_method().model_dump(), + # get_template_image_correlation_method().model_dump(), ], - "file_reference_count": 3, - "image_count": 0, "example_image_uri": [], + "description": "Description of study component 2", } - experimental_imaging_dataset_uuid = dict_to_uuid(experimental_imaging_dataset_dict, ["title_id", "submitted_in_study",]) - experimental_imaging_dataset_dict["uuid"] = experimental_imaging_dataset_uuid - experimental_imaging_dataset2 = bia_data_model.ExperimentalImagingDataset.model_validate(experimental_imaging_dataset_dict) - return [experimental_imaging_dataset1, experimental_imaging_dataset2] - - -# Depends on: -# bia_data_model.ImageAnnotationDataset (circular) -# bia_data_model.ExperimentalImagingDataset (circular) -def get_template_annotation_file_reference() -> 
bia_data_model.AnnotationFileReference: - return bia_data_model.AnnotationFileReference.model_validate( - { - "uuid": uuid4(), - "file_name": "Dummy file name", - "format": "Dummy format", - "size_in_bytes": 10, - "uri": "https://dummy.uri.co", - "attribute": {}, - "submission_dataset": get_template_image_annotation_dataset().uuid, - "source_image": [ - get_template_image_representation().uuid, - ], - "transformation_description": "Template transformation description", - "spatial_information": "Template spatial information", - "creation_process": get_template_annotation_method().uuid, - } - ) - - -# Depends on: -# bia_data_model.ImageAnnotationDataset (circular) -# bia_data_model.ExperimentalImagingDataset (circular) -def get_template_file_reference() -> bia_data_model.FileReference: - file_reference = bia_data_model.FileReference.model_validate( - { - "uuid": uuid4(), - "file_name": "Dummy file name", - "format": "Dummy format", - "size_in_bytes": 10, - "uri": "https://dummy.uri.co", - "attribute": {}, - "submission_dataset": get_template_experimental_imaging_dataset().uuid, - } + experimental_imaging_dataset_uuid = dict_to_uuid( + experimental_imaging_dataset_dict, + [ + "title_id", + "submitted_in_study_uuid", + ], ) - return file_reference - - -# Depends on: -# bia_data_model.FileReference ( -def get_template_image_representation() -> bia_data_model.ImageRepresentation: - return bia_data_model.ImageRepresentation.model_validate( - { - "uuid": uuid4(), - "original_file_reference": [ - get_template_file_reference().uuid, - ], - "image_format": "Template image format", - "file_uri": [ - "https://dummy.uri.org", - ], - "total_size_in_bytes": 0, - "physical_size_x": 1, - "physical_size_y": 1, - "physical_size_z": 1, - "size_x": 1, - "size_y": 1, - "size_z": 1, - "size_c": 1, - "size_t": 1, - "image_viewer_setting": [ - get_template_rendered_view().model_dump(), - ], - "attribute": {}, - } + experimental_imaging_dataset_dict["uuid"] = 
experimental_imaging_dataset_uuid + experimental_imaging_dataset2 = ( + bia_data_model.ExperimentalImagingDataset.model_validate( + experimental_imaging_dataset_dict + ) ) + return [experimental_imaging_dataset1, experimental_imaging_dataset2] def get_test_affiliation() -> Dict[str, semantic_models.Affiliation]: @@ -492,7 +379,10 @@ def get_test_affiliation() -> Dict[str, semantic_models.Affiliation]: "website": None, } ) - return { "o1": affiliation1, "o2": affiliation2, } + return { + "o1": affiliation1, + "o2": affiliation2, + } def get_test_contributor() -> Dict[str, semantic_models.Contributor]: @@ -526,57 +416,92 @@ def get_test_contributor() -> Dict[str, semantic_models.Contributor]: } ) - return [contributor1, contributor2,] + return [ + contributor1, + contributor2, + ] + def get_test_publication() -> List[semantic_models.Publication]: - publication1 = semantic_models.Publication.model_validate({ - "pubmed_id": "38381674", - "title": "Test publication 1", - # TODO: No release date -> ST only collects Year - "release_date": "2024", - # TODO: Author is a string here. - "author": "Test Author11, Test Author12.", - }) - publication2 = semantic_models.Publication.model_validate({ - "pubmed_id": "38106175", - "doi": "10.1101/2023.12.07.570699", - "title": "Test publication 2", - # TODO: Author is a string here. - "author": "Test Author21, Test Author22", - "release_date": "2023", - }) - return [publication1, publication2,] + publication1 = semantic_models.Publication.model_validate( + { + "pubmed_id": "38381674", + "title": "Test publication 1", + # TODO: No release date -> ST only collects Year + "release_date": "2024", + # TODO: Author is a string here. + "author": "Test Author11, Test Author12.", + } + ) + publication2 = semantic_models.Publication.model_validate( + { + "pubmed_id": "38106175", + "doi": "10.1101/2023.12.07.570699", + "title": "Test publication 2", + # TODO: Author is a string here. 
+ "author": "Test Author21, Test Author22", + "release_date": "2023", + } + ) + return [ + publication1, + publication2, + ] + def get_test_external_reference() -> List[semantic_models.ExternalReference]: - link1 = semantic_models.ExternalReference.model_validate({ - "link": "https://www.test.link1.com/", - "description": "Test link 1.", - }) - link1 = semantic_models.ExternalReference.model_validate({ - "link": "ERP116793", - "description": "Test ENA link", - "Type": "ENA", - }) - return [link1, link2,] + link1 = semantic_models.ExternalReference.model_validate( + { + "link": "https://www.test.link1.com/", + "description": "Test link 1.", + } + ) + link2 = semantic_models.ExternalReference.model_validate( + { + "link": "ERP116793", + "description": "Test ENA link", + "Type": "ENA", + } + ) + return [ + link1, + link2, + ] def get_test_grant() -> List[semantic_models.Grant]: - funding_body1 = semantic_models.FundingBody.model_validate({ - "display_name": "Test funding body1", - }) - funding_body2 = semantic_models.FundingBody.model_validate({ - "display_name": "Test funding body2", - }) - - grant1 = semantic_models.Grant.model_validate({ - "id": "TESTFUNDS1", - "funder": [funding_body1,], - }) - grant2 = semantic_models.Grant.model_validate({ - "id": "TESTFUNDS2", - "funder": [funding_body2,], - }) - return [grant1, grant2,] + funding_body1 = semantic_models.FundingBody.model_validate( + { + "display_name": "Test funding body1", + } + ) + funding_body2 = semantic_models.FundingBody.model_validate( + { + "display_name": "Test funding body2", + } + ) + + grant1 = semantic_models.Grant.model_validate( + { + "id": "TESTFUNDS1", + "funder": [ + funding_body1, + ], + } + ) + grant2 = semantic_models.Grant.model_validate( + { + "id": "TESTFUNDS2", + "funder": [ + funding_body2, + ], + } + ) + return [ + grant1, + grant2, + ] + def get_test_study() -> bia_data_model.Study: contributor = get_test_contributor() @@ -589,31 +514,38 @@ def get_test_study() -> 
bia_data_model.Study: "licence": semantic_models.LicenceType.CC0, "acknowledgement": "We thank you", "funding_statement": "This work was funded by the EBI", - "attribute": { - - }, + "attribute": {}, "related_publication": [], - "author": [ c.model_dump() for c in contributor ], + "author": [c.model_dump() for c in contributor], "keyword": [ "Test keyword1", "Test keyword2", "Test keyword3", ], - "grant": [ g.model_dump() for g in grant ], - "experimental_imaging_component": [e.uuid for e in get_test_experimental_imaging_dataset()], + "grant": [g.model_dump() for g in grant], + "experimental_imaging_component": [ + e.uuid for e in get_test_experimental_imaging_dataset() + ], "annotation_component": [], } - study_uuid = dict_to_uuid(study_dict, ["accession_id", ]) + study_uuid = dict_to_uuid( + study_dict, + [ + "accession_id", + ], + ) study_dict["uuid"] = study_uuid study = bia_data_model.Study.model_validate(study_dict) return study + def get_test_file_reference_uuid(file_references: List[Dict[str, str]]) -> List[str]: attributes_to_consider = [ "accession_id", - "file_name", + "file_path", "size_in_bytes", ] return [ - dict_to_uuid(file_reference, attributes_to_consider) for file_reference in file_references + dict_to_uuid(file_reference, attributes_to_consider) + for file_reference in file_references ] diff --git a/bia-shared-datamodels/.vscode/settings.json b/bia-shared-datamodels/.vscode/settings.json index fe205e28..e901c49f 100644 --- a/bia-shared-datamodels/.vscode/settings.json +++ b/bia-shared-datamodels/.vscode/settings.json @@ -7,5 +7,8 @@ "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", "editor.formatOnSave": true - } + }, + "python.analysis.extraPaths": [ + "./src" + ] } \ No newline at end of file diff --git a/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py b/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py index ff8104e4..d1447059 100644 --- 
a/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py +++ b/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py @@ -1,11 +1,10 @@ from __future__ import annotations from . import semantic_models -from pydantic import BaseModel, Field, AnyUrl -from typing import List, Optional, Union +from pydantic import BaseModel, Field +from typing import List, Optional from uuid import UUID - -from pydantic_core import Url +from enum import Enum class DocumentMixin(BaseModel): @@ -24,24 +23,27 @@ class Study( semantic_models.Study, DocumentMixin, ): - experimental_imaging_component: List[UUID] = Field() - annotation_component: List[UUID] = Field() author: List[semantic_models.Contributor] = Field(min_length=1) - description: str = Field() class FileReference( semantic_models.FileReference, DocumentMixin, ): - submission_dataset: UUID = Field() + submission_dataset_uuid: UUID = Field() + submission_dataset_type: DatasetType = Field( + description="""The type of dataset in which this file was submitted to the BioImage Archive.""" + ) class ImageRepresentation( semantic_models.ImageRepresentation, DocumentMixin, ): - original_file_reference: Optional[List[UUID]] = Field() + # We may want to store the FileReference -> Image(Represenation) rather than in the original_file_reference_uuid + original_file_reference_uuid: Optional[List[UUID]] = Field() + representation_of_uuid: UUID = Field() + abstract_image_type: AbstractImageType = Field() class ExperimentalImagingDataset( @@ -49,30 +51,22 @@ class ExperimentalImagingDataset( DocumentMixin, UserIdentifiedObject, ): - image: List[UUID] = Field() - file: List[UUID] = Field() - submitted_in_study: UUID = Field() - specimen_preparation_method: List[UUID] = Field() - acquisition_method: List[UUID] = Field() - biological_entity: List[UUID] = Field() - # we include image analysis and correlation + submitted_in_study_uuid: UUID = Field() -class Specimen(semantic_models.Specimen): - preparation_method: List[UUID] = 
Field(min_length=1) - sample_of: List[UUID] = Field(min_length=1) +class Specimen(semantic_models.Specimen, DocumentMixin): + imaging_preparation_protocol_uuid: List[UUID] = Field(min_length=1) + sample_of_uuid: List[UUID] = Field(min_length=1) + growth_protocol_uuid: List[UUID] = Field() class ExperimentallyCapturedImage( semantic_models.ExperimentallyCapturedImage, DocumentMixin, ): - acquisition_process: List[UUID] = Field() - representation: List[UUID] = Field() - submission_dataset: UUID = Field() - subject: Specimen = Field() - # note Specimen is included in image document, but needs to be overriden to link to protocol & biosample via uuid. - + acquisition_process_uuid: List[UUID] = Field() + submission_dataset_uuid: UUID = Field() + subject_uuid: UUID = Field() class ImageAcquisition( semantic_models.ImageAcquisition, @@ -82,8 +76,16 @@ class ImageAcquisition( pass -class SpecimenPrepartionProtocol( - semantic_models.SpecimenPrepartionProtocol, +class SpecimenImagingPrepartionProtocol( + semantic_models.SpecimenImagingPrepartionProtocol, + DocumentMixin, + UserIdentifiedObject, +): + pass + + +class SpecimenGrowthProtocol( + semantic_models.SpecimenGrowthProtocol, DocumentMixin, UserIdentifiedObject, ): @@ -103,30 +105,25 @@ class ImageAnnotationDataset( DocumentMixin, UserIdentifiedObject, ): - image: List[UUID] = Field() - file: List[UUID] = Field() - annotation_file: List[UUID] = Field() - submitted_in_study: UUID = Field() - annotation_method: List[UUID] = Field() + submitted_in_study_uuid: UUID = Field() class AnnotationFileReference( semantic_models.AnnotationFileReference, DocumentMixin, ): - source_image: List[UUID] = Field() - submission_dataset: UUID = Field() - creation_process: List[UUID] = Field() + submission_dataset_uuid: UUID = Field() + source_image_uuid: List[UUID] = Field() + creation_process_uuid: List[UUID] = Field() class DerivedImage( semantic_models.DerivedImage, DocumentMixin, ): - source_image: List[UUID] = Field() - 
submission_dataset: UUID = Field() - creation_process: List[UUID] = Field() - representation: List[UUID] = Field() + source_image_uuid: List[UUID] = Field() + submission_dataset_uuid: UUID = Field() + creation_process_uuid: List[UUID] = Field() class AnnotationMethod( @@ -134,4 +131,22 @@ class AnnotationMethod( DocumentMixin, UserIdentifiedObject, ): - source_dataset: List[Union[UUID, AnyUrl]] + pass + + +class DatasetType(str, Enum): + """ + The type of Dataset stored in the BIA. Used by File Referneces to + """ + + ExperimentalImagingDataset = "ExperimentalImagingDataset" + ImageAnnotationDataset = "ImageAnnotationDataset" + + +class AbstractImageType(str, Enum): + """ + The type of Abstract Image stored in the BIA. Used by Image representations to store + """ + + ExperimentallyDerivedImage = "ExperimentallyDerivedImage" + DerivedImage = "DerivedImage" diff --git a/bia-shared-datamodels/src/bia_shared_datamodels/semantic_models.py b/bia-shared-datamodels/src/bia_shared_datamodels/semantic_models.py index c59de3da..68cbd2b8 100644 --- a/bia-shared-datamodels/src/bia_shared_datamodels/semantic_models.py +++ b/bia-shared-datamodels/src/bia_shared_datamodels/semantic_models.py @@ -8,104 +8,36 @@ ####################################################################################################### -# Subgraph 1: Documents, contributors & their affiliations +# Subgraph 1: Studies and links to external information (publications, grants etc) ####################################################################################################### -class PersonMixin(BaseModel): - """ - Person information - """ - - orcid: Optional[str] = Field( - None, description="""Open Researcher and Contributor ID.""" - ) - - -class OrganisationMixin(BaseModel): - """ - Organisation information - """ - - rorid: Optional[str] = Field( - None, description="""Reasearch Organisation Registry ID.""" - ) - address: Optional[str] = Field( - None, description="""Comma separated lines of the 
address.""" - ) - website: Optional[AnyUrl] = Field( - default=None, - description="""The website page with information about the Organisation.""", - ) - - -class Contributor(PersonMixin, OrganisationMixin): +class Study(BaseModel): """ - A person or group that contributed to the creation of a Document. - """ - - display_name: str = Field( - description="""Name as it should be displayed on the BioImage Archive.""" - ) - affiliation: List[Affiliation] = Field( - default_factory=list, - description="""The organisation(s) a contributor is afiliated with.""", - ) - contact_email: Optional[EmailStr] = Field( - default=None, description="""An email address to contact the Contributor.""" - ) - role: Optional[str] = Field( - default=None, description="""The role of the contributor.""" - ) - - -class Affiliation(OrganisationMixin): - """ - An organsiation that a contributor is affiliated with. + A piece of scientific work that resulted in the creation of imaging data. """ - display_name: str = Field( - description="""Name as it should be displayed on the BioImage Archive.""" + accession_id: str = Field(description="""Unique ID provided by BioStudies.""") + licence: LicenceType = Field( + description="""The license under which the data associated with the study is made avaliable.""" ) - - -class DocumentMixin(BaseModel): - """ - A documentary resource or body of scientific work. - """ - author: List[Contributor] = Field(description="""The creators of the document.""") title: str = Field( - description="""The title of a scientific document. This will usually be displayed when search results including your data are shown.""" + description="""The title of a study. 
This will usually be displayed when search results including your data are shown.""" ) release_date: date = Field(description="""Date of first publication""") + description: str = Field( + None, description="""Brief description of the study.""" + ) keyword: Optional[List[str]] = Field( default_factory=list, - description="""Keywords or tags used to describe the subject of a document""", + description="""Keywords or tags used to describe the subject or context of the study.""", ) acknowledgement: Optional[str] = Field( default_factory=list, - description="""Any person or group that should be acknowledged with the document.""", + description="""Any person or group that should be acknowledged outside of the authors/main contributors to the study.""", ) - description: Optional[str] = Field( - None, description="""Brief description of the scientific document.""" - ) - - -####################################################################################################### -# Subgraph 2: Studies and links to external information (publications, grants etc) -####################################################################################################### - -class Study(DocumentMixin): - """ - A piece of scientific work that resulted in the creation of imaging data. 
- """ - - accession_id: str = Field(description="""Unique ID provided by BioStudies.""") - licence: LicenceType = Field( - description="""The license under which the data associated with the study is made avaliable.""" - ) see_also: Optional[List[ExternalReference]] = Field( default_factory=list, description="""Links to publications, github repositories, and other pages related to this Study.""", @@ -120,38 +52,35 @@ class Study(DocumentMixin): funding_statement: Optional[str] = Field( default_factory=list, description="""Description of how the study was funded.""" ) - experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field( - default_factory=list, - description="""A dataset of that is associated with the study.""", - ) - annotation_component: Optional[List[ImageAnnotationDataset]] = Field( - default_factory=list, description="""""" - ) + + # TODO: In order to maintian consistency these will be endpoints that run a query in the DB, rather than a stored field. + # experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field( + # default_factory=list, + # description="""A dataset of that is associated with the study.""", + # ) + # annotation_component: Optional[List[ImageAnnotationDataset]] = Field( + # default_factory=list, description="""""" + # ) + attribute: dict = Field( description="""Freeform key-value pairs from user provided metadata (e.g. filelist data) and experimental fields.""" ) - # Override optional description in DocumentMixin - description: str = Field( - None, description="""Brief description of the scientific document.""" - ) -class Publication(DocumentMixin): +class Publication(BaseModel): """ A published paper or written work. 
""" + authors_name: str = Field( + description="""The list of names of the authors as displayed in the publication.""" + ) + title: str = Field(description="""The title of the publication.""") + publication_year: int = Field(description="""Year the article was published""") pubmed_id: Optional[str] = Field( None, description="""Identifier for journal articles/abstracts in PubMed""" ) doi: Optional[str] = Field(None, description="""Digital Object Identifier (DOI)""") - # TODO: Discuss making changes below to allow Publications created by - # submission tool to be ingested. See https://app.clickup.com/t/8694zc48g - #doi: Optional[str] = Field(None, description="""Digital Object Identifier (DOI)""") - ## Override DocumentMixin.release_date as biostudies.Submission.Publication only has year of publication - #release_date: Optional[str] = Field(None, description="""Release date associated with publication. Not necessarily a well formatted date string""") - ## Override DocumentMixin.Authors as biostudies.Submission.Publication imports just a string with author names - #author: Optional[str] = Field(None, description="""Names of author(s)""") class ExternalReference(BaseModel): @@ -203,6 +132,68 @@ class LicenceType(str, Enum): CC_BY_40 = "CC_BY_4.0" +####################################################################################################### +# Subgraph 2: Contributors & their affiliations +####################################################################################################### + + +class PersonMixin(BaseModel): + """ + Person information + """ + + orcid: Optional[str] = Field( + None, description="""Open Researcher and Contributor ID.""" + ) + + +class OrganisationMixin(BaseModel): + """ + Organisation information + """ + + rorid: Optional[str] = Field( + None, description="""Reasearch Organisation Registry ID.""" + ) + address: Optional[str] = Field( + None, description="""Comma separated lines of the address.""" + ) + website: 
Optional[AnyUrl] = Field( + default=None, + description="""The website page with information about the Organisation.""", + ) + + +class Contributor(PersonMixin, OrganisationMixin): + """ + A person or group that contributed to the creation of a Document. + """ + + display_name: str = Field( + description="""Name as it should be displayed on the BioImage Archive.""" + ) + affiliation: List[Affiliation] = Field( + default_factory=list, + description="""The organisation(s) a contributor is afiliated with.""", + ) + contact_email: Optional[EmailStr] = Field( + default=None, description="""An email address to contact the Contributor.""" + ) + role: Optional[str] = Field( + default=None, description="""The role of the contributor.""" + ) + + +class Affiliation(OrganisationMixin): + """ + An organsiation that a contributor is affiliated with. + """ + + display_name: str = Field( + description="""Name as it should be displayed on the BioImage Archive.""" + ) + + ####################################################################################################### # Subgraph 3: Dataset mixin and it's files. Method (of dataset creation/maniuplation) mixin. ####################################################################################################### @@ -213,14 +204,19 @@ class DatasetMixin(BaseModel): A logical grouping of data (in files) based on the process involved in it's creation. """ - file: List[FileReference] = Field( - description="""Files associated with the dataset.""" - ) - file_reference_count: int = Field( - description="""Number of files associated with the study.""" - ) - submitted_in_study: Study = Field( - description="""The study the dataset was submitted in.""" + # TODO: In order to maintain consistency this will be an endpoint that runs a query in the DB, rather than a stored field. 
+ # file_reference_count: int = Field( + # description="""Number of files associated with the study.""" + # ) + + # TODO: submitted_in_study information is stored in submitted_in_study_uuid defined in bia_data_model. + # The field here will eventually be used to generate endpoints. + # submitted_in_study: Study = Field( + # description="""The study in which this dataset was submitted""" + # ) + + description: Optional[str] = Field( + None, description="""Brief description of the dataset.""" ) @@ -228,11 +224,9 @@ class FileReference(BaseModel): """ Information about a file, provided in file list. """ - - # TODO: Clarify if this should be file_name or file_path - file_name: str = Field(description="""The name of the file.""") - # TODO: Clarify if this should be biostudies 'type' or derived from - # file extension + + file_path: str = Field(description="""The path (including the name) of the file.""") + # TODO: Clarify if this should be biostudies 'type' or derived from file extension format: str = Field(description="""File format or type.""") size_in_bytes: int = Field(description="""Disc size in bytes.""") uri: str = Field(description="""URI from which the file can be accessed.""") @@ -240,17 +234,25 @@ class FileReference(BaseModel): description="""Freeform key-value pairs from user provided metadata (e.g. filelist data) and experimental fields.""" ) + # TODO: submission_dataset information is stored in submission_dataset_uuid defined in bia_data_model. + # The field here will eventually be used to generate endpoints. + # submission_dataset: DatasetMixin = Field( + # description="""The dataset in which this file was submitted to the BioImage Archive.""" + # ) + + class ProtocolMixin(BaseModel): """ A protocol for either capturing, combining, or analysing images.
""" - method_description: str = Field( - description="""Description of steps involved in the process or method.""" + protocol_description: str = Field( + description="""Description of steps involved in the process.""" ) + ####################################################################################################### # Subgraph 4: Abstract images & their representations ####################################################################################################### @@ -261,9 +263,10 @@ class AbstractImageMixin(BaseModel): The abstract notion of an image that can have many representions in different image formats. """ - representation: List[ImageRepresentation] = Field( - description="""Representation(s) of the image in a specific image format.""" - ) + # TODO: In order to maintain consistency this will be an endpoint that runs a query in the DB, rather than a stored field. + # representation: List[ImageRepresentation] = Field( + # description="""Representation(s) of the image in a specific image format.""" + # ) attribute: dict = Field( description="""Freeform key-value pairs from user provided metadata (e.g. filelist data) and experimental fields.""" ) @@ -275,6 +278,9 @@ class ImageRepresentation(BaseModel): This object was created from one or more file refences (usually one) provided by submitters to the BioImage Archive. """ + # TODO: representation_of information is stored in representation_of_uuid defined in bia_data_model. + # The field here will eventually be used to generate endpoints. 
+ # representation_of: AbstractImageMixin = Field( description="The abstraction of this image representation.") image_format: str = Field(description="""Image format of the combined files.""") file_uri: List[str] = Field( description="""URI(s) of the file(s) which together make up this image representation.""" @@ -318,11 +324,13 @@ class ImageRepresentation(BaseModel): None, description="""Settings of a particular view of an image, such as a specific timestamp of a timeseries, or camera placement in a 3D model.""", ) - original_file_reference: Optional[List[FileReference]] = Field( - default_factory=list, - description="""The user sumbitted file references from which this image representation was created. - If this ImageRepresentation was created by conversion from another representation this will be empty.""", - ) + # TODO: original_file_reference information is stored in original_file_reference_uuid defined in bia_data_model. + # The field here will eventually be used to generate endpoints. + # original_file_reference: Optional[List[FileReference]] = Field( + # default_factory=list, + # description="""The user submitted file references from which this image representation was created. + # If this ImageRepresentation was created by conversion from another representation this will be empty.""", + # ) attribute: dict = Field( description="""Freeform key-value pairs from user provided metadata (e.g. filelist data) and experimental fields.""" ) @@ -358,6 +366,7 @@ class Channel(BaseModel): ) + ####################################################################################################### # Subgraph 5: ImagingStudyComponents, Images, Acquisitions, Specimens, BioSample ####################################################################################################### @@ -368,18 +377,19 @@ class ExperimentalImagingDataset(DatasetMixin): A logical collection of images that were created by the same acquisition and preparation procols being applied to a biosample.
""" - image: List[ExperimentallyCapturedImage] = Field( - description="""Images associated with the dataset.""" - ) - acquisition_method: list[ImageAcquisition] = Field( - description="""Processes involved in the creation of the images and files in this dataset.""" - ) - specimen_preparation_method: list[SpecimenPrepartionProtocol] = Field( - description="""Processes involved in the creation of the samples that were then imaged.""" - ) - biological_entity: list[BioSample] = Field( - description="""The biological entity that was imaged.""" - ) + # TODO: In order to maintain consistency these fields will be endpoints that runs a query in the DB, rather than a stored field. + # acquisition_process: list[ImageAcquisition] = Field( + # description="""Processes involved in the creation of the images and files in this dataset.""" + # ) + # specimen_imaging_preparation_protocol: list[SpecimenImagingPrepartionProtocol] = Field( + # description="""Processes involved in the preprapartion of the samples for imaged.""" + # ) + # biological_entity: list[BioSample] = Field( + # description="""The biological entity or entities that were imaged.""" + # ) + # specimen_growth_protocol: Optional[list[SpecimenImagingPrepartionProtocol]] = Field( + # description="""Processes involved in the growth of the samples that were then imaged.""" + # ) analysis_method: Optional[list[ImageAnalysisMethod]] = Field( description="""Data analysis processes performed on the images.""" ) @@ -389,9 +399,10 @@ class ExperimentalImagingDataset(DatasetMixin): example_image_uri: list[str] = Field( description="A viewable image that is typical of the dataset." 
) - image_count: int = Field( - description="""Number of images associated with the dataset.""" - ) + # TODO: Image_count will be a computed endpoint, rather than stored, so that consistency is maintained + # image_count: int = Field( + # description="""Number of images associated with the dataset.""" + # ) class ExperimentallyCapturedImage(AbstractImageMixin): @@ -399,15 +410,18 @@ class ExperimentallyCapturedImage(AbstractImageMixin): The abstract result of subject being captured by an image acquisition event. This can have many representions in different image formats. """ - acquisition_process: List[ImageAcquisition] = Field( - description="""The processes involved in the creation of the image.""" - ) - subject: Specimen = Field( - description="""The specimen that was prepared for and captured in the field of view of the image.""" - ) - submission_dataset: ExperimentalImagingDataset = Field( - description="""The dataset in which image was first submitted to the BIA.""" - ) + pass + # TODO: All the fields below are stored in _uuid named fields defined in bia_data_model. 
+ # These fields will eventually be used to generate endpoints + # acquisition_process: List[ImageAcquisition] = Field( + # description="""The processes involved in the creation of the image.""" + # ) + # subject: Specimen = Field( + # description="""The specimen that was prepared for and captured in the field of view of the image.""" + # ) + # submission_dataset: ExperimentalImagingDataset = Field( + # description="""The dataset in which image was first submitted to the BIA.""" + # ) class ImageAcquisition(ProtocolMixin): @@ -418,22 +432,30 @@ class ImageAcquisition(ProtocolMixin): imaging_instrument_description: str = Field( description="""Names, types, or description of how the instruments used to create the image.""" ) - image_acquisition_parameters: str = Field( - description="""Parameters relevant to how the image was taken, such as instrument settings.""" - ) - fbbi_id: List[str] = Field( + fbbi_id: Optional[List[str]] = Field( description="""Biological Imaging Methods Ontology id indicating the kind of imaging that was perfomed.""" ) + imaging_method_name: Optional[str] = Field( + description="""Name of the kind of imaging method that was performed.""" + ) + + +class SpecimenImagingPrepartionProtocol(ProtocolMixin): + """ + The process to prepare biological entity for imaging. + """ + signal_channel_information: Optional[List[SignalChannelInformation]] + + +class SignalChannelInformation(BaseModel): + """ + Information about how signals were generated, staining compounds and their targets. + """ -class SpecimenPrepartionProtocol(ProtocolMixin): signal_contrast_mechanism_description: Optional[str] = Field( None, description="""How is the signal is generated by this sample.""" ) - growth_protocol_description: Optional[str] = Field( - None, - description="""How the specimen was grown, e.g. 
cell line cultures, crosses or plant growth.""", - ) channel_content_description: Optional[str] = Field( None, description="""What staining was used in preparation of the specimen (e.g. IEM, DAB).""", @@ -443,17 +465,31 @@ class SpecimenPrepartionProtocol(ProtocolMixin): ) +class SpecimenGrowthProtocol(ProtocolMixin): + """ + Protocol methods related to growth of the specimen. + """ + + pass + + class Specimen(BaseModel): """ The subject of an image acquisition, and the result of a BioSample being prepared to be imaged. """ - - sample_of: List[BioSample] = Field( - description="""The biological matter that sampled to create the specimen.""" - ) - preparation_method: List[SpecimenPrepartionProtocol] = Field( - description="""How the biosample was prepared for imaging.""" - ) + pass + # TODO: All the fields below are stored in _uuid named fields defined in bia_data_model. + # These fields will eventually be used to generate endpoints + # sample_of: List[BioSample] = Field( + # description="""The biological matter that sampled to create the specimen.""" + # ) + # imaging_preparation_protocol: List[SpecimenImagingPrepartionProtocol] = Field( + # description="""How the biosample was prepared for imaging.""" + # ) + # growth_protocol: Optional[List[SpecimenGrowthProtocol]] = Field( + # None, + # description="""How the specimen was grown, e.g. cell line cultures, crosses or plant growth.""", + # ) class BioSample(BaseModel): @@ -464,7 +500,7 @@ class BioSample(BaseModel): organism_classification: List[Taxon] = Field( description="""The classification of th ebiological matter.""" ) - description: str = Field( + biological_entity_description: str = Field( description="""A short description of the biological entity.""" ) experimental_variable_description: Optional[List[str]] = Field( @@ -517,21 +553,17 @@ class ImageAnnotationDataset(DatasetMixin): Information about the annotation process, such as methods used, or how much of a dataset was annotated. 
""" - annotation_method: List[AnnotationMethod] = Field( - description="""The process(es) that were performed to create the annotated data.""" - ) - annotation_file: List[AnnotationFileReference] = Field( - description="""Annotation files associated with the dataset.""" - ) - image: List[DerivedImage] = Field( - description="""Images associated with the dataset.""" - ) + # TODO: In order to maintain consistency this will be an endpoint that runs a query in the DB, rather than a stored field. + # annotation_method: List[AnnotationMethod] = Field( + # description="""The process(es) that were performed to create the annotated data.""" + # ) example_image_uri: List[str] = Field( description="A viewable image that is typical of the dataset." ) - image_count: int = Field( - description="""Number of images associated with the dataset.""" - ) + # TODO: In order to maintain consistency this will be an endpoint that runs a query in the DB, rather than a stored field. + # image_count: int = Field( + # description="""Number of images associated with the dataset.""" + # ) class AnnotationMethod(ProtocolMixin): @@ -539,9 +571,10 @@ class AnnotationMethod(ProtocolMixin): Information about the annotation process, such as methods used, or how much of a dataset was annotated. """ - source_dataset: Optional[List[Union[ExperimentalImagingDataset | AnyUrl]]] = Field( - description="""The datasets that were annotated.""" - ) + # TODO: This could be a UUID or a URL. In linked-data these would be the same, but this makes naming tricky in UUID world + # source_dataset: Optional[List[Union[ExperimentalImagingDataset | AnyUrl]]] = Field( + # description="""The datasets that were annotated.""" + # ) annotation_criteria: Optional[str] = Field( description="""Rules used to generate annotations.""" ) @@ -558,18 +591,20 @@ class AnnotationMixin(BaseModel): Information providing additional metadata or highlighting parts of an image. 
""" - source_image: List[ImageRepresentation] = Field( - description="""The original image(s) this file is annotating.""" - ) + # TODO: the fields below are stored in _uuid named fields defined in bia_data_model. + # The fields here will eventually be used to generate endpoints. + # source_image: List[ImageRepresentation] = Field( + # description="""The original image(s) this file is annotating.""" + # ) + # creation_process: List[AnnotationMethod] = Field( + # description="""The process that was followed to create the annotation.""" + # ) transformation_description: Optional[str] = Field( description="""Any transformations required to link annotations to the image.""" ) spatial_information: Optional[str] = Field( description="""Spatial information for non-pixel annotations.""" ) - creation_process: List[AnnotationMethod] = Field( - description="""The process that was followed to create the annotation.""" - ) class AnnotationFileReference(FileReference, AnnotationMixin): @@ -585,9 +620,11 @@ class DerivedImage(AnnotationMixin, AbstractImageMixin): An image that is an annotation of another image. """ - submission_dataset: ImageAnnotationDataset = Field( - description="""The dataset in which image was first submitted to the BIA.""" - ) + # TODO: the fields below are stored in _uuid named fields defined in bia_data_model. + # The fields here will eventually be used to generate endpoints. 
+ # submission_dataset: ImageAnnotationDataset = Field( + # description="""The dataset in which image was first submitted to the BIA.""" + # ) class AnnotationType(str, Enum): @@ -625,8 +662,11 @@ class AnnotationType(str, Enum): # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model # Need to do this in order to auto-generate the class diagram Contributor.model_rebuild() -DocumentMixin.model_rebuild() Study.model_rebuild() DatasetMixin.model_rebuild() ImageAnnotationDataset.model_rebuild() ExperimentalImagingDataset.model_rebuild() +BioSample.model_rebuild() +SpecimenImagingPrepartionProtocol.model_rebuild() +AnnotationMethod.model_rebuild() +ImageRepresentation.model_rebuild() \ No newline at end of file diff --git a/bia-shared-datamodels/test/test_shared_models.py b/bia-shared-datamodels/test/test_shared_models.py index 40fc0a0d..5ac45334 100644 --- a/bia-shared-datamodels/test/test_shared_models.py +++ b/bia-shared-datamodels/test/test_shared_models.py @@ -2,33 +2,52 @@ import pytest from pydantic import ValidationError from . 
import utils -from .utils import ( - bia_data_model, - semantic_models, -) +from bia_shared_datamodels import bia_data_model + @pytest.mark.parametrize( - ("expected_model_type", "model_creation_func",), ( - (bia_data_model.Study, utils.get_template_study,), - (bia_data_model.FileReference, utils.get_template_file_reference,), - (bia_data_model.ImageRepresentation, utils.get_template_image_representation,), + "expected_model_type", + "model_creation_func", + ), + ( + ( + bia_data_model.Study, + utils.get_template_study, + ), + ( + bia_data_model.FileReference, + utils.get_template_file_reference, + ), + ( + bia_data_model.ImageRepresentation, + utils.get_template_image_representation, + ), ( bia_data_model.ExperimentalImagingDataset, utils.get_template_experimental_imaging_dataset, ), - (bia_data_model.Specimen, utils.get_template_specimen,), + ( + bia_data_model.Specimen, + utils.get_template_specimen, + ), ( bia_data_model.ExperimentallyCapturedImage, utils.get_template_experimentally_captured_image, ), - (bia_data_model.ImageAcquisition, utils.get_template_image_acquisition,), ( - bia_data_model.SpecimenPrepartionProtocol, - utils.get_template_specimen_preparation_protocol, + bia_data_model.ImageAcquisition, + utils.get_template_image_acquisition, + ), + ( + bia_data_model.SpecimenImagingPrepartionProtocol, + utils.get_template_specimen_imaging_preparation_protocol, + ), + ( + bia_data_model.BioSample, + utils.get_template_biosample, ), - (bia_data_model.BioSample, utils.get_template_biosample,), ( bia_data_model.ImageAnnotationDataset, utils.get_template_image_annotation_dataset, @@ -37,8 +56,18 @@ bia_data_model.AnnotationFileReference, utils.get_template_annotation_file_reference, ), - (bia_data_model.DerivedImage, utils.get_template_derived_image,), - (bia_data_model.AnnotationMethod, utils.get_template_annotation_method,), + ( + bia_data_model.DerivedImage, + utils.get_template_derived_image, + ), + ( + bia_data_model.AnnotationMethod, + 
utils.get_template_annotation_method, + ), + ( + bia_data_model.SpecimenGrowthProtocol, + utils.get_template_specimen_growth_protocol, + ), ), ) def test_create_models(expected_model_type, model_creation_func): @@ -49,11 +78,20 @@ def test_create_models(expected_model_type, model_creation_func): def test_create_specimen_with_empty_lists_fails(): with pytest.raises(ValidationError): specimen = bia_data_model.Specimen.model_validate( - {"sample_of": [], "preparation_method": [],} + { + "sample_of": [], + "preparation_method": [], + } ) specimen = bia_data_model.Specimen.model_validate( - {"sample_of": [uuid4()], "preparation_method": [],} + { + "sample_of": [uuid4()], + "preparation_method": [], + } ) specimen = bia_data_model.Specimen.model_validate( - {"sample_of": [], "preparation_method": [uuid4()],} + { + "sample_of": [], + "preparation_method": [uuid4()], + } ) diff --git a/bia-shared-datamodels/test/utils.py b/bia-shared-datamodels/test/utils.py index 0a306371..cc3fc54b 100644 --- a/bia-shared-datamodels/test/utils.py +++ b/bia-shared-datamodels/test/utils.py @@ -1,17 +1,13 @@ -"""Utility functions to create models - - This module attempts to create models starting from the outer nodes (leaves) of the - model dependency graph +""" +Utility functions to create models +This module attempts to create models starting from the outer nodes (leaves) of the model dependency graph """ from pathlib import Path -import sys base_path = Path(__file__).parent -sys.path.append(f"{base_path.parent / 'src'}") -sys.path.append(f"{base_path.parent / 'src' / 'bia_models'}") -from bia_models import bia_data_model, semantic_models +from bia_shared_datamodels import bia_data_model, semantic_models from uuid import uuid4 template_taxon = semantic_models.Taxon.model_validate( @@ -46,23 +42,45 @@ def get_template_rendered_view() -> semantic_models.RenderedView: ) -def get_template_specimen_preparation_protocol() -> ( - bia_data_model.SpecimenPrepartionProtocol +def 
get_template_signal_channel_information() -> ( + semantic_models.SignalChannelInformation +): + return semantic_models.SignalChannelInformation.model_validate( + { + "signal_contrast_mechanism_description": "Test description", + "channel_content_description": "Test description", + "channel_biological_entity": "Test Entity", + } + ) + + +def get_template_specimen_imaging_preparation_protocol() -> ( + bia_data_model.SpecimenImagingPrepartionProtocol ): - specimen_preparation_protocol = ( - bia_data_model.SpecimenPrepartionProtocol.model_validate( + specimen_imaging_preparation_protocol = ( + bia_data_model.SpecimenImagingPrepartionProtocol.model_validate( { "uuid": uuid4(), "title_id": "Test specimen preparation protocol", - "method_description": "Test description", - "signal_contrast_mechanism_description": "Test description", - "growth_protocol_description": "Test description", - "channel_content_description": "Test description", - "channel_biological_entity": "Test Entity", + "protocol_description": "Test description", + "signal_channel_information": [ + get_template_signal_channel_information() + ], } ) ) - return specimen_preparation_protocol + return specimen_imaging_preparation_protocol + + +def get_template_specimen_growth_protocol() -> bia_data_model.SpecimenGrowthProtocol: + specimen_growth_protocol = bia_data_model.SpecimenGrowthProtocol.model_validate( + { + "uuid": uuid4(), + "title_id": "Test specimen preparation protocol", + "protocol_description": "Test description", + } + ) + return specimen_growth_protocol def get_template_biosample() -> bia_data_model.BioSample: @@ -73,7 +91,7 @@ def get_template_biosample() -> bia_data_model.BioSample: "organism_classification": [ template_taxon.model_dump(), ], - "description": "Test biosample description", + "biological_entity_description": "Test biological entity description", "experimental_variable_description": [ "Description of experimental variable", ], @@ -90,29 +108,33 @@ def get_template_biosample() -> 
bia_data_model.BioSample: # Depends on: # bia_data_model.BioSample -# bia_data_model.SpecimenPreparationProtocol +# bia_data_model.SpecimenImagingPreparationProtocol def get_template_specimen() -> bia_data_model.Specimen: specimen = bia_data_model.Specimen.model_validate( { - "preparation_method": [ - get_template_specimen_preparation_protocol().uuid, + "uuid": uuid4(), + "imaging_preparation_protocol_uuid": [ + get_template_specimen_imaging_preparation_protocol().uuid, ], - "sample_of": [ + "sample_of_uuid": [ get_template_biosample().uuid, ], + "growth_protocol_uuid": [ + get_template_specimen_growth_protocol().uuid, + ], } ) return specimen -# Depends on ExperimentalImagingDataset (circular) +# Depends on ExperimentalImagingDataset def get_template_annotation_method() -> bia_data_model.AnnotationMethod: annotation_method = bia_data_model.AnnotationMethod.model_validate( { "uuid": uuid4(), "title_id": "Template annotation method", "source_dataset": [], # ExperimentalImagingDataset.uuid or url - "method_description": "Template annotation method description", + "protocol_description": "Template annotation method description", "annotation_criteria": "Template annotation criteria", "annotation_coverage": "Template annotation coverage", "method_type": semantic_models.AnnotationType.class_labels, @@ -132,12 +154,9 @@ def get_template_experimentally_captured_image() -> ( return bia_data_model.ExperimentallyCapturedImage.model_validate( { "uuid": uuid4(), - "acquisition_process": [get_template_image_acquisition().uuid], - "representation": [ - get_template_image_representation().uuid, - ], - "submission_dataset": get_template_experimental_imaging_dataset().uuid, - "subject": get_template_specimen(), + "acquisition_process_uuid": [get_template_image_acquisition().uuid], + "submission_dataset_uuid": get_template_experimental_imaging_dataset().uuid, + "subject_uuid": get_template_specimen().uuid, "attribute": {}, } ) @@ -151,14 +170,11 @@ def get_template_derived_image() -> 
bia_data_model.DerivedImage: derived_image = bia_data_model.DerivedImage.model_validate( { "uuid": uuid4(), - "source_image": [ - get_template_image_representation().uuid, - ], - "submission_dataset": get_template_image_annotation_dataset().uuid, - "creation_process": [get_template_annotation_method().uuid], - "representation": [ + "source_image_uuid": [ get_template_image_representation().uuid, ], + "submission_dataset_uuid": get_template_image_annotation_dataset().uuid, + "creation_process_uuid": [get_template_annotation_method().uuid], "transformation_description": "Template transformation description", "spatial_information": "Template spatial information", "attribute": {}, @@ -168,29 +184,13 @@ def get_template_derived_image() -> bia_data_model.DerivedImage: # Depends on: -# bia_data_model.DerivedImage -# bia_data_model.FileReference (this is a circular dependence!) -# bia_data_model.Study -# bia_data_model.AnnotationFileReference (this is a circular dependence!) # bia_data_model.AnnotationMethod -# -# TODO: Verify that in practice, the Datasets are created then the -# FileReference instances are added. So here we have empty lists -# for the dataset def get_template_image_annotation_dataset() -> bia_data_model.ImageAnnotationDataset: image_annotation_dataset = bia_data_model.ImageAnnotationDataset.model_validate( { "uuid": uuid4(), + "submitted_in_study_uuid": get_template_study().uuid, "title_id": "Template image annotation dataset", - "image": [ - get_template_image_representation().uuid, - ], - "file": [], # This should be a list of FileReference UUIDs ... - "annotation_file": [], # This should be a list of AnnotationFileReference UUIDs ... 
- "submitted_in_study": get_template_study().uuid, - "annotation_method": [get_template_annotation_method().uuid], - "file_reference_count": 0, - "image_count": 0, "example_image_uri": ["https://dummy.url.org"], } ) @@ -202,9 +202,9 @@ def get_template_image_acquisition() -> bia_data_model.ImageAcquisition: { "uuid": uuid4(), "title_id": "Template image acquisition", - "method_description": "Template method description", + "protocol_description": "Template method description", "imaging_instrument_description": "Template imaging instrument", - "image_acquisition_parameters": "Template image acquisition parameters", + "imaging_method_name": "Template imaging method name", "fbbi_id": [ "Test FBBI ID", ], @@ -216,7 +216,7 @@ def get_template_image_acquisition() -> bia_data_model.ImageAcquisition: def get_template_image_analysis_method() -> semantic_models.ImageAnalysisMethod: return semantic_models.ImageAnalysisMethod.model_validate( { - "method_description": "Template Analysis method", + "protocol_description": "Template Analysis method", "features_analysed": "Template features analysed", } ) @@ -225,7 +225,7 @@ def get_template_image_analysis_method() -> semantic_models.ImageAnalysisMethod: def get_template_image_correlation_method() -> semantic_models.ImageCorrelationMethod: return semantic_models.ImageCorrelationMethod.model_validate( { - "method_description": "Template Analysis method", + "protocol_description": "Template Analysis method", "fiducials_used": "Template fiducials used", "transformation_matrix": "Template transformation matrix", } @@ -233,97 +233,85 @@ def get_template_image_correlation_method() -> semantic_models.ImageCorrelationM # Depends on: -# bia_data_model.ExperimentallyCapturedImage -# bia_data_model.FileReference (this is a circular dependence!) 
-# bia_data_model.Study # bia_data_model.SpecimenPreparationProtocol # bia_data_model.ImageAcquisition # bia_data_model.BioSample -# -# TODO: Verify that in practice, the Datasets are created then the -# FileReference instances are added. So here we have empty lists -# for the dataset +# bia_data_model.SpecimenGrowthProtocol def get_template_experimental_imaging_dataset() -> ( bia_data_model.ExperimentalImagingDataset ): - experimental_imaging_dataset = bia_data_model.ExperimentalImagingDataset.model_validate( - { - "uuid": uuid4(), - "title_id": "Template experimental image dataset", - "image": [], # This should be a list of Experimentally captured image UUIDs - "file": [], # This should be a list of FileReference UUIDs ... - "submitted_in_study": get_template_study().uuid, - "specimen_preparation_method": [ - get_template_specimen_preparation_protocol().uuid, - ], - "acquisition_method": [ - get_template_image_acquisition().uuid, - ], - "biological_entity": [ - get_template_biosample().uuid, - ], - "analysis_method": [ - get_template_image_analysis_method().model_dump(), - ], - "correlation_method": [ - get_template_image_correlation_method().model_dump(), - ], - "file_reference_count": 0, - "image_count": 0, - "example_image_uri": ["https://dummy.url.org"], - } + experimental_imaging_dataset = ( + bia_data_model.ExperimentalImagingDataset.model_validate( + { + "uuid": uuid4(), + "submitted_in_study_uuid": get_template_study().uuid, + "title_id": "Template experimental image dataset", + "analysis_method": [ + get_template_image_analysis_method().model_dump(), + ], + "correlation_method": [ + get_template_image_correlation_method().model_dump(), + ], + "example_image_uri": ["https://dummy.url.org"], + } + ) ) return experimental_imaging_dataset # Depends on: -# bia_data_model.ImageAnnotationDataset (circular) +# bia_data_model.ImageAnnotationDataset # bia_data_model.ExperimentalImagingDataset (circular) def get_template_annotation_file_reference() -> 
bia_data_model.AnnotationFileReference: return bia_data_model.AnnotationFileReference.model_validate( { "uuid": uuid4(), - "file_name": "Dummy file name", + "file_path": "Dummy file path", "format": "Dummy format", "size_in_bytes": 10, "uri": "https://dummy.uri.co", "attribute": {}, - "submission_dataset": get_template_image_annotation_dataset().uuid, - "source_image": [ + "submission_dataset_uuid": get_template_image_annotation_dataset().uuid, + "source_image_uuid": [ get_template_image_representation().uuid, ], "transformation_description": "Template transformation description", "spatial_information": "Template spatial information", - "creation_process": [get_template_annotation_method().uuid], + "creation_process_uuid": [get_template_annotation_method().uuid], } ) # Depends on: -# bia_data_model.ImageAnnotationDataset (circular) -# bia_data_model.ExperimentalImagingDataset (circular) +# bia_data_model.ImageAnnotationDataset +# or +# bia_data_model.ExperimentalImagingDataset +# the latter is tested here. 
def get_template_file_reference() -> bia_data_model.FileReference: file_reference = bia_data_model.FileReference.model_validate( { "uuid": uuid4(), - "file_name": "Dummy file name", + "file_path": "Dummy file path", "format": "Dummy format", "size_in_bytes": 10, "uri": "https://dummy.uri.co", "attribute": {}, - "submission_dataset": get_template_experimental_imaging_dataset().uuid, + "submission_dataset_uuid": get_template_experimental_imaging_dataset().uuid, + "submission_dataset_type": bia_data_model.DatasetType.ExperimentalImagingDataset } ) return file_reference # Depends on: -# bia_data_model.FileReference ( +# bia_data_model.FileReference def get_template_image_representation() -> bia_data_model.ImageRepresentation: return bia_data_model.ImageRepresentation.model_validate( { "uuid": uuid4(), - "original_file_reference": [ + "representation_of_uuid": get_template_experimentally_captured_image().uuid, + "abstract_image_type": bia_data_model.AbstractImageType.DerivedImage, + "original_file_reference_uuid": [ get_template_file_reference().uuid, ], "image_format": "Template image format", @@ -386,7 +374,6 @@ def get_template_study() -> bia_data_model.Study: "licence": semantic_models.LicenceType.CC0, "attribute": {}, "related_publication": [], - # From DocumentMixin "author": [ contributor.model_dump(), ], @@ -396,7 +383,6 @@ def get_template_study() -> bia_data_model.Study: "Template keyword1", "Template keyword2", ], - # Defined in study "experimental_imaging_component": [ uuid4(), ],