From b359ac0cccc33afa452a7b56c28bfd6a8e6af2c7 Mon Sep 17 00:00:00 2001 From: Francois Date: Wed, 24 Jul 2024 14:07:53 +0100 Subject: [PATCH] disallowed extra fields in models, and updated ingest and export code to handle these --- bia-export/bia_export/website_models.py | 6 +- .../47a4ab60-c76d-4424-bfaa-c2a024de720c.json | 16 +---- .../test/input_data/studies/S-BIADTEST.json | 1 - bia-export/test/output_data/bia_export.json | 4 +- .../conversion/annotation_method.py | 5 +- .../bia_ingest_sm/conversion/biosample.py | 4 +- .../experimental_imaging_dataset.py | 6 +- .../conversion/file_reference.py | 2 + .../conversion/image_acquisition.py | 4 +- .../conversion/specimen_growth_protocol.py | 5 +- .../specimen_imaging_preparation_protocol.py | 5 +- .../bia_ingest_sm/conversion/study.py | 1 - .../bia_ingest_sm/conversion/utils.py | 8 ++- bia-ingest-shared-models/test/utils.py | 64 +++++-------------- .../bia_shared_datamodels/bia_data_model.py | 8 ++- 15 files changed, 63 insertions(+), 76 deletions(-) diff --git a/bia-export/bia_export/website_models.py b/bia-export/bia_export/website_models.py index 8927da8a..6c7bb75c 100644 --- a/bia-export/bia_export/website_models.py +++ b/bia-export/bia_export/website_models.py @@ -1,11 +1,11 @@ from __future__ import annotations from pydantic import Field from typing import List, Optional -from bia_shared_datamodels import bia_data_model, semantic_models +from bia_shared_datamodels import bia_data_model -class Study(semantic_models.Study, bia_data_model.DocumentMixin): +class Study(bia_data_model.Study): experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field(default_factory=list, description="""A dataset of that is associated with the study.""") -class ExperimentalImagingDataset(semantic_models.ExperimentalImagingDataset, bia_data_model.DocumentMixin): +class ExperimentalImagingDataset(bia_data_model.ExperimentalImagingDataset): pass \ No newline at end of file diff --git a/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json b/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json index 7d62a574..d1fece0f 100644 --- a/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json +++ b/bia-export/test/input_data/experimental_imaging_datasets/S-BIADTEST/47a4ab60-c76d-4424-bfaa-c2a024de720c.json @@ -1,26 +1,14 @@ { "title_id": "Study Component 1", "uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c", - "file_reference_count": 4, "description": "Description of study component 1", - "acquisition_process": [ - "c2e44a1b-a43c-476e-8ddf-8587f4c955b3" - ], - "specimen_imaging_preparation_protocol": [ - "7199d730-29f1-4ad8-b599-e9089cbb2d7b" - ], - "biological_entity": [ - "64a67727-4e7c-469a-91c4-6219ae072e99", - "6950718c-4917-47a1-a807-11b874e80a23" - ], - "specimen_growth_protocol": [], "analysis_method": [ { "protocol_description": "Test image analysis", "features_analysed": "Test image analysis overview" } ], + "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71", "correlation_method": [], - "example_image_uri": [], - "image_count": 0 + "example_image_uri": [] } \ No newline at end of file diff --git a/bia-export/test/input_data/studies/S-BIADTEST.json b/bia-export/test/input_data/studies/S-BIADTEST.json index 02c9edc1..1a603344 100644 --- a/bia-export/test/input_data/studies/S-BIADTEST.json +++ b/bia-export/test/input_data/studies/S-BIADTEST.json @@ -70,6 +70,5 @@ } ], "funding_statement": "This work was funded by the EBI", - "annotation_component": [], "attribute": {} } \ No newline at end of file diff --git a/bia-export/test/output_data/bia_export.json b/bia-export/test/output_data/bia_export.json index b74268d8..4924c7b2 100644 --- a/bia-export/test/output_data/bia_export.json +++ b/bia-export/test/output_data/bia_export.json @@ -73,6 +73,7 @@ "attribute": {}, "experimental_imaging_component": [ { + "title_id": "Study Component 1", "uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c", "description": "Description of study component 1", "analysis_method": [ @@ -82,7 +83,8 @@ } ], "correlation_method": [], - "example_image_uri": [] + "example_image_uri": [], + "submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71" } ] } \ No newline at end of file diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py index 67628f65..6f6845b6 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/annotation_method.py @@ -4,7 +4,8 @@ dicts_to_api_models, find_sections_recursive, dict_to_uuid, - persist + persist, + filter_model_dictionary ) from ..biostudies import ( Submission, @@ -52,6 +53,8 @@ def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, An model_dict["accno"] = section.__dict__.get("accno", "") model_dict["accession_id"] = submission.accno model_dict["uuid"] = generate_annotation_method_uuid(model_dict) + model_dict = filter_model_dictionary(model_dict, bia_data_model.AnnotationMethod) + model_dicts.append(model_dict) return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py index b5de7a8b..d14a5188 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/biosample.py @@ -4,7 +4,8 @@ dicts_to_api_models, find_sections_recursive, dict_to_uuid, - persist + persist, + filter_model_dictionary ) from ..biostudies import ( Submission, @@ -74,6 +75,7 @@ def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]: model_dict["accession_id"] = submission.accno model_dict["uuid"] = generate_biosample_uuid(model_dict) + model_dict = filter_model_dictionary(model_dict, bia_data_model.BioSample) model_dicts.append(model_dict) return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py index 356f792e..f16e6b0f 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/experimental_imaging_dataset.py @@ -5,7 +5,8 @@ get_generic_section_as_list, dict_to_uuid, get_generic_section_as_dict, - persist + persist, + filter_model_dictionary ) from .file_reference import get_file_reference_by_study_component import bia_ingest_sm.conversion.biosample as biosample_conversion @@ -97,6 +98,9 @@ def get_experimental_imaging_dataset( "example_image_uri": [], } model_dict["uuid"] = generate_experimental_imaging_dataset_uuid(model_dict) + + model_dict = filter_model_dictionary(model_dict, bia_data_model.ExperimentalImagingDataset) + experimental_imaging_dataset.append( bia_data_model.ExperimentalImagingDataset.model_validate(model_dict) ) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py index d1b921ef..7c06e47b 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/file_reference.py @@ -3,6 +3,7 @@ from typing import List, Dict from .utils import ( dict_to_uuid, + filter_model_dictionary, ) from ..biostudies import ( Submission, @@ -56,6 +57,7 @@ def get_file_reference_by_study_component( file_dict["submission_dataset"] = fileref_uuid file_dict["format"] = f.type file_dict["attribute"] = attributes_to_dict(f.attributes) + file_dict = filter_model_dictionary(file_dict, bia_data_model.FileReference) file_reference = bia_data_model.FileReference.model_validate(file_dict) output_path = output_dir / f"{fileref_uuid}.json" output_path.write_text(file_reference.model_dump_json(indent=2)) diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py index c78d2f9a..07321f13 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/image_acquisition.py @@ -4,7 +4,8 @@ dicts_to_api_models, find_sections_recursive, dict_to_uuid, - persist + persist, + filter_model_dictionary, ) from ..biostudies import ( Submission, @@ -51,6 +52,7 @@ def extract_image_acquisition_dicts(submission: Submission) -> List[Dict[str, An model_dict["accno"] = section.__dict__.get("accno", "") model_dict["accession_id"] = submission.accno model_dict["uuid"] = generate_image_acquisition_uuid(model_dict) + model_dict = filter_model_dictionary(model_dict, bia_data_model.ImageAcquisition) model_dicts.append(model_dict) return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py index f5e4b4a3..9c0457d1 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_growth_protocol.py @@ -4,7 +4,8 @@ dicts_to_api_models, find_sections_recursive, dict_to_uuid, - persist + persist, + filter_model_dictionary ) from ..biostudies import ( Submission, @@ -46,6 +47,8 @@ def extract_specimen_growth_protocol_dicts(submission: Submission) -> List[Dict[ model_dict["accno"] = section.__dict__.get("accno", "") model_dict["accession_id"] = submission.accno model_dict["uuid"] = generate_specimen_growth_protocol_uuid(model_dict) + model_dict = filter_model_dictionary(model_dict, bia_data_model.SpecimenGrowthProtocol) + model_dicts.append(model_dict) return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py index d0aa304a..1841a3c4 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/specimen_imaging_preparation_protocol.py @@ -4,7 +4,8 @@ dicts_to_api_models, find_sections_recursive, dict_to_uuid, - persist + persist, + filter_model_dictionary ) from ..biostudies import ( Submission, @@ -49,6 +50,8 @@ def extract_specimen_preparation_protocol_dicts(submission: Submission) -> List[ model_dict["accno"] = section.__dict__.get("accno", "") model_dict["accession_id"] = submission.accno model_dict["uuid"] = generate_specimen_imaging_preparation_uuid(model_dict) + model_dict = filter_model_dictionary(model_dict, bia_data_model.SpecimenImagingPrepartionProtocol) + model_dicts.append(model_dict) return model_dicts diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py index 64745831..ab98aab9 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/study.py @@ -65,7 +65,6 @@ def get_study( "author": [c.model_dump() for c in contributors], "grant": [g.model_dump() for g in grants], "attribute": study_attributes, - "annotation_component": [], } # study_uuid = dict_to_uuid(study_dict, ["accession_id",]) # study_dict["uuid"] = study_uuid diff --git a/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py b/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py index 3c49fe68..e920e787 100644 --- a/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py +++ b/bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py @@ -150,4 +150,10 @@ def persist(object_list: List, object_path: str, sumbission_accno: str): for object in object_list: output_path = output_dir / f"{object.uuid}.json" output_path.write_text(object.model_dump_json(indent=2)) - logger.info(f"Written {output_path}") \ No newline at end of file + logger.info(f"Written {output_path}") + + +def filter_model_dictionary(dictionary: dict, target_model: Type[BaseModel]): + accepted_fields = target_model.model_fields.keys() + result_dict = {key: dictionary[key] for key in accepted_fields} + return result_dict \ No newline at end of file diff --git a/bia-ingest-shared-models/test/utils.py b/bia-ingest-shared-models/test/utils.py index d6abc8ac..f7342042 100644 --- a/bia-ingest-shared-models/test/utils.py +++ b/bia-ingest-shared-models/test/utils.py @@ -6,8 +6,10 @@ from typing import Dict, List from bia_shared_datamodels import bia_data_model, semantic_models -from bia_ingest_sm.conversion.utils import dict_to_uuid - +from bia_ingest_sm.conversion.utils import ( + dict_to_uuid, + filter_model_dictionary +) def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]: # For UUID @@ -21,7 +23,7 @@ def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]: "method_type", "source_dataset", ] - protocol_info = [ + annotation_method_info = [ { "accno": "Annotations-29", "accession_id": "S-BIADTEST", @@ -34,47 +36,12 @@ def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]: }, ] - protocol = [] - for protocol_dict in protocol_info: - protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) - protocol.append(bia_data_model.AnnotationMethod.model_validate(protocol_dict)) - return protocol - - -def get_test_specimen_growth_protocol() -> List[bia_data_model.ImageAcquisition]: - # For UUID - attributes_to_consider = [ - "accession_id", - "accno", - "title_id", - "protocol_description", - ] - protocol_info = [ - { - "accno": "Image acquisition-3", - "accession_id": "S-BIADTEST", - "title_id": "Test Primary Screen Image Acquisition", - "protocol_description": "Test image acquisition parameters 1", - "imaging_instrument_description": "Test imaging instrument 1", - "imaging_method_name": "confocal microscopy", - "fbbi_id": [], - }, - { - "accno": "Image acquisition-7", - "accession_id": "S-BIADTEST", - "title_id": "Test Secondary Screen Image Acquisition", - "protocol_description": "Test image acquisition parameters 2", - "imaging_instrument_description": "Test imaging instrument 2", - "imaging_method_name": "fluorescence microscopy", - "fbbi_id": [], - }, - ] - - protocol = [] - for protocol_dict in protocol_info: - protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) - protocol.append(bia_data_model.ImageAcquisition.model_validate(protocol_dict)) - return protocol + annotation_method = [] + for annotation_method_dict in annotation_method_info: + annotation_method_dict["uuid"] = dict_to_uuid(annotation_method_dict, attributes_to_consider) + annotation_method_dict = filter_model_dictionary(annotation_method_dict, bia_data_model.AnnotationMethod) + annotation_method.append(bia_data_model.AnnotationMethod.model_validate(annotation_method_dict)) + return annotation_method def get_test_specimen_growth_protocol() -> List[bia_data_model.SpecimenGrowthProtocol]: @@ -103,6 +70,7 @@ def get_test_specimen_growth_protocol() -> List[bia_data_model.SpecimenGrowthPro protocol = [] for protocol_dict in protocol_info: protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) + protocol_dict = filter_model_dictionary(protocol_dict, bia_data_model.SpecimenGrowthProtocol) protocol.append( bia_data_model.SpecimenGrowthProtocol.model_validate(protocol_dict) ) @@ -139,6 +107,7 @@ def get_test_specimen_imaging_preparation_protocol() -> ( protocol = [] for protocol_dict in protocol_info: protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider) + protocol_dict = filter_model_dictionary(protocol_dict, bia_data_model.SpecimenImagingPrepartionProtocol) protocol.append( bia_data_model.SpecimenImagingPrepartionProtocol.model_validate(protocol_dict) ) @@ -213,6 +182,7 @@ def get_test_biosample() -> List[bia_data_model.BioSample]: biosample = [] for biosample_dict in biosample_info: biosample_dict["uuid"] = dict_to_uuid(biosample_dict, attributes_to_consider) + biosample_dict = filter_model_dictionary(biosample_dict, bia_data_model.BioSample) biosample.append(bia_data_model.BioSample.model_validate(biosample_dict)) return biosample @@ -252,6 +222,7 @@ def get_test_image_acquisition() -> List[bia_data_model.ImageAcquisition]: image_acquisition_dict["uuid"] = dict_to_uuid( image_acquisition_dict, attributes_to_consider ) + image_acquisition_dict = filter_model_dictionary(image_acquisition_dict, bia_data_model.ImageAcquisition) image_acquisition.append( bia_data_model.ImageAcquisition.model_validate(image_acquisition_dict) ) @@ -310,6 +281,7 @@ def get_test_experimental_imaging_dataset() -> ( ], ) experimental_imaging_dataset_dict["uuid"] = experimental_imaging_dataset_uuid + experimental_imaging_dataset_dict = filter_model_dictionary(experimental_imaging_dataset_dict, bia_data_model.ExperimentalImagingDataset) experimental_imaging_dataset1 = ( bia_data_model.ExperimentalImagingDataset.model_validate( experimental_imaging_dataset_dict @@ -523,10 +495,6 @@ def get_test_study() -> bia_data_model.Study: "Test keyword3", ], "grant": [g.model_dump() for g in grant], - "experimental_imaging_component": [ - e.uuid for e in get_test_experimental_imaging_dataset() - ], - "annotation_component": [], } study_uuid = dict_to_uuid( study_dict, diff --git a/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py b/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py index 730d7374..48f785d0 100644 --- a/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py +++ b/bia-shared-datamodels/src/bia_shared_datamodels/bia_data_model.py @@ -1,13 +1,19 @@ from __future__ import annotations from . import semantic_models -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, ConfigDict from typing import List, Optional from uuid import UUID from enum import Enum + + class DocumentMixin(BaseModel): + + # Throw error if you try to validate/create model from a dictionary with keys that aren't a field in the model + model_config = ConfigDict(extra="forbid") + uuid: UUID = Field( description="""Unique ID (across the BIA database) used to refer to and identify a document.""" )