Skip to content

Commit

Permalink
disallowed extra fields in models, and updated ingest and export code…
Browse files Browse the repository at this point in the history
… to handle these (#127)
  • Loading branch information
sherwoodf authored Jul 29, 2024
1 parent df16b96 commit a57842b
Show file tree
Hide file tree
Showing 15 changed files with 63 additions and 76 deletions.
6 changes: 3 additions & 3 deletions bia-export/bia_export/website_models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from __future__ import annotations
from pydantic import Field
from typing import List, Optional
from bia_shared_datamodels import bia_data_model, semantic_models
from bia_shared_datamodels import bia_data_model


class Study(semantic_models.Study, bia_data_model.DocumentMixin):
class Study(bia_data_model.Study):
experimental_imaging_component: Optional[List[ExperimentalImagingDataset]] = Field(default_factory=list, description="""A dataset of that is associated with the study.""")

class ExperimentalImagingDataset(semantic_models.ExperimentalImagingDataset, bia_data_model.DocumentMixin):
class ExperimentalImagingDataset(bia_data_model.ExperimentalImagingDataset):
pass
Original file line number Diff line number Diff line change
@@ -1,26 +1,14 @@
{
"title_id": "Study Component 1",
"uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c",
"file_reference_count": 4,
"description": "Description of study component 1",
"acquisition_process": [
"c2e44a1b-a43c-476e-8ddf-8587f4c955b3"
],
"specimen_imaging_preparation_protocol": [
"7199d730-29f1-4ad8-b599-e9089cbb2d7b"
],
"biological_entity": [
"64a67727-4e7c-469a-91c4-6219ae072e99",
"6950718c-4917-47a1-a807-11b874e80a23"
],
"specimen_growth_protocol": [],
"analysis_method": [
{
"protocol_description": "Test image analysis",
"features_analysed": "Test image analysis overview"
}
],
"submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71",
"correlation_method": [],
"example_image_uri": [],
"image_count": 0
"example_image_uri": []
}
1 change: 0 additions & 1 deletion bia-export/test/input_data/studies/S-BIADTEST.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,5 @@
}
],
"funding_statement": "This work was funded by the EBI",
"annotation_component": [],
"attribute": {}
}
4 changes: 3 additions & 1 deletion bia-export/test/output_data/bia_export.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
"attribute": {},
"experimental_imaging_component": [
{
"title_id": "Study Component 1",
"uuid": "47a4ab60-c76d-4424-bfaa-c2a024de720c",
"description": "Description of study component 1",
"analysis_method": [
Expand All @@ -82,7 +83,8 @@
}
],
"correlation_method": [],
"example_image_uri": []
"example_image_uri": [],
"submitted_in_study_uuid": "a2fdbd58-ee11-4cd9-bc6a-f3d3da7fff71"
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -52,6 +53,8 @@ def extract_annotation_method_dicts(submission: Submission) -> List[Dict[str, An
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_annotation_method_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.AnnotationMethod)

model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -74,6 +75,7 @@ def extract_biosample_dicts(submission: Submission) -> List[Dict[str, Any]]:

model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_biosample_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.BioSample)
model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
get_generic_section_as_list,
dict_to_uuid,
get_generic_section_as_dict,
persist
persist,
filter_model_dictionary
)
from .file_reference import get_file_reference_by_study_component
import bia_ingest_sm.conversion.biosample as biosample_conversion
Expand Down Expand Up @@ -97,6 +98,9 @@ def get_experimental_imaging_dataset(
"example_image_uri": [],
}
model_dict["uuid"] = generate_experimental_imaging_dataset_uuid(model_dict)

model_dict = filter_model_dictionary(model_dict, bia_data_model.ExperimentalImagingDataset)

experimental_imaging_dataset.append(
bia_data_model.ExperimentalImagingDataset.model_validate(model_dict)
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import List, Dict
from .utils import (
dict_to_uuid,
filter_model_dictionary,
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -56,6 +57,7 @@ def get_file_reference_by_study_component(
file_dict["submission_dataset"] = fileref_uuid
file_dict["format"] = f.type
file_dict["attribute"] = attributes_to_dict(f.attributes)
file_dict = filter_model_dictionary(file_dict, bia_data_model.FileReference)
file_reference = bia_data_model.FileReference.model_validate(file_dict)
output_path = output_dir / f"{fileref_uuid}.json"
output_path.write_text(file_reference.model_dump_json(indent=2))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary,
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -51,6 +52,7 @@ def extract_image_acquisition_dicts(submission: Submission) -> List[Dict[str, An
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_image_acquisition_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.ImageAcquisition)
model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -46,6 +47,8 @@ def extract_specimen_growth_protocol_dicts(submission: Submission) -> List[Dict[
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_specimen_growth_protocol_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.SpecimenGrowthProtocol)

model_dicts.append(model_dict)

return model_dicts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
dicts_to_api_models,
find_sections_recursive,
dict_to_uuid,
persist
persist,
filter_model_dictionary
)
from ..biostudies import (
Submission,
Expand Down Expand Up @@ -49,6 +50,8 @@ def extract_specimen_preparation_protocol_dicts(submission: Submission) -> List[
model_dict["accno"] = section.__dict__.get("accno", "")
model_dict["accession_id"] = submission.accno
model_dict["uuid"] = generate_specimen_imaging_preparation_uuid(model_dict)
model_dict = filter_model_dictionary(model_dict, bia_data_model.SpecimenImagingPrepartionProtocol)

model_dicts.append(model_dict)

return model_dicts
Expand Down
1 change: 0 additions & 1 deletion bia-ingest-shared-models/bia_ingest_sm/conversion/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ def get_study(
"author": [c.model_dump() for c in contributors],
"grant": [g.model_dump() for g in grants],
"attribute": study_attributes,
"annotation_component": [],
}
# study_uuid = dict_to_uuid(study_dict, ["accession_id",])
# study_dict["uuid"] = study_uuid
Expand Down
8 changes: 7 additions & 1 deletion bia-ingest-shared-models/bia_ingest_sm/conversion/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,10 @@ def persist(object_list: List, object_path: str, sumbission_accno: str):
for object in object_list:
output_path = output_dir / f"{object.uuid}.json"
output_path.write_text(object.model_dump_json(indent=2))
logger.info(f"Written {output_path}")
logger.info(f"Written {output_path}")


def filter_model_dictionary(dictionary: dict, target_model: Type[BaseModel]) -> dict:
    """Return a copy of ``dictionary`` restricted to the fields of ``target_model``.

    Keys of ``dictionary`` that are not declared fields of ``target_model`` are
    dropped, so the result can be validated against a model that rejects
    unknown keys.

    Keys are emitted in the model's field order. A model field that is absent
    from ``dictionary`` is simply left out of the result instead of raising
    ``KeyError``; this lets optional/defaulted fields be omitted by callers and
    leaves the reporting of genuinely missing required fields to the model's
    own validation.

    Args:
        dictionary: Source mapping, possibly containing extra keys.
        target_model: Model class whose ``model_fields`` names define the
            accepted keys.

    Returns:
        A new dict containing only the accepted keys present in ``dictionary``.
        The input mapping is not modified.
    """
    accepted_fields = target_model.model_fields
    return {key: dictionary[key] for key in accepted_fields if key in dictionary}
64 changes: 16 additions & 48 deletions bia-ingest-shared-models/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

from typing import Dict, List
from bia_shared_datamodels import bia_data_model, semantic_models
from bia_ingest_sm.conversion.utils import dict_to_uuid

from bia_ingest_sm.conversion.utils import (
dict_to_uuid,
filter_model_dictionary
)

def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]:
# For UUID
Expand All @@ -21,7 +23,7 @@ def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]:
"method_type",
"source_dataset",
]
protocol_info = [
annotation_method_info = [
{
"accno": "Annotations-29",
"accession_id": "S-BIADTEST",
Expand All @@ -34,47 +36,12 @@ def get_test_annotation_method() -> List[bia_data_model.AnnotationMethod]:
},
]

protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol.append(bia_data_model.AnnotationMethod.model_validate(protocol_dict))
return protocol


def get_test_specimen_growth_protocol() -> List[bia_data_model.ImageAcquisition]:
# For UUID
attributes_to_consider = [
"accession_id",
"accno",
"title_id",
"protocol_description",
]
protocol_info = [
{
"accno": "Image acquisition-3",
"accession_id": "S-BIADTEST",
"title_id": "Test Primary Screen Image Acquisition",
"protocol_description": "Test image acquisition parameters 1",
"imaging_instrument_description": "Test imaging instrument 1",
"imaging_method_name": "confocal microscopy",
"fbbi_id": [],
},
{
"accno": "Image acquisition-7",
"accession_id": "S-BIADTEST",
"title_id": "Test Secondary Screen Image Acquisition",
"protocol_description": "Test image acquisition parameters 2",
"imaging_instrument_description": "Test imaging instrument 2",
"imaging_method_name": "fluorescence microscopy",
"fbbi_id": [],
},
]

protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol.append(bia_data_model.ImageAcquisition.model_validate(protocol_dict))
return protocol
annotation_method = []
for annotation_method_dict in annotation_method_info:
annotation_method_dict["uuid"] = dict_to_uuid(annotation_method_dict, attributes_to_consider)
annotation_method_dict = filter_model_dictionary(annotation_method_dict, bia_data_model.AnnotationMethod)
annotation_method.append(bia_data_model.AnnotationMethod.model_validate(annotation_method_dict))
return annotation_method


def get_test_specimen_growth_protocol() -> List[bia_data_model.SpecimenGrowthProtocol]:
Expand Down Expand Up @@ -103,6 +70,7 @@ def get_test_specimen_growth_protocol() -> List[bia_data_model.SpecimenGrowthPro
protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol_dict = filter_model_dictionary(protocol_dict, bia_data_model.SpecimenGrowthProtocol)
protocol.append(
bia_data_model.SpecimenGrowthProtocol.model_validate(protocol_dict)
)
Expand Down Expand Up @@ -139,6 +107,7 @@ def get_test_specimen_imaging_preparation_protocol() -> (
protocol = []
for protocol_dict in protocol_info:
protocol_dict["uuid"] = dict_to_uuid(protocol_dict, attributes_to_consider)
protocol_dict = filter_model_dictionary(protocol_dict, bia_data_model.SpecimenImagingPrepartionProtocol)
protocol.append(
bia_data_model.SpecimenImagingPrepartionProtocol.model_validate(protocol_dict)
)
Expand Down Expand Up @@ -213,6 +182,7 @@ def get_test_biosample() -> List[bia_data_model.BioSample]:
biosample = []
for biosample_dict in biosample_info:
biosample_dict["uuid"] = dict_to_uuid(biosample_dict, attributes_to_consider)
biosample_dict = filter_model_dictionary(biosample_dict, bia_data_model.BioSample)
biosample.append(bia_data_model.BioSample.model_validate(biosample_dict))
return biosample

Expand Down Expand Up @@ -252,6 +222,7 @@ def get_test_image_acquisition() -> List[bia_data_model.ImageAcquisition]:
image_acquisition_dict["uuid"] = dict_to_uuid(
image_acquisition_dict, attributes_to_consider
)
image_acquisition_dict = filter_model_dictionary(image_acquisition_dict, bia_data_model.ImageAcquisition)
image_acquisition.append(
bia_data_model.ImageAcquisition.model_validate(image_acquisition_dict)
)
Expand Down Expand Up @@ -310,6 +281,7 @@ def get_test_experimental_imaging_dataset() -> (
],
)
experimental_imaging_dataset_dict["uuid"] = experimental_imaging_dataset_uuid
experimental_imaging_dataset_dict = filter_model_dictionary(experimental_imaging_dataset_dict, bia_data_model.ExperimentalImagingDataset)
experimental_imaging_dataset1 = (
bia_data_model.ExperimentalImagingDataset.model_validate(
experimental_imaging_dataset_dict
Expand Down Expand Up @@ -523,10 +495,6 @@ def get_test_study() -> bia_data_model.Study:
"Test keyword3",
],
"grant": [g.model_dump() for g in grant],
"experimental_imaging_component": [
e.uuid for e in get_test_experimental_imaging_dataset()
],
"annotation_component": [],
}
study_uuid = dict_to_uuid(
study_dict,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
from __future__ import annotations

from . import semantic_models
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, ConfigDict
from typing import List, Optional
from uuid import UUID
from enum import Enum




class DocumentMixin(BaseModel):

# Throw error if you try to validate/create model from a dictionary with keys that aren't a field in the model
model_config = ConfigDict(extra="forbid")

uuid: UUID = Field(
description="""Unique ID (across the BIA database) used to refer to and identify a document."""
)
Expand Down

0 comments on commit a57842b

Please sign in to comment.